Dataset

Bases: StaticCollection

A class describing a given dataset.

Attributes:

Name	Type	Description
`name`	`String`	The name of the dataset.
`metadata`	`Dictionary`	A dictionary of metadata that describes the dataset.

Examples:

>>> Dataset.create(name="dataset1")
>>> Dataset.create(name="dataset1", metadata={})
>>> Dataset.create(name="dataset1", metadata={"foo": "bar", "pi": 3.14})

Source code in valor/coretypes.py

class Dataset(StaticCollection):
    """
    Client-side representation of a dataset.

    Every method that talks to the back end does so through a ``Client``
    built from the connection stored on the instance (``self.conn``).

    Attributes
    ----------
    name : String
        The name of the dataset.
    metadata : Dictionary
        A dictionary of metadata that describes the dataset.

    Examples
    --------
    >>> Dataset.create(name="dataset1")
    >>> Dataset.create(name="dataset1", metadata={})
    >>> Dataset.create(name="dataset1", metadata={"foo": "bar", "pi": 3.14})
    """

    # Symbolic class-level attributes, both owned by "dataset".
    name: String = String.symbolic(owner="dataset", name="name")
    metadata: Dictionary = Dictionary.symbolic(
        owner="dataset", name="metadata"
    )

    def __init__(
        self,
        *,
        name: str,
        metadata: Optional[dict] = None,
        connection: Optional[ClientConnection] = None,
    ):
        """
        Build a local dataset object without contacting the back end.

        Use the 'Dataset.create' classmethod to create a dataset with
        persistence.

        Parameters
        ----------
        name : str
            The name of the dataset.
        metadata : dict, optional
            A dictionary of metadata that describes the dataset. ``None``
            (or any other falsy value) is replaced by a new empty dict.
        connection : ClientConnection, optional
            An initialized client connection.
        """
        # The connection is stored before the parent initializer runs.
        self.conn = connection
        super().__init__(name=name, metadata=metadata or {})

    @classmethod
    def create(
        cls,
        name: str,
        metadata: Optional[Dict[str, Any]] = None,
        connection: Optional[ClientConnection] = None,
    ) -> Dataset:
        """
        Build a dataset and register it with the back end.

        Parameters
        ----------
        name : str
            The name of the dataset.
        metadata : dict, optional
            A dictionary of metadata that describes the dataset.
        connection : ClientConnection, optional
            An initialized client connection.

        Returns
        -------
        Dataset
            The newly constructed dataset object.
        """
        instance = cls(name=name, metadata=metadata, connection=connection)
        client = Client(instance.conn)
        client.create_dataset(instance)
        return instance

    @classmethod
    def get(
        cls,
        name: str,
        connection: Optional[ClientConnection] = None,
    ) -> Union[Dataset, None]:
        """
        Look up a dataset by name in the back end database.

        Parameters
        ----------
        name : str
            The name of the dataset.
        connection : ClientConnection, optional
            An initialized client connection.

        Returns
        -------
        Union[valor.Dataset, None]
            The dataset or 'None' if it doesn't exist.
        """
        client = Client(connection)
        return client.get_dataset(name)

    def add_groundtruth(
        self,
        groundtruth: GroundTruth,
    ) -> None:
        """
        Add a single ground truth to the dataset.

        The ground truth is wrapped in a one-element list and sent through
        the client's ``create_groundtruths`` call.

        Parameters
        ----------
        groundtruth : GroundTruth
            The ground truth to create.
        """
        client = Client(self.conn)
        client.create_groundtruths(dataset=self, groundtruths=[groundtruth])

    def add_groundtruths(
        self,
        groundtruths: List[GroundTruth],
        ignore_existing_datums: bool = False,
        timeout: Optional[float] = 10.0,
    ) -> None:
        """
        Add several ground truths to the dataset in one call.

        Parameters
        ----------
        groundtruths : List[GroundTruth]
            The ground truths to create.
        ignore_existing_datums : bool, default=False
            If True, will ignore datums that already exist in the backend.
            If False, will raise an error if any datums already exist.
        timeout : float, optional
            The number of seconds the client should wait until raising a
            timeout. Defaults to 10.0.
        """
        client = Client(self.conn)
        client.create_groundtruths(
            dataset=self,
            groundtruths=groundtruths,
            ignore_existing_datums=ignore_existing_datums,
            timeout=timeout,
        )

    def get_groundtruth(
        self,
        datum: Union[Datum, str],
    ) -> Union[GroundTruth, None]:
        """
        Fetch the ground truth belonging to one datum of this dataset.

        Parameters
        ----------
        datum : Union[Datum, str]
            The desired datum.

        Returns
        -------
        Union[GroundTruth, None]
            The matching ground truth or 'None' if it doesn't exist.
        """
        client = Client(self.conn)
        return client.get_groundtruth(dataset=self, datum=datum)

    def get_labels(
        self,
    ) -> List[Label]:
        """
        Fetch every label associated with this dataset.

        Returns
        -------
        List[Label]
            A list of `Labels` associated with the dataset.
        """
        client = Client(self.conn)
        return client.get_labels_from_dataset(self)

    def get_datums(self, filters: Optional[Filter] = None) -> List[Datum]:
        """
        Fetch the datums associated with this dataset.

        Parameters
        ----------
        filters : Filter, optional
            An optional datum filter. When omitted, a new ``Filter`` is
            created. Note that a supplied filter is modified in place: its
            ``datasets`` attribute is overwritten so that the query is
            restricted to this dataset.

        Returns
        -------
        List[Datum]
            A list of `Datums` associated with the dataset.
        """
        active_filter = Filter() if filters is None else filters
        # Comparing the symbolic class attribute with this dataset's name
        # yields the value stored as the dataset constraint.
        active_filter.datasets = Dataset.name == self.name  # type: ignore - #issue 605
        client = Client(self.conn)
        return client.get_datums(filters=active_filter)

    def get_evaluations(
        self,
        metrics_to_sort_by: Optional[
            Dict[str, Union[Dict[str, str], str]]
        ] = None,
        *_,
        timeout: Optional[float] = None,
    ) -> List[Evaluation]:
        """
        Fetch every evaluation associated with this dataset.

        Any additional positional arguments are collected by ``*_`` and
        ignored, so ``timeout`` can only be supplied by keyword.

        Parameters
        ----------
        metrics_to_sort_by : dict[str, str | dict[str, str]], optional
            An optional dict of metric types to sort the evaluations by.
        timeout : float, optional
            The number of seconds the client should wait until raising a
            timeout.

        Returns
        -------
        List[Evaluation]
            A list of `Evaluations` associated with the dataset.
        """
        client = Client(self.conn)
        return client.get_evaluations(
            datasets=[self],
            metrics_to_sort_by=metrics_to_sort_by,
            timeout=timeout,
        )

    def get_summary(
        self, *_, timeout: Optional[float] = None
    ) -> DatasetSummary:
        """
        Fetch the summary of this dataset.

        Any positional arguments are collected by ``*_`` and ignored, so
        ``timeout`` can only be supplied by keyword.

        Parameters
        ----------
        timeout : float, optional
            The number of seconds the client should wait until raising a
            timeout.

        Returns
        -------
        DatasetSummary
            The summary of the dataset. This class has the following fields:

            name : name of the dataset

            num_datums : total number of datums in the dataset

            num_annotations : total number of labeled annotations in the
            dataset; if an object (such as a bounding box) has multiple
            labels, then each label is counted separately

            num_bounding_boxes : total number of bounding boxes in the dataset

            num_polygons : total number of polygons in the dataset

            num_rasters : total number of rasters in the dataset

            labels : list of the unique labels in the dataset

            datum_metadata : list of the unique metadata dictionaries in the
            dataset that are associated to datums

            groundtruth_annotation_metadata : list of the unique metadata
            dictionaries in the dataset that are associated to annotations
        """
        client = Client(self.conn)
        return client.get_dataset_summary(self.name, timeout=timeout)  # type: ignore

    def finalize(
        self,
    ):
        """
        Finalize the dataset such that new ground truths cannot be added to it.

        Returns
        -------
        The value returned by the client's ``finalize_dataset`` call.
        """
        client = Client(self.conn)
        return client.finalize_dataset(self)

    def delete(
        self,
        timeout: int = 0,
    ):
        """
        Delete the dataset from the back end.

        Parameters
        ----------
        timeout : int, default=0
            Sets a timeout in seconds; forwarded to the client as given.
        """
        client = Client(self.conn)
        client.delete_dataset(self.name, timeout)  # type: ignore

Functions

`valor.Dataset.__init__(*, name, metadata=None, connection=None)`

Creates a local instance of a dataset.

Use 'Dataset.create' classmethod to create a dataset with persistence.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the dataset.	required
`metadata`	`dict`	A dictionary of metadata that describes the dataset.	`None`
`connection`	`ClientConnection`	An initialized client connection.	`None`

Source code in valor/coretypes.py

def __init__(
    self,
    *,
    name: str,
    metadata: Optional[dict] = None,
    connection: Optional[ClientConnection] = None,
):
    """
    Build a local dataset object without contacting the back end.

    Use the 'Dataset.create' classmethod to create a dataset with
    persistence.

    Parameters
    ----------
    name : str
        The name of the dataset.
    metadata : dict, optional
        A dictionary of metadata that describes the dataset. ``None``
        (or any other falsy value) is replaced by a new empty dict.
    connection : ClientConnection, optional
        An initialized client connection.
    """
    # The connection is stored before the parent initializer runs.
    self.conn = connection
    super().__init__(name=name, metadata=metadata or {})

`valor.Dataset.add_groundtruth(groundtruth)`

Add a ground truth to the dataset.

Parameters:

Name	Type	Description	Default
`groundtruth`	`GroundTruth`	The ground truth to create.	required

Source code in valor/coretypes.py

def add_groundtruth(
    self,
    groundtruth: GroundTruth,
) -> None:
    """
    Add a single ground truth to the dataset.

    The ground truth is wrapped in a one-element list and sent through
    the client's ``create_groundtruths`` call.

    Parameters
    ----------
    groundtruth : GroundTruth
        The ground truth to create.
    """
    client = Client(self.conn)
    client.create_groundtruths(dataset=self, groundtruths=[groundtruth])

`valor.Dataset.add_groundtruths(groundtruths, ignore_existing_datums=False, timeout=10.0)`

Add multiple ground truths to the dataset.

Parameters:

Name	Type	Description	Default
`groundtruths`	`List[GroundTruth]`	The ground truths to create.	required
`ignore_existing_datums`	`bool`	If True, will ignore datums that already exist in the backend. If False, will raise an error if any datums already exist. Default is False.	`False`
`timeout`	`float`	The number of seconds the client should wait until raising a timeout.	`10.0`

Source code in valor/coretypes.py

def add_groundtruths(
    self,
    groundtruths: List[GroundTruth],
    ignore_existing_datums: bool = False,
    timeout: Optional[float] = 10.0,
) -> None:
    """
    Add several ground truths to the dataset in one call.

    Parameters
    ----------
    groundtruths : List[GroundTruth]
        The ground truths to create.
    ignore_existing_datums : bool, default=False
        If True, will ignore datums that already exist in the backend.
        If False, will raise an error if any datums already exist.
    timeout : float, optional
        The number of seconds the client should wait until raising a
        timeout. Defaults to 10.0.
    """
    client = Client(self.conn)
    client.create_groundtruths(
        dataset=self,
        groundtruths=groundtruths,
        ignore_existing_datums=ignore_existing_datums,
        timeout=timeout,
    )

`valor.Dataset.create(name, metadata=None, connection=None)` `classmethod`

Creates a dataset that persists in the back end.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the dataset.	required
`metadata`	`dict`	A dictionary of metadata that describes the dataset.	`None`
`connection`	`ClientConnection`	An initialized client connection.	`None`

Source code in valor/coretypes.py

@classmethod
def create(
    cls,
    name: str,
    metadata: Optional[Dict[str, Any]] = None,
    connection: Optional[ClientConnection] = None,
) -> Dataset:
    """
    Build a dataset and register it with the back end.

    Parameters
    ----------
    name : str
        The name of the dataset.
    metadata : dict, optional
        A dictionary of metadata that describes the dataset.
    connection : ClientConnection, optional
        An initialized client connection.

    Returns
    -------
    Dataset
        The newly constructed dataset object.
    """
    instance = cls(name=name, metadata=metadata, connection=connection)
    client = Client(instance.conn)
    client.create_dataset(instance)
    return instance

`valor.Dataset.delete(timeout=0)`

Delete the dataset from the back end.

Parameters:

Name	Type	Description	Default
`timeout`	`int`	Sets a timeout in seconds.	`0`

Source code in valor/coretypes.py

def delete(
    self,
    timeout: int = 0,
):
    """
    Delete the dataset from the back end.

    Parameters
    ----------
    timeout : int, default=0
        Sets a timeout in seconds; forwarded to the client as given.
    """
    client = Client(self.conn)
    client.delete_dataset(self.name, timeout)  # type: ignore

`valor.Dataset.finalize()`

Finalizes the dataset such that new ground truths cannot be added to it.

Source code in valor/coretypes.py

def finalize(
    self,
):
    """
    Finalize the dataset such that new ground truths cannot be added to it.

    Returns
    -------
    The value returned by the client's ``finalize_dataset`` call.
    """
    client = Client(self.conn)
    return client.finalize_dataset(self)

`valor.Dataset.get(name, connection=None)` `classmethod`

Retrieves a dataset from the back end database.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the dataset.	required
`connection`	`ClientConnection`	An initialized client connection.	`None`

Returns:

Type	Description
`Union[Dataset, None]`	The dataset or 'None' if it doesn't exist.

Source code in valor/coretypes.py

@classmethod
def get(
    cls,
    name: str,
    connection: Optional[ClientConnection] = None,
) -> Union[Dataset, None]:
    """
    Look up a dataset by name in the back end database.

    Parameters
    ----------
    name : str
        The name of the dataset.
    connection : ClientConnection, optional
        An initialized client connection.

    Returns
    -------
    Union[valor.Dataset, None]
        The dataset or 'None' if it doesn't exist.
    """
    client = Client(connection)
    return client.get_dataset(name)

`valor.Dataset.get_datums(filters=None)`

Get all datums associated with a given dataset.

Parameters:

Name	Type	Description	Default
`filters`	`Filter`	An optional datum filter.	`None`

Returns:

Type	Description
`List[Datum]`	A list of `Datums` associated with the dataset.

Source code in valor/coretypes.py

def get_datums(self, filters: Optional[Filter] = None) -> List[Datum]:
    """
    Fetch the datums associated with this dataset.

    Parameters
    ----------
    filters : Filter, optional
        An optional datum filter. When omitted, a new ``Filter`` is
        created. Note that a supplied filter is modified in place: its
        ``datasets`` attribute is overwritten so that the query is
        restricted to this dataset.

    Returns
    -------
    List[Datum]
        A list of `Datums` associated with the dataset.
    """
    active_filter = Filter() if filters is None else filters
    # Comparing the class-level ``Dataset.name`` with this dataset's name
    # yields the value stored as the dataset constraint.
    active_filter.datasets = Dataset.name == self.name  # type: ignore - #issue 605
    client = Client(self.conn)
    return client.get_datums(filters=active_filter)

`valor.Dataset.get_evaluations(metrics_to_sort_by=None, *_, timeout=None)`

Get all evaluations associated with a given dataset.

Parameters:

Name	Type	Description	Default
`metrics_to_sort_by`	`dict[str, str \| dict[str, str]]`	An optional dict of metric types to sort the evaluations by.	`None`
`timeout`	`float`	The number of seconds the client should wait until raising a timeout.	`None`

Returns:

Type	Description
`List[Evaluation]`	A list of `Evaluations` associated with the dataset.

Source code in valor/coretypes.py

def get_evaluations(
    self,
    metrics_to_sort_by: Optional[
        Dict[str, Union[Dict[str, str], str]]
    ] = None,
    *_,
    timeout: Optional[float] = None,
) -> List[Evaluation]:
    """
    Fetch every evaluation associated with this dataset.

    Any additional positional arguments are collected by ``*_`` and
    ignored, so ``timeout`` can only be supplied by keyword.

    Parameters
    ----------
    metrics_to_sort_by : dict[str, str | dict[str, str]], optional
        An optional dict of metric types to sort the evaluations by.
    timeout : float, optional
        The number of seconds the client should wait until raising a
        timeout.

    Returns
    -------
    List[Evaluation]
        A list of `Evaluations` associated with the dataset.
    """
    client = Client(self.conn)
    return client.get_evaluations(
        datasets=[self],
        metrics_to_sort_by=metrics_to_sort_by,
        timeout=timeout,
    )

`valor.Dataset.get_groundtruth(datum)`

Get a particular ground truth.

Parameters:

Name	Type	Description	Default
`datum`	`Union[Datum, str]`	The desired datum.	required

Returns:

Type	Description
`Union[GroundTruth, None]`	The matching ground truth or 'None' if it doesn't exist.

Source code in valor/coretypes.py

def get_groundtruth(
    self,
    datum: Union[Datum, str],
) -> Union[GroundTruth, None]:
    """
    Fetch the ground truth belonging to one datum of this dataset.

    Parameters
    ----------
    datum : Union[Datum, str]
        The desired datum.

    Returns
    -------
    Union[GroundTruth, None]
        The matching ground truth or 'None' if it doesn't exist.
    """
    client = Client(self.conn)
    return client.get_groundtruth(dataset=self, datum=datum)

`valor.Dataset.get_labels()`

Get all labels associated with a given dataset.

Returns:

Type	Description
`List[Label]`	A list of `Labels` associated with the dataset.

Source code in valor/coretypes.py

def get_labels(
    self,
) -> List[Label]:
    """
    Fetch every label associated with this dataset.

    Returns
    -------
    List[Label]
        A list of `Labels` associated with the dataset.
    """
    client = Client(self.conn)
    return client.get_labels_from_dataset(self)

`valor.Dataset.get_summary(*_, timeout=None)`

Get the summary of a given dataset.

Parameters:

Name	Type	Description	Default
`timeout`	`float`	The number of seconds the client should wait until raising a timeout.	`None`

Returns:

Type	Description
`DatasetSummary`	The summary of the dataset. This class has the following fields:

name : name of the dataset

num_datums : total number of datums in the dataset

num_annotations : total number of labeled annotations in the dataset; if an object (such as a bounding box) has multiple labels, then each label is counted separately

num_bounding_boxes : total number of bounding boxes in the dataset

num_polygons : total number of polygons in the dataset

num_rasters : total number of rasters in the dataset

labels : list of the unique labels in the dataset

datum_metadata : list of the unique metadata dictionaries in the dataset that are associated to datums

groundtruth_annotation_metadata : list of the unique metadata dictionaries in the dataset that are associated to annotations

Source code in valor/coretypes.py

def get_summary(
    self, *_, timeout: Optional[float] = None
) -> DatasetSummary:
    """
    Fetch the summary of this dataset.

    Any positional arguments are collected by ``*_`` and ignored, so
    ``timeout`` can only be supplied by keyword.

    Parameters
    ----------
    timeout : float, optional
        The number of seconds the client should wait until raising a
        timeout.

    Returns
    -------
    DatasetSummary
        The summary of the dataset. This class has the following fields:

        name : name of the dataset

        num_datums : total number of datums in the dataset

        num_annotations : total number of labeled annotations in the
        dataset; if an object (such as a bounding box) has multiple
        labels, then each label is counted separately

        num_bounding_boxes : total number of bounding boxes in the dataset

        num_polygons : total number of polygons in the dataset

        num_rasters : total number of rasters in the dataset

        labels : list of the unique labels in the dataset

        datum_metadata : list of the unique metadata dictionaries in the
        dataset that are associated to datums

        groundtruth_annotation_metadata : list of the unique metadata
        dictionaries in the dataset that are associated to annotations
    """
    client = Client(self.conn)
    return client.get_dataset_summary(self.name, timeout=timeout)  # type: ignore

Dataset

Functions

valor.Dataset.__init__(*, name, metadata=None, connection=None)

valor.Dataset.add_groundtruth(groundtruth)

valor.Dataset.add_groundtruths(groundtruths, ignore_existing_datums=False, timeout=10.0)

valor.Dataset.create(name, metadata=None, connection=None) classmethod

valor.Dataset.delete(timeout=0)

valor.Dataset.finalize()

valor.Dataset.get(name, connection=None) classmethod

valor.Dataset.get_datums(filters=None)

valor.Dataset.get_evaluations(metrics_to_sort_by=None, *_, timeout=None)

valor.Dataset.get_groundtruth(datum)

valor.Dataset.get_labels()

valor.Dataset.get_summary(*_, timeout=None)

`valor.Dataset.__init__(*, name, metadata=None, connection=None)`

`valor.Dataset.add_groundtruth(groundtruth)`

`valor.Dataset.add_groundtruths(groundtruths, ignore_existing_datums=False, timeout=10.0)`

`valor.Dataset.create(name, metadata=None, connection=None)` `classmethod`

`valor.Dataset.delete(timeout=0)`

`valor.Dataset.finalize()`

`valor.Dataset.get(name, connection=None)` `classmethod`

`valor.Dataset.get_datums(filters=None)`

`valor.Dataset.get_evaluations(metrics_to_sort_by=None, *_, timeout=None)`

`valor.Dataset.get_groundtruth(datum)`

`valor.Dataset.get_labels()`

`valor.Dataset.get_summary(*_, timeout=None)`