"""Constraints for DataLad datasets"""
from __future__ import annotations
from pathlib import (
Path,
PurePath,
)
from datalad_next.datasets import Dataset
from .base import (
Constraint,
DatasetParameter,
)
from .exceptions import NoDatasetFound
class EnsureDataset(Constraint):
    """Ensure an absent/present `Dataset` from any path or Dataset instance

    Regardless of the nature of the input (`Dataset` instance or local path)
    a resulting instance (if it can be created) is optionally tested for
    absence or presence on the local file system.

    Due to the particular nature of the `Dataset` class (the same instance
    is used for a unique path), this constraint returns a `DatasetParameter`
    rather than a `Dataset` directly. Consuming commands can discover
    the original parameter value via its `original` property, and access a
    `Dataset` instance via its `ds` property.

    In addition to any value representing an explicit path, this constraint
    also recognizes the special value `None`. This instructs the implementation
    to find a dataset that contains the process working directory (PWD).
    Such a dataset need not have its root at PWD, but could be located in
    any parent directory too. If no such dataset can be found, PWD is used
    directly. Tests for ``installed`` are performed in the same way as with
    an explicit dataset location argument. If `None` is given and
    ``installed=True``, but no dataset is found, an exception is raised
    (this is the behavior of the ``require_dataset()`` function in
    the DataLad core package). With ``installed=False`` no exception is
    raised and a dataset instance matching PWD is returned.
    """
    def __init__(self,
                 installed: bool | None = None,
                 purpose: str | None = None,
                 require_id: bool | None = None):
        """
        Parameters
        ----------
        installed: bool, optional
          If given, a dataset will be verified to be installed or not.
          Otherwise the installation-state will not be inspected.
        purpose: str, optional
          If given, will be used in generated error messages to communicate
          why a dataset is required (to exist)
        require_id: bool, optional
          If given, performs an additional check whether the dataset has a
          valid dataset ID.
        """
        self._installed = installed
        self._purpose = purpose
        self._require_id = require_id
        super().__init__()

    def __call__(self, value) -> DatasetParameter:
        """Coerce ``value`` into a `DatasetParameter`, validating as configured.

        Raises (via ``raise_for()``) when ``value`` cannot yield a dataset,
        or when the installed-state / dataset-id checks configured in the
        constructor fail.
        """
        # good-enough test to recognize a dataset instance cheaply
        if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
            ds = value
        # anticipate what require_dataset() could handle and fail if we got
        # something else
        elif not isinstance(value, (str, PurePath, type(None))):
            self.raise_for(
                value, "cannot create Dataset from {type}", type=type(value)
            )
        else:
            ds = self._require_dataset(value)
        assert ds
        if self._installed is not None:
            is_installed = ds.is_installed()
            if self._installed is False and is_installed:
                self.raise_for(ds, 'already exists locally')
            if self._installed and not is_installed:
                self.raise_for(ds, 'not installed')
        if self._require_id and not ds.id:
            self.raise_for(ds, 'does not have a valid datalad-id')
        return DatasetParameter(value, ds)

    def short_description(self) -> str:
        # one-phrase summary reflecting the configured installed-state check
        return "(path to) {}Dataset".format(
            'an existing ' if self._installed is True
            else 'a non-existing ' if self._installed is False else 'a ')

    def _require_dataset(self, value):
        """Resolve ``value`` (path or ``None``) to a `Dataset` instance."""
        from datalad.distribution.dataset import require_dataset
        try:
            ds = require_dataset(
                value,
                check_installed=self._installed is True,
                purpose=self._purpose,
            )
            return ds
        except NoDatasetFound:
            # mitigation of non-uniform require_dataset() behavior.
            # with value == None it does not honor check_installed
            # https://github.com/datalad/datalad/issues/7281
            if self._installed is True:
                # if we are instructed to ensure an installed dataset
                raise
            else:
                # but otherwise go with CWD. require_dataset() did not
                # find a dataset in any parent dir either, so this is
                # the best we can do. Installation absence verification
                # will happen further down
                return Dataset(Path.cwd())