"""Constraints for DataLad datasets"""
from __future__ import annotations
from pathlib import (
Path,
PurePath,
)
from datalad_next.datasets import Dataset
from .base import (
Constraint,
DatasetParameter,
)
from .exceptions import NoDatasetFound
class EnsureDataset(Constraint):
    """Ensure an absent/present `Dataset` from any path or Dataset instance

    Regardless of the nature of the input (`Dataset` instance or local path)
    a resulting instance (if it can be created) is optionally tested for
    absence or presence on the local file system.

    Due to the particular nature of the `Dataset` class (the same instance
    is used for a unique path), this constraint returns a `DatasetParameter`
    rather than a `Dataset` directly. Consuming commands can discover
    the original parameter value via its `original` property, and access a
    `Dataset` instance via its `ds` property.

    In addition to any value representing an explicit path, this constraint
    also recognizes the special value `None`. This instructs the implementation
    to find a dataset that contains the process working directory (PWD).
    Such a dataset need not have its root at PWD, but could be located in
    any parent directory too. If no such dataset can be found, PWD is used
    directly. Tests for ``installed`` are performed in the same way as with
    an explicit dataset location argument. If `None` is given and
    ``installed=True``, but no dataset is found, an exception is raised
    (this is the behavior of the ``require_dataset()`` function in
    the DataLad core package). With ``installed=False`` no exception is
    raised and a dataset instance matching PWD is returned.
    """
    def __init__(self,
                 installed: bool | None = None,
                 purpose: str | None = None,
                 require_id: bool | None = None):
        """
        Parameters
        ----------
        installed: bool, optional
          If given, a dataset will be verified to be installed or not.
          Otherwise the installation-state will not be inspected.
        purpose: str, optional
          If given, will be used in generated error messages to communicate
          why a dataset is required (to exist)
        require_id: bool, optional
          If given, performs an additional check whether the dataset has a
          valid dataset ID.
        """
        self._installed = installed
        self._purpose = purpose
        self._require_id = require_id
        super().__init__()

    def __call__(self, value) -> DatasetParameter:
        """Coerce ``value`` into a `DatasetParameter`, validating as configured.

        Raises (via ``raise_for()``) when ``value`` cannot yield a dataset,
        or when the installed-state / dataset-id checks configured in the
        constructor fail.
        """
        # good-enough test to recognize a dataset instance cheaply
        if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
            ds = value
        # anticipate what require_dataset() could handle and fail if we got
        # something else
        elif not isinstance(value, (str, PurePath, type(None))):
            self.raise_for(
                value, "cannot create Dataset from {type}", type=type(value)
            )
        else:
            ds = self._require_dataset(value)
        assert ds
        if self._installed is not None:
            is_installed = ds.is_installed()
            if self._installed is False and is_installed:
                self.raise_for(ds, 'already exists locally')
            if self._installed and not is_installed:
                self.raise_for(ds, 'not installed')
        if self._require_id and not ds.id:
            self.raise_for(ds, 'does not have a valid datalad-id')
        return DatasetParameter(value, ds)

    def short_description(self) -> str:
        # one-phrase summary reflecting the configured installed-state check
        return "(path to) {}Dataset".format(
            'an existing ' if self._installed is True
            else 'a non-existing ' if self._installed is False else 'a ')

    def _require_dataset(self, value):
        """Resolve ``value`` (path or ``None``) to a `Dataset` instance."""
        from datalad.distribution.dataset import require_dataset
        try:
            ds = require_dataset(
                value,
                check_installed=self._installed is True,
                purpose=self._purpose,
            )
            return ds
        except NoDatasetFound:
            # mitigation of non-uniform require_dataset() behavior.
            # with value == None it does not honor check_installed
            # https://github.com/datalad/datalad/issues/7281
            if self._installed is True:
                # if we are instructed to ensure an installed dataset
                raise
            else:
                # but otherwise go with CWD. require_dataset() did not
                # find a dataset in any parent dir either, so this is
                # the best we can do. Installation absence verification
                # will happen further down
                return Dataset(Path.cwd())