Source code for datalad_core.commands.dataset

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from datalad_core.constraints import Constraint
from datalad_core.repo import (
    Repo,
    Worktree,
)


class Dataset:
    """Dataset parameter type for DataLad command implementations

    Many DataLad commands operate on datasets, which are typically Git
    repositories. This class provides a type to represent this parameter.

    The main purpose of this class is to relay the semantics of the original
    parameter specification all the way to the implementation of a particular
    command. A dataset may be identified in a variety of ways, including
    auto-discovery based on a working directory. Individual commands may want
    to behave differently depending on how a dataset was identified, or if at
    all.

    A second use case are commands that can work on bare repositories and
    worktrees alike. This class is a single type from which the presence of
    both entities can be discovered without code duplication.

    A third use case are to-be-created datasets for which no repository or
    worktree exist on the file system yet, and consequently the
    :class:`~datalad_core.repo.Repo` and :class:`~datalad_core.repo.Worktree`
    classes cannot be used directly.

    .. note::

       Despite the name, this class is very different from the ``Dataset``
       class in legacy DataLad. This is not a convenience interface for
       DataLad commands that operate on datasets. Instead, it is merely a
       type to be used for implementing individual DataLad commands, with
       uniform semantics for this key parameter.
    """

    def __init__(
        self,
        spec: str | Path | Repo | Worktree | None,
    ):
        """
        A ``spec`` is required, even if the given value is ``None``.
        """
        self._spec = spec
        # lazily populated caches for the corresponding properties.
        # only successful lookups are stored; ``None`` means "not resolved
        # (yet)" and triggers a new lookup on the next property access
        self._repo: Repo | None = None
        self._worktree: Worktree | None = None
        self._path: Path | None = None

    @property
    def pristine_spec(self) -> str | Path | Repo | Worktree | None:
        """Returns the unaltered specification of the dataset

        This is the exact value that has been given to the constructor.
        """
        return self._spec

    @property
    def path(self) -> Path:
        """Returns the local path associated with any (non-)existing dataset

        If an associated Git repository exists on the file system, the return
        path is the worktree root path for non-bare repositories and their
        worktree, or the repository root path for bare repositories.

        If no repository exists, the path is derived from the given ``spec``
        regardless of a corresponding directory existing on the file system.

        If the spec is ``None``, the returned path will be the process working
        directory.

        Raises whatever ``Path()`` raises for a ``spec`` that is neither
        ``None``, a path, nor associated with a worktree or repository.
        """
        if self._path is not None:
            return self._path

        if self._spec is None:
            self._path = Path.cwd()
            return self._path

        # use the (resolved) path of a worktree or repo,
        # if they exist.
        # this gives an absolute path
        #
        # each property is read exactly once: a lookup that found nothing
        # is not cached, so any further read would repeat that lookup
        worktree = self.worktree
        if worktree:
            self._path = worktree.path
        else:
            repo = self.repo
            self._path = repo.path if repo else None
        if self._path is not None:
            return self._path

        # there is nothing on the filesystem, we can only work with the
        # pristine_spec as-is
        ps = self.pristine_spec
        if isinstance(ps, Path):
            self._path = ps
        else:
            if TYPE_CHECKING:
                assert isinstance(ps, Path | str)
            # could be a str-path or some magic label.
            # for now we only support a path specification
            self._path = Path(ps)
        return self._path

    @property
    def repo(self) -> Repo | None:
        """Returns a repository associated with the dataset (if one exists)

        This property is mostly useful for datasets without a worktree.
        For datasets with a worktree it is generally more appropriate to
        access the ``repo`` property of the :attr:`worktree` property.

        Returns ``None`` if there is no associated repository. This may
        happen, if a repository is yet to be created.
        """
        # short cut
        if self._repo is not None:
            return self._repo

        # a worktree knows its repository, prefer that
        worktree = self.worktree
        if worktree is not None:
            self._repo = worktree.repo
            return self._repo

        ps = self.pristine_spec
        if isinstance(ps, Repo):
            # we can take this right away
            self._repo = ps
        elif isinstance(ps, (str, Path)):
            # a str could be a str-path or some magic label.
            # for now we only support a path specification
            self._repo = get_gitmanaged_from_pathlike(Repo, ps)
        return self._repo

    @property
    def worktree(self) -> Worktree | None:
        """Returns a worktree associated with the dataset (if one exists)

        Returns ``None`` if there is no associated worktree. This may happen,
        if the dataset is associated with a bare Git repository, or if the
        worktree (and repository) is yet to be created.
        """
        if self._worktree is not None:
            return self._worktree

        ps = self.pristine_spec
        if isinstance(ps, Worktree):
            # we can take this right away
            self._worktree = ps
        elif isinstance(ps, (str, Path)):
            # a str could be a str-path or some magic label.
            # for now we only support a path specification
            self._worktree = get_gitmanaged_from_pathlike(Worktree, ps)
        return self._worktree

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}({self.pristine_spec!r})'
class EnsureDataset(Constraint):
    """Ensure an absent/present `Dataset` from any path or `None`

    The given value is wrapped in a `Dataset` instance, and that instance is
    returned. Consuming commands can discover the original parameter value
    via its ``pristine_spec`` property.

    Optionally, the resulting instance is tested for absence or presence on
    the local file system (see the ``installed`` constructor argument). A
    dataset counts as present when it has an associated worktree or
    repository.

    In addition to any value representing an explicit path, the special
    value `None` is accepted and passed on to `Dataset` as-is.

    NOTE(review): with the `Dataset` implementation in this module, a `None`
    value resolves to the process working directory, but is never associated
    with a worktree or repository. Consequently, `None` cannot satisfy a
    truthy ``installed``. Confirm whether discovery of a dataset containing
    the working directory is intended here.
    """

    def __init__(self, *, installed: bool | str | None = None):
        """
        Parameters
        ----------
        installed: bool or 'with-id', optional
          If given, a dataset will be verified to be installed or not.
          With ``True`` the dataset must have a worktree or repository,
          with ``False`` it must have neither. With ``'with-id'`` the
          dataset must be installed, and ``datalad.dataset.id`` must be
          present in the ``datalad-branch`` configuration source.
          Any other truthy value is treated like ``True``.
          Otherwise the installation-state will not be inspected.
        """
        self._installed = installed
        super().__init__()

    @property
    def input_synopsis(self) -> str:
        """Short description of accepted input, reflecting ``installed``"""
        if self._installed:
            state = 'an existing '
        elif self._installed is False:
            state = 'a non-existing '
        else:
            state = 'a '
        return f'(path to) {state}dataset'

    def __call__(self, value) -> Dataset:
        """Return a `Dataset` for ``value``, after the configured checks

        Any violation is reported via ``self.raise_for()``.
        """
        ds = Dataset(value)
        try:
            # resolve
            ds.path  # noqa: B018
        except (ValueError, TypeError) as e:
            self.raise_for(
                value,
                'cannot create Dataset from {type}: {__caused_by__}',
                type=type(value),
                __caused_by__=e,
            )

        installed = self._installed
        if installed is not False and not installed:
            # no presence test requested, do not touch the file system
            return ds

        # look this up once only. `Dataset` does not cache a lookup that
        # found nothing, so every further access would repeat it
        present = ds.worktree or ds.repo

        if installed is False:
            if present:
                self.raise_for(ds, 'already exists locally')
            return ds

        if not present:
            self.raise_for(ds, 'not installed')
        if installed != 'with-id':
            return ds

        if TYPE_CHECKING:
            assert present is not None
        branch_config = present.config.sources['datalad-branch']
        if 'datalad.dataset.id' not in branch_config:
            self.raise_for(ds, 'does not have a datalad-id')
        return ds
def get_gitmanaged_from_pathlike(cls, path):
    """Return ``cls.from_path(path)``, or ``None`` if that raises ``ValueError``

    ``path`` is converted to a ``Path`` first, unless it already is one.
    """
    location = path if isinstance(path, Path) else Path(path)
    try:
        # the constructor will tell us, if this is an instance of the
        # requested class
        managed = cls.from_path(location)
    except ValueError:
        return None
    return managed