# Source code for datalad_core.commands.dataset

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from datalad_core.constraints import Constraint
from datalad_core.repo import (
    Repo,
    Worktree,
)



[docs]
class Dataset:
    """Parameter type representing a dataset in DataLad command implementations

    Datasets, typically Git repositories, are what many DataLad commands
    operate on. This class is the type used to carry such a parameter.

    Its primary job is to preserve the semantics of the parameter
    specification, exactly as given, until it reaches the implementation of
    a command. Datasets can be identified in several ways (including
    auto-discovery from a working directory), and a command may need to
    act differently depending on how, or whether, a dataset was identified.

    Secondly, it serves commands that support bare repositories as well as
    worktrees: either entity can be discovered from this one type, without
    duplicating code.

    Thirdly, it covers datasets that are yet to be created. With no
    repository or worktree on the file system, neither
    :class:`~datalad_core.repo.Repo` nor
    :class:`~datalad_core.repo.Worktree` can be used directly.

    .. note::

       This class only shares its name with the ``Dataset`` class of legacy
       DataLad. It is not a convenience interface to DataLad commands
       operating on datasets, but merely a type for implementing individual
       commands with uniform semantics for this key parameter.
    """

    def __init__(
        self,
        spec: str | Path | Repo | Worktree | None,
    ):
        """
        Passing a ``spec`` is mandatory, but ``None`` is a valid value.
        """
        self._spec = spec
        # lazily populated by the properties below
        self._path: Path | None = None
        self._worktree: Worktree | None = None
        self._repo: Repo | None = None

    @property
    def pristine_spec(self) -> str | Path | Repo | Worktree | None:
        """Returns the dataset specification exactly as given

        The value is the very object that the constructor received.
        """
        return self._spec

    @property
    def path(self) -> Path:
        """Returns the local path of the dataset, whether it exists or not

        When a Git repository is found on the file system, this is the
        worktree root path (non-bare repositories and their worktrees), or
        the repository root path (bare repositories).

        Without a repository, the path is taken from the given ``spec``,
        whether or not a matching directory exists on the file system.

        For a ``None`` spec, the process working directory is returned.
        """
        if self._path is not None:
            return self._path

        if self._spec is None:
            self._path = Path.cwd()
            return self._path

        # an existing worktree or repo takes precedence over the spec,
        # because it provides a resolved, absolute path
        worktree = self.worktree
        if worktree:
            self._path = worktree.path
        else:
            repo = self.repo
            self._path = repo.path if repo else None

        if self._path is not None:
            return self._path

        # nothing exists on the file system, the only information left
        # is the specification itself
        spec = self.pristine_spec
        if not isinstance(spec, Path):
            if TYPE_CHECKING:
                assert isinstance(spec, Path | str)
            # this may be a str-path or some magic label, but only
            # path specifications are supported for now
            spec = Path(spec)
        self._path = spec
        return self._path

    @property
    def repo(self) -> Repo | None:
        """Returns the repository of the dataset, or ``None`` without one

        Accessing this property is mostly useful when a dataset has no
        worktree. Otherwise, going through the ``repo`` property of
        :attr:`worktree` is generally more appropriate.

        ``None`` is returned when no repository is associated with the
        dataset, for example, because it has not been created yet.
        """
        if self._repo is not None:
            return self._repo

        spec = self.pristine_spec
        worktree = self.worktree
        if worktree is not None:
            self._repo = worktree.repo
        elif isinstance(spec, Repo):
            self._repo = spec
        elif isinstance(spec, (Path, str)):
            # a str may be a path or some magic label, but only
            # path specifications are supported for now
            self._repo = get_gitmanaged_from_pathlike(Repo, spec)
        return self._repo

    @property
    def worktree(self) -> Worktree | None:
        """Returns the worktree of the dataset, or ``None`` without one

        ``None`` is returned when the dataset is associated with a bare
        Git repository, or when worktree (and repository) have not been
        created yet.
        """
        if self._worktree is not None:
            return self._worktree

        spec = self.pristine_spec
        if isinstance(spec, Worktree):
            # nothing to look up, use it as-is
            self._worktree = spec
        elif isinstance(spec, (Path, str)):
            # a str may be a path or some magic label, but only
            # path specifications are supported for now
            self._worktree = get_gitmanaged_from_pathlike(Worktree, spec)
        return self._worktree

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f'{cls_name}({self.pristine_spec!r})'




[docs]
class EnsureDataset(Constraint):
    """Ensure an absent/present `Dataset` from any path or Dataset instance

    Regardless of the nature of the input (`Dataset` instance, or any value
    the `Dataset` constructor can resolve to a path), the resulting
    `Dataset` is optionally tested for absence or presence on the local
    file system. The validated `Dataset` instance is returned. The original
    parameter value remains available via its ``pristine_spec`` property.

    A given `Dataset` instance is used as-is, hence the constraint can be
    applied to its own output.

    In addition to any value representing an explicit path, this constraint
    also accepts the special value `None`. A `Dataset` created from `None`
    reports the process working directory (PWD) as its path, but has no
    associated repository or worktree. Consequently, with `None` and
    ``installed=True`` (or ``installed='with-id'``) the value is rejected
    as not installed, whereas with ``installed=False`` or ``installed=None``
    a dataset instance matching PWD is returned.
    """

    def __init__(self, *, installed: bool | str | None = None):
        """
        Parameters
        ----------
        installed: bool or 'with-id', optional
          If ``True``, a dataset must have an associated repository or
          worktree. If ``False``, it must have neither. With ``'with-id'``
          a dataset must be installed, and must additionally have a
          ``datalad.dataset.id`` setting in its ``datalad-branch``
          configuration source. If ``None`` (default), the
          installation-state is not inspected.
        """
        self._installed = installed
        super().__init__()

    @property
    def input_synopsis(self) -> str:
        """Short description of acceptable input, reflecting ``installed``"""
        # any truthy value (including 'with-id') demands an existing dataset,
        # only an explicit `False` demands a non-existing one
        return '(path to) {}dataset'.format(
            'an existing '
            if self._installed
            else 'a non-existing '
            if self._installed is False
            else 'a '
        )

    def __call__(self, value) -> Dataset:
        """Return a `Dataset` for ``value``, after testing its presence

        Parameters
        ----------
        value:
          A `Dataset` instance, or any specification supported by the
          `Dataset` constructor.

        Returns
        -------
        Dataset

        Any violation is reported via ``raise_for()``: a ``value`` that
        cannot be resolved to a path, a dataset that exists although
        ``installed=False``, a dataset that does not exist although it is
        required to, or a missing dataset ID with ``installed='with-id'``.
        """
        # an existing instance is taken as-is. Wrapping it in another
        # `Dataset` would make the path resolution below fail.
        ds = value if isinstance(value, Dataset) else Dataset(value)
        try:
            # accessing the property forces a resolution of the spec.
            # anything that cannot be turned into a path fails here
            ds.path  # noqa: B018
        except (ValueError, TypeError) as e:
            self.raise_for(
                value,
                'cannot create Dataset from {type}: {__caused_by__}',
                type=type(value),
                __caused_by__=e,
            )
        if self._installed is None:
            # no inspection of the installation-state requested
            return ds

        # look up once: `Dataset` does not cache a negative result, and
        # each repeated access would query the file system again
        to_query = ds.worktree or ds.repo
        if self._installed is False and to_query:
            self.raise_for(ds, 'already exists locally')
        if self._installed and not to_query:
            self.raise_for(ds, 'not installed')
        if self._installed != 'with-id':
            return ds

        if TYPE_CHECKING:
            assert to_query is not None
        if 'datalad.dataset.id' not in to_query.config.sources['datalad-branch']:
            self.raise_for(ds, 'does not have a datalad-id')
        return ds




def get_gitmanaged_from_pathlike(cls, path):
    """Return ``cls.from_path(path)``, or ``None`` if that raises ValueError

    ``path`` is converted to a :class:`~pathlib.Path` first, unless it
    already is one. ``cls`` can be any class with a ``from_path()``
    constructor that signals a non-matching location with a ``ValueError``.
    """
    location = path if isinstance(path, Path) else Path(path)
    try:
        # the constructor decides whether the location holds an
        # instance of the requested class
        managed = cls.from_path(location)
    except ValueError:
        managed = None
    return managed