Source code for datalad_next.constraints.basic

# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Basic constraints for declaring essential data types, values, and ranges"""

from __future__ import annotations

__docformat__ = 'restructuredtext'

from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed
from pathlib import Path
import re
from typing import Callable

from datalad_next.datasets import resolve_path

from .base import (
    Constraint,
    DatasetParameter,
)
from .utils import _type_str


class NoConstraint(Constraint):
    """Placeholder constraint that accepts any input unchanged"""

    def __call__(self, value):
        # nothing to verify: hand the input back as-is
        return value

    def short_description(self):
        # there is no restriction to describe
        return ''
class EnsureValue(Constraint):
    """Accept only inputs that compare equal to one specific value"""

    def __init__(self, value):
        """
        Parameters
        ----------
        value
          The single value an input has to be equal to.
        """
        super().__init__()
        self._target_value = value

    def __call__(self, value):
        expected = self._target_value
        if value == expected:
            return value
        # anything else is reported as a violation
        self.raise_for(
            value,
            "must be {target_value!r}",
            target_value=expected,
        )

    def short_description(self):
        return repr(self._target_value)

    def long_description(self):
        return 'value must be {}'.format(self.short_description())
class EnsureDType(Constraint):
    """Ensure that an input (or several inputs) are of a particular data type.

    The configured ``dtype`` is called with the input value, and whatever
    that call returns is the validated value. Any exception raised by the
    call is reported via ``raise_for()``, using the exception's text as
    the message.

    Examples:

    >>> c = EnsureDType(float)
    >>> type(c(8))  # doctest: +SKIP
    float
    >>> import numpy as np  # doctest: +SKIP
    >>> c = EnsureDType(np.float64)  # doctest: +SKIP
    >>> type(c(8))  # doctest: +SKIP
    numpy.float64
    """
    def __init__(self, dtype):
        """
        Parameters
        ----------
        dtype : functor
          Callable that converts a given value into the target type.
        """
        # initialize the base class like all sibling constraints do
        super().__init__()
        self._dtype = dtype

    def __call__(self, value):
        try:
            return self._dtype(value)
        except Exception as e:
            # any conversion failure is treated as a constraint violation
            self.raise_for(
                value,
                str(e),
            )

    def short_description(self):
        return _type_str(self._dtype)

    def long_description(self):
        return "value must be convertible to type '%s'" % self.short_description()
class EnsureInt(EnsureDType):
    """Ensure that an input (or several inputs) can be converted to 'int'."""

    def __init__(self):
        """Set up the parent `EnsureDType` with `int` as the target type"""
        super().__init__(int)
class EnsureFloat(EnsureDType):
    """Ensure that an input (or several inputs) can be converted to 'float'."""

    def __init__(self):
        """Set up the parent `EnsureDType` with `float` as the target type"""
        super().__init__(float)
class EnsureBool(Constraint):
    """Ensure that an input is a bool.

    Besides actual ``bool`` values, a few literal labels are recognized
    (compared after lower-casing):

      False: '0', 'no', 'off', 'disable', 'false'
      True: '1', 'yes', 'on', 'enable', 'true'
    """
    def __call__(self, value):
        if isinstance(value, bool):
            return value
        if isinstance(value, (bytes, str)):
            # labels are matched case-insensitively; note that the
            # lower-cased form is also what gets reported on failure
            value = value.lower()
            false_labels = ('0', 'no', 'off', 'disable', 'false')
            true_labels = ('1', 'yes', 'on', 'enable', 'true')
            if value in false_labels:
                return False
            if value in true_labels:
                return True
        self.raise_for(value, "must be convertible to boolean")

    def short_description(self):
        return 'bool'

    def long_description(self):
        return 'value must be convertible to type bool'
class EnsureStr(Constraint):
    """Ensure an input is a string of some min. length and matching a pattern

    Both checks are optional: by default any string, including the empty
    one, is accepted. The input is never converted to a string.
    """
    def __init__(self, min_len: int = 0, match: str | None = None):
        """
        Parameters
        ----------
        min_len: int, optional
           Minimal length for a string.
        match:
           Regular expression used to match any input value against.
           Values not matching the expression are reported as a
           constraint violation.
        """
        assert min_len >= 0
        self._min_len = min_len
        # keep a compiled pattern around, or None when matching is disabled
        self._match = None if match is None else re.compile(match)
        super().__init__()

    def __call__(self, value) -> str:
        if not isinstance(value, (bytes, str)):
            # deliberately no str() conversion: nearly everything can be
            # stringified, and the outcome would rarely be what was meant
            self.raise_for(value, "must be a string")
        required = self._min_len
        if len(value) < required:
            self.raise_for(value, "must have minimum length {len}",
                           len=required)
        regex = self._match
        if regex and not regex.match(value):
            self.raise_for(
                value,
                'does not match {pattern}',
                pattern=regex.pattern,
            )
        return value

    def long_description(self):
        suffix = f' and match {self._match.pattern}' if self._match else ''
        return 'must be a string' + suffix

    def short_description(self):
        suffix = f'({self._match.pattern})' if self._match else ''
        return 'str' + suffix
# TODO possibly consolidate on EnsureStr from -gooey, which can take
# a regex that could perform this. CON: documentation less clear.
# But if custom documentation becomes supported, it might end up even
# clearer nevertheless
class EnsureStrPrefix(EnsureStr):
    """Ensure an input is a string that begins with a fixed prefix."""

    def __init__(self, prefix):
        """
        Parameters
        ----------
        prefix : str
           Mandatory prefix.
        """
        self._prefix = prefix
        super().__init__()

    def __call__(self, value):
        # run the plain string checks of the parent class first
        super().__call__(value)
        has_prefix = value.startswith(self._prefix)
        if not has_prefix:
            self.raise_for(
                value,
                "does not start with {prefix!r}",
                prefix=self._prefix,
            )
        return value

    def long_description(self):
        return f"value must start with '{self._prefix}'"

    def short_description(self):
        return f'{self._prefix}...'
class EnsureNone(EnsureValue):
    """Accept only the value `None`"""

    def __init__(self):
        # `None` is the one and only target value
        super().__init__(value=None)
class EnsureCallable(Constraint):
    """Accept only objects that can be called"""

    def __call__(self, value):
        # presence of a `__call__` attribute is the acceptance criterion
        if hasattr(value, '__call__'):
            return value
        self.raise_for(value, "must be a callable")

    def long_description(self):
        return 'value must be a callable'

    def short_description(self):
        return 'callable'
class EnsureChoice(Constraint):
    """Accept only inputs contained in a fixed collection of values"""

    def __init__(self, *values):
        """
        Parameters
        ----------
        *values
           Possible accepted values.
        """
        self._allowed = values
        super().__init__()

    def __call__(self, value):
        choices = self._allowed
        if value not in choices:
            self.raise_for(
                value,
                "is not one of {allowed}",
                allowed=choices,
            )
        return value

    def long_description(self):
        # `None` is left out of the CMD variant, but kept in the PY variant
        cmd_choices = tuple(i for i in self._allowed if i is not None)
        return 'value must be one of [CMD: {} CMD][PY: {} PY]'.format(
            str(cmd_choices),
            str(self._allowed),
        )

    def short_description(self):
        listing = ', '.join(repr(c) for c in self._allowed)
        return '{' + listing + '}'

    def __str__(self):
        return "one of {}".format(self.short_description())
class EnsureKeyChoice(EnsureChoice):
    """Accept dict-like inputs whose value under one key is an allowed choice"""

    def __init__(self, key, values):
        """
        Parameters
        ----------
        key : str
          The to-be-tested values are looked up under the given key in
          a dict-like input object.
        values : tuple
          Possible accepted values.
        """
        self._key = key
        super().__init__(*values)

    def __call__(self, value):
        key = self._key
        if key not in value:
            self.raise_for(value, "must be dict-like")
        # only the item under the key is checked against the choices;
        # the complete input object is what gets returned
        super().__call__(value[key])
        return value

    def long_description(self):
        return f"value in '{self._key!s}' must be one of {self._allowed!s}"

    def short_description(self):
        listing = ', '.join(repr(c) for c in self._allowed)
        return f'{self._key!s}:' + '{' + listing + '}'
class EnsureRange(Constraint):
    """Ensure an input lies within given (inclusive) bounds

    Only comparisons are performed; the input's type is not checked.
    """
    def __init__(self, min=None, max=None):
        """
        Parameters
        ----------
        min
          Minimal value to be accepted in the range
        max
          Maximal value to be accepted in the range
        """
        self._min = min
        self._max = max
        if self._min is None and self._max is None:
            raise ValueError('No range given, min == max == None')
        super().__init__()

    def __call__(self, value):
        lower, upper = self._min, self._max
        if lower is not None:
            if upper is None:
                # only a lower bound is configured
                if value < lower:
                    self.raise_for(value, f"must be at least {lower!r}")
            elif value < lower or value > upper:
                # both bounds are configured
                self.raise_for(
                    value,
                    f"must be in range from {lower!r} to {upper!r}"
                )
        if upper is not None and value > upper:
            self.raise_for(value, f"must be at most {upper!r}")
        return value

    def long_description(self):
        return self.short_description()

    def short_description(self):
        lower, upper = self._min, self._max
        if upper is None:
            return f'not less than {lower!r}'
        if lower is None:
            return f'not greater than {upper!r}'
        # the range is inclusive, but spelling that out would be wordy
        return f'in range from {lower!r} to {upper!r}'
class EnsurePath(Constraint):
    """Ensures input is convertible to a (platform) path and returns a `Path`

    Optionally, the path can be tested for existence and whether it is absolute
    or relative.
    """
    def __init__(self,
                 *,
                 path_type: type = Path,
                 is_format: str | None = None,
                 lexists: bool | None = None,
                 is_mode: Callable | None = None,
                 ref: Path | None = None,
                 ref_is: str = 'parent-or-same-as',
                 dsarg: DatasetParameter | None = None):
        """
        Parameters
        ----------
        path_type:
          Specific pathlib type to convert the input to. The default is `Path`,
          i.e. the platform's path type. Not all pathlib Path types can be
          instantiated on all platforms, and not all checks are possible with
          all path types.
        is_format: {'absolute', 'relative'} or None
          If not None, the path is tested whether it matches being relative or
          absolute.
        lexists:
          If not None, the path is tested to confirm that it exists (`True`)
          or does not exist (`False`). A symlink need not point to an
          existing path to fulfil the "exists" condition.
        is_mode:
          If set, this callable will receive the path's `.lstat().st_mode`,
          and an exception is raised, if the return value does not evaluate
          to `True`. Typical callables for this feature are provided by the
          `stat` module, e.g. `S_ISDIR()`
        ref:
          If set, defines a reference Path any given path is compared to. The
          comparison operation is given by `ref_is`.
        ref_is: {'parent-or-same-as', 'parent-of'}
          Comparison operation to perform when `ref` is given.
        dsarg: DatasetParameter, optional
          If given, incoming paths are resolved in the following fashion:
          If, and only if, the original "dataset" parameter was a
          ``Dataset`` object instance, relative paths are interpreted as
          relative to the given dataset. In all other cases, relative paths
          are treated as relative to the current working directory.
        """
        super().__init__()
        self._path_type = path_type
        self._is_format = is_format
        self._lexists = lexists
        self._is_mode = is_mode
        self._ref = ref
        self._ref_is = ref_is
        self._dsarg = dsarg
        assert self._ref_is in ('parent-or-same-as', 'parent-of'), \
            'Unrecognized `ref_is` operation label'

    def __call__(self, value):
        # turn it into the target type to make everything below
        # more straightforward
        path = self._path_type(value)

        # we are testing the format first, because resolve_path()
        # will always turn things into absolute paths
        if self._is_format is not None:
            is_abs = path.is_absolute()
            if self._is_format == 'absolute' and not is_abs:
                self.raise_for(path, 'is not an absolute path')
            elif self._is_format == 'relative' and is_abs:
                self.raise_for(path, 'is not a relative path')

        # resolve relative paths against a dataset, if given
        if self._dsarg:
            path = resolve_path(
                path,
                self._dsarg.original,
                self._dsarg.ds)

        # `mode` stays None when the path does not exist (or when no
        # check needs it)
        mode = None
        if self._lexists is not None or self._is_mode is not None:
            try:
                mode = path.lstat().st_mode
            except FileNotFoundError:
                # this is fine, handled below
                pass
        if self._lexists is not None:
            if self._lexists and mode is None:
                self.raise_for(path, 'does not exist')
            elif not self._lexists and mode is not None:
                self.raise_for(path, 'does (already) exist')
        if self._is_mode is not None:
            # NOTE(review): for a non-existing path `mode` is still None
            # here and is handed to the callable as-is -- confirm that
            # this is intended
            if not self._is_mode(mode):
                self.raise_for(path, 'does not match desired mode')
        if self._ref:
            ok = True
            if self._ref_is == 'parent-or-same-as':
                ok = (path == self._ref or self._ref in path.parents)
            elif self._ref_is == 'parent-of':
                ok = self._ref in path.parents
            else:  # pragma: nocover
                # this code cannot be reached with normal usage.
                # it is prevented by an assertion in __init__()
                raise RuntimeError('Unknown `ref_is` operation label')

            if not ok:
                self.raise_for(
                    path,
                    '{ref} is not {ref_is} {path}',
                    ref=self._ref,
                    ref_is=self._ref_is,
                    # the template refers to {path}, so it must be given
                    # a value like the other placeholders
                    path=path,
                )
        return path

    def for_dataset(self, dataset: DatasetParameter) -> Constraint:
        """Return a similarly parametrized variant that resolves
        paths against a given dataset (argument)
        """
        return self.__class__(
            path_type=self._path_type,
            is_format=self._is_format,
            lexists=self._lexists,
            is_mode=self._is_mode,
            ref=self._ref,
            ref_is=self._ref_is,
            dsarg=dataset,
        )

    def short_description(self):
        """Return a brief label composed from the configured checks"""
        # `lexists` is tri-state: None means "no existence check"
        if self._lexists is None:
            existence = ''
        elif self._lexists:
            existence = 'existing '
        else:
            existence = 'non-existing '

        # each label carries a trailing space to separate it from 'path'
        if self._is_format == 'absolute':
            path_format = 'absolute '
        elif self._is_format == 'relative':
            path_format = 'relative '
        else:
            path_format = ''

        reference = \
            f' that is {self._ref_is} {self._ref}' if self._ref else ''
        return '{}{}path{}'.format(existence, path_format, reference)
class EnsureHashAlgorithm(EnsureChoice):
    """Ensure an input is the name of a ``hashlib`` algorithm

    The set of accepted names is ``hashlib.algorithms_guaranteed``.
    """
    def __init__(self):
        # every guaranteed algorithm name becomes one allowed choice
        EnsureChoice.__init__(self, *hash_algorithms_guaranteed)