Source code for datalad_next.constraints.parameter

"""Constraints for command/function parameters"""

from __future__ import annotations

from collections.abc import Container
from itertools import chain
from typing import (
    Callable,
    Dict,
)

from .base import Constraint
from .basic import (
    NoConstraint,
)
from .dataset import DatasetParameter
from .exceptions import (
    ConstraintError,
    ParametrizationErrors,
    CommandParametrizationError,
    ParameterConstraintContext,
)


class NoValue:
    """Sentinel type marking that no value is available

    ``None`` is not suitable as an "absent" marker, because it can be a
    legitimate value (for example in a list of parameter defaults).
    Therefore this dedicated, module-local type is used instead.
    """



[docs]
class EnsureCommandParameterization(Constraint):
    """Base class for `ValidatedInterface` parameter validators

    This class can be used as-is, by declaring individual constraints
    in the constructor, or it can be subclassed to consolidate all
    custom validation-related code for a command in a single place.

    Commonly this constraint is used by declaring particular value constraints
    for individual parameters as a mapping. Declaring that the ``path``
    parameter should receive something that is or can be coerced to
    a valid ``Path`` object looks like this::

      EnsureCommandParameterization({'path': EnsurePath()})

    This class differs from a standard ``Constraint`` implementation,
    because its ``__call__()`` method supports additional arguments
    that are used by the internal ``Interface`` handling code to
    control how parameters are validated.

    During validation, when no validator for a particular parameter is
    declared, any input value is passed on as-is, and otherwise an input is
    passed through the validator.

    There is one exception to this rule: When a parameter value is identical to
    its default value (as declared in the command signature, and communicated
    via the ``at_default`` argument of ``__call__()``), this default
    value is also passed as-is, unless the respective parameter name is
    included in the ``validate_defaults`` constructor argument.

    An important consequence of this behavior is that validators need
    not cover a default value. For example, a parameter constraint for
    ``path=None``, where ``None`` is a special value used to indicate an
    optional and unset value, but actually only paths are acceptable input
    values, can simply use ``EnsurePath()``. It is not necessary to do
    something like ``EnsurePath() | EnsureNone()``.

    However, `EnsureCommandParameterization` can also be specifically
    instructed to perform validation of defaults for individual parameters, as
    described above.  A common use case is the auto-discovery of datasets,
    where often `None` is the default value of a `dataset` parameter (to make
    it optional), and an `EnsureDataset` constraint is used. This constraint
    can perform the auto-discovery (with the `None` value indicating that), but
    validation of defaults must be turned on for the `dataset` parameter in
    order to do that.

    A second difference to a common ``Constraint`` implementation is the
    ability to perform an "exhaustive validation" on request (via
    ``__call__(on_error=...)``). In this case, validation is not stopped at the
    first discovered violation, but all violations are collected and
    communicated by raising a ``CommandParametrizationError`` exception, which
    can be inspected by a caller for details on number and nature of all
    discovered violations.

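    Per-parameter constraints are expected to raise `ConstraintError`
    exceptions. An exception of any other type raised by a per-parameter
    constraint (e.g., by a legacy constraint) is wrapped in a
    `ConstraintError` and reported in the same way. Joint validators, in
    contrast, must raise `ConstraintError`: any exception of another type
    raised by them is not caught and passes through immediately.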
    """
    def __init__(
        self,
        param_constraints: Dict[str, Constraint],
        *,
        validate_defaults: Container[str] | None = None,
        joint_constraints:
            Dict[ParameterConstraintContext, Callable] | None = None,
        tailor_for_dataset: Dict[str, str] | None = None,
    ):
        """Declare the constraints to be applied to a parameter set

        Parameters
        ----------
        param_constraints: dict
          Maps a parameter name to the constraint that a value of this
          parameter is passed through on validation.
        validate_defaults: container(str), optional
          Names of parameters that are validated even when their value is
          at its default. For any parameter not named here, a value at
          its default is passed on without validation.
        joint_constraints: dict, optional
          Higher-order constraints that consider multiple parameters
          together; see the ``joint_validation()`` method for details.
          They are processed in the order in which they are declared in
          the mapping, so an earlier validator can modify the values that
          are passed to validators running later.
        tailor_for_dataset: dict, optional
          Maps the name of a parameter whose constraint should be tailored
          to a particular dataset, to the name of the parameter that
          provides this dataset. Dataset-providing parameters are
          validated first. Tailoring is performed if, and only if, the
          dataset-providing parameter validated to a `DatasetParameter`
          instance; the non-tailored constraint is used otherwise.
        """
        super().__init__()
        # optional containers fall back to an empty instance, such that
        # the validation code can perform membership tests and lookups
        # without checking for `None` first
        self._tailor_for_dataset = \
            tailor_for_dataset if tailor_for_dataset else {}
        self._validate_defaults = \
            validate_defaults if validate_defaults else set()
        # may remain `None`; joint validation is skipped in this case
        self._joint_constraints = joint_constraints
        self._param_constraints = param_constraints


[docs]
    def joint_validation(self, params: Dict, on_error: str) -> Dict:
        """Higher-order validation considering multiple parameters at a time

        This method is called with all, individually validated, command
        parameters in keyword-argument form in the ``params`` dict argument.

        Arbitrary additional validation steps can be performed on the full
        set of parameters that may involve raising exceptions on validation
        errors, but also value transformation or replacements of individual
        parameters based on the setting of others.

        The parameter values returned by the method are passed on to the
        respective command implementation.

        The default implementation iterates over the ``joint_constraints``
        specification given to the constructor, in order to perform
        any number of validations. This is a mapping of a
        ``ParameterConstraintContext`` instance to a callable implementing a
        validation for a particular parameter set.

        Example::

          class MyValidator(EnsureCommandParameterization):
              def __init__(self, param_constraints):
                  super().__init__(
                      param_constraints,
                      joint_constraints={
                          ParameterConstraintContext(('p1', 'p2'), 'sum'):
                              self._check_sum,
                      },
                  )

              def _check_sum(self, p1, p2):
                  if (p1 + p2) < 3:
                      self.raise_for(
                         dict(p1=p1, p2=p2),
                         'parameter sum is too small',
                      )

        The callable will be passed the arguments named in the
        ``ParameterConstraintContext`` as keyword arguments, using the same
        names as originally given to ``EnsureCommandParameterization``.

        Any raised ``ConstraintError`` is caught and reported together with the
        respective ``ParameterConstraintContext``. The violating value reported
        in such a ``ConstraintError`` must be a mapping of parameter name to
        value, comprising the full parameter set (i.e., keys matching the
        ``ParameterConstraintContext``).  The use of ``self.raise_for()`` is
        encouraged. Exceptions of any other type are not caught.

        If the callable anyhow modifies the passed arguments, it must return
        them as a kwargs-like mapping.  If nothing is modified, it is OK to
        return ``None``.

        Parameters
        ----------
        params: dict
          Mapping of parameter name to (individually validated) value. It
          is not modified; updates are applied to a shallow copy.
        on_error: {'raise-early', 'raise-at-end'}
          Flag how to handle constraint violation. With ``'raise-early'``
          validation is stopped at the first error and an exception is
          raised. Otherwise all joint constraints are processed, and an
          eventual exception contains information on all constraint
          violations.

        Returns
        -------
        dict
          The returned dict must have a value for each item passed in via
          ``params``. When no joint constraints were declared, ``params``
          itself is returned.

        Raises
        ------
        CommandParametrizationError
          Raised when one (or more) joint validators raised a
          ``ConstraintError``. It maps each violated
          ``ParameterConstraintContext`` to the respective exception.
        RuntimeError
          When a joint validator raised a ``ConstraintError`` that does not
          report a mapping covering exactly the parameters of its context.
        """
        # if we have nothing, do nothing
        if not self._joint_constraints:
            return params

        exceptions = {}
        validated = params.copy()

        for ctx, validator in self._joint_constraints.items():
            # what the validator will produce
            res = None
            try:
                # call the validator with the parameters given in the context
                # and only with those, to make sure the context is valid
                # and not an underspecification.
                # pull the values from `validated` to be able to benefit
                # from incremental coercing done in individual checks
                res = validator(**{p: validated[p] for p in ctx.parameters})
            except ConstraintError as e:
                if not isinstance(e.value, dict) \
                        or set(ctx.parameters) != e.value.keys():  # pragma: no cover
                    # keep the offending exception attached as the explicit
                    # cause, it is the key piece of information for fixing
                    # the validator
                    raise RuntimeError(
                        'on raising a ConstraintError the joint validator '
                        f'{validator} did not report '
                        'a mapping of parameter name to (violating) value '
                        'comprising all constraint context parameters. '
                        'This is a software defect of the joint validator. '
                        'Please report!') from e
                exceptions[ctx] = e
                if on_error == 'raise-early':
                    raise CommandParametrizationError(exceptions)
            # `None` signals "nothing modified" (also the state after a
            # recorded violation); anything else replaces current values
            if res is not None:
                validated.update(**res)

        if exceptions:
            raise CommandParametrizationError(exceptions)

        return validated



[docs]
    def __call__(
        self,
        kwargs,
        at_default=None,
        required=None,
        on_error='raise-early',
    ) -> Dict:
        """Validate a complete parameter set

        Each parameter is passed through its declared constraint (if there
        is any), and the individually validated values are subsequently
        passed to ``joint_validation()``.

        Parameters
        ----------
        kwargs: dict
          Parameter name (``str``) to value (any) mapping of the parameter
          set.
        at_default: set or None
          Set of parameter names where the respective values in ``kwargs``
          match their respective defaults. This is used for deciding whether
          or not to process them with an associated value constraint (see the
          ``validate_defaults`` constructor argument).
        required: set or None
          Set of parameter names that are known to be required. Any of them
          that is missing from ``kwargs`` is reported as a violation.
        on_error: {'raise-early', 'raise-at-end'}
          Flag how to handle constraint violation. By default, validation is
          stopped at the first error and an exception is raised. When an
          exhaustive validation is performed, an eventual exception contains
          information on all constraint violations. Regardless of this mode
          more than one error can be reported (in case (future) implementation
          perform independent validations in parallel).

        Returns
        -------
        dict
          Mapping of parameter name to validated value, as returned by
          ``joint_validation()``.

        Raises
        ------
        CommandParametrizationError
          Raised whenever one (or more) constraint violations are detected.
          A ``ConstraintError`` raised by a per-parameter constraint is
          reported as-is. An exception of any other type raised by a
          per-parameter constraint is wrapped in a ``ConstraintError``
          first, in order to achieve uniform reporting.
        RuntimeError
          When ``joint_validation()`` does not return a mapping with exactly
          the parameter names it was given.
        """
        # guard against a programming error of the caller
        assert on_error in ('raise-early', 'raise-at-end')

        exceptions = {}
        missing_args = tuple(a for a in (required or []) if a not in kwargs)
        if missing_args:
            exceptions[ParameterConstraintContext(missing_args)] = \
                ConstraintError(
                    self,
                    # there is no actual value to report for any of them
                    dict.fromkeys(missing_args, NoValue()),
                    'missing required arguments',
                )
            if on_error == 'raise-early':
                raise CommandParametrizationError(exceptions)

        # validators to work with. the mapping is only read below (tailoring
        # produces a per-run validator and does not touch the mapping), hence
        # no copy is needed
        param_constraints = self._param_constraints

        # names of parameters we need to process
        to_validate = set(kwargs)
        # check for any dataset that are required for tailoring other parameters
        ds_provider_params = set(self._tailor_for_dataset.values())
        # take these out of the set of parameters to validate, because we need
        # to process them first.
        # the approach is to simply sort them first, but otherwise apply standard
        # handling
        to_validate.difference_update(ds_provider_params)
        # strip all provider args that have not been provided
        ds_provider_params.intersection_update(kwargs)

        validated = {}
        # process all parameters. starts with those that are needed as
        # dependencies for others.
        # this dependency-based sorting is very crude for now. it does not
        # consider possible dependencies within `ds_provider_params` at all
        for argname in chain(ds_provider_params, to_validate):
            arg = kwargs[argname]
            if at_default \
                    and argname not in self._validate_defaults \
                    and argname in at_default:
                # do not validate any parameter where the value matches the
                # default declared in the signature. Often these are just
                # 'do-nothing' settings or have special meaning that need
                # not be communicated to a user. Not validating them has
                # two consequences:
                # - the condition can simply be referred to as "default
                #   behavior" regardless of complexity
                # - a command implementation must always be able to handle
                #   its own defaults directly, and cannot delegate a
                #   default value handling to a constraint
                #
                # we must nevertheless pass any such default value through
                # to make/keep them accessible to the general result handling
                # code
                validated[argname] = arg
                continue

            # look-up validator for this parameter, if there is none use
            # NoConstraint to avoid complex conditionals in the code below
            validator = param_constraints.get(argname, NoConstraint())

            # do we need to tailor this constraint for a specific dataset?
            # only do if instructed AND the respective other parameter
            # validated to a DatasetParameter instance. Any such parameter
            # was sorted to be validated first in this loop, so the outcome
            # of that is already available
            tailor_for = self._tailor_for_dataset.get(argname)
            if tailor_for and isinstance(validated.get(tailor_for),
                                         DatasetParameter):
                validator = validator.for_dataset(validated[tailor_for])

            try:
                validated[argname] = validator(arg)
            except Exception as e:
                if isinstance(e, ConstraintError):
                    # standard exception type, has all we need for reporting
                    error = e
                else:
                    # non-standard exception type
                    # we need to achieve uniform CommandParametrizationError
                    # raising, so let's create a ConstraintError for this
                    # exception
                    error = ConstraintError(
                        validator, arg, '{__caused_by__}',
                        ctx=dict(__caused_by__=e),
                    )
                # record and proceed
                exceptions[ParameterConstraintContext((argname,))] = error
                if on_error == 'raise-early':
                    raise CommandParametrizationError(exceptions)

        # do not bother with joint validation when the set of expected
        # arguments is not complete
        expected_for_joint_validation = set()
        for jv in self._joint_constraints or []:
            expected_for_joint_validation.update(jv.parameters)

        if not expected_for_joint_validation.issubset(validated):
            if not exceptions:
                # nothing failed so far, hence the parameters in question
                # were simply not provided in `kwargs` (and not declared as
                # required). Say so, rather than raising an exception that
                # reports no violation at all
                unmet = tuple(sorted(
                    expected_for_joint_validation.difference(validated)))
                exceptions[ParameterConstraintContext(unmet)] = \
                    ConstraintError(
                        self,
                        dict.fromkeys(unmet, NoValue()),
                        'missing arguments needed for joint validation',
                    )
            raise CommandParametrizationError(exceptions)

        try:
            # call (subclass) method to perform holistic, cross-parameter
            # validation of the full parameterization
            final = self.joint_validation(validated, on_error)
            # check requirements of .joint_validation(), a particular
            # implementation could be faulty, and we want to report this
            # problem in the right context.
            # this is an explicit comparison rather than an `assert`, because
            # the latter is removed when Python runs with `-O`
            invalid_impl_msg = (
                f"{self.__class__.__name__}.joint_validation() "
                "did not return items for all passed parameters. "
                "Invalid implementation.")
            try:
                keys_match = final.keys() == validated.keys()
            except Exception as e:
                # e.g., `None` or some other non-mapping was returned
                raise RuntimeError(invalid_impl_msg) from e
            if not keys_match:
                raise RuntimeError(invalid_impl_msg)
        # we catch the good stuff first. the underlying implementation is
        # providing an exception with detailed context info on possibly
        # multiple errors
        except ParametrizationErrors as e:
            # we can simply suck in the reports, the context keys do not
            # overlap, unless the provided validators want that for some
            # reason
            exceptions.update(e.errors)

        if exceptions:
            raise CommandParametrizationError(exceptions)

        return final