"""Constraints for command/function parameters"""
from __future__ import annotations
from collections.abc import Container
from itertools import chain
from typing import (
Callable,
Dict,
)
from .base import Constraint
from .basic import (
NoConstraint,
)
from .dataset import DatasetParameter
from .exceptions import (
ConstraintError,
ParametrizationErrors,
CommandParametrizationError,
ParameterConstraintContext,
)
class NoValue:
    """Marker type signalling that no value is available

    ``None`` is unsuitable for this purpose (for example in a list of
    parameter defaults), because ``None`` may itself be a legitimate value.
    A dedicated, local, private type avoids that ambiguity.
    """
class EnsureCommandParameterization(Constraint):
    """Base class for `ValidatedInterface` parameter validators

    This class can be used as-is, by declaring individual constraints
    in the constructor, or it can be subclassed to consolidate all
    custom validation-related code for a command in a single place.

    Commonly this constraint is used by declaring particular value constraints
    for individual parameters as a mapping. Declaring that the ``path``
    parameter should receive something that is or can be coerced to
    a valid ``Path`` object looks like this::

        EnsureCommandParameterization({'path': EnsurePath()})

    This class differs from a standard ``Constraint`` implementation,
    because its ``__call__()`` method supports additional arguments
    that are used by the internal ``Interface`` handling code to
    control how parameters are validated.

    During validation, when no validator for a particular parameter is
    declared, any input value is passed on as-is, and otherwise an input is
    passed through the validator.

    There is one exception to this rule: When a parameter value is identical to
    its default value (as declared in the command signature, and communicated
    via the ``at_default`` argument of ``__call__()``), this default
    value is also passed as-is, unless the respective parameter name is
    included in the ``validate_defaults`` constructor argument.

    An important consequence of this behavior is that validators need
    not cover a default value. For example, a parameter constraint for
    ``path=None``, where ``None`` is a special value used to indicate an
    optional and unset value, but actually only paths are acceptable input
    values, can simply use ``EnsurePath()`` and it is not necessary to do
    something like ``EnsurePath() | EnsureNone()``.

    However, `EnsureCommandParameterization` can also be specifically
    instructed to perform validation of defaults for individual parameters, as
    described above. A common use case is the auto-discovery of datasets,
    where often `None` is the default value of a `dataset` parameter (to make
    it optional), and an `EnsureDataset` constraint is used. This constraint
    can perform the auto-discovery (with the `None` value indicating that), but
    validation of defaults must be turned on for the `dataset` parameter in
    order to do that.

    A second difference to a common ``Constraint`` implementation is the
    ability to perform an "exhaustive validation" on request (via
    ``__call__(on_error=...)``). In this case, validation is not stopped at the
    first discovered violation, but all violations are collected and
    communicated by raising a ``CommandParametrizationError`` exception, which
    can be inspected by a caller for details on number and nature of all
    discovered violations.

    Exhaustive validation and joint reporting are built on `ConstraintError`
    exceptions. When the constraint of an individual parameter raises any
    other exception type (e.g., a legacy constraint), ``__call__()`` wraps it
    in a `ConstraintError`, such that it is reported in the same uniform
    fashion. Joint validators, in contrast, must raise `ConstraintError`;
    any other exception type raised by them is not caught.
    """

    def __init__(
        self,
        param_constraints: Dict[str, Constraint],
        *,
        validate_defaults: Container[str] | None = None,
        joint_constraints:
            Dict[ParameterConstraintContext, Callable] | None = None,
        tailor_for_dataset: Dict[str, str] | None = None,
    ):
        """
        Parameters
        ----------
        param_constraints: dict
          Mapping of parameter names to parameter constraints. On validation
          the constraint of a parameter is called with the respective
          parameter value. Parameters without an entry in this mapping are
          passed on as-is.
        validate_defaults: container(str), optional
          If given, this is a set of parameter names for which the default
          rule, to not validate default values, does not apply and
          default values shall be passed through a given validator.
        joint_constraints: dict, optional
          Specification of higher-order constraints considering multiple
          parameters together. See the ``joint_validation()`` method for
          details. Constraints will be processed in the order in which
          they are declared in the mapping. Earlier validators can modify
          the parameter values that are eventually passed to validators
          executed later.
        tailor_for_dataset: dict, optional
          If given, this is a mapping of a name of a parameter whose
          constraint should be tailored to a particular dataset, to a name
          of a parameter providing this dataset. The dataset-providing
          parameter constraints will be evaluated first, and the resulting
          ``DatasetParameter`` instances are used to tailor the constraints
          that require a dataset-context. The tailoring is performed if, and
          only if, the dataset-providing parameter actually evaluated
          to a ``DatasetParameter`` instance. The non-tailored constraint is
          used otherwise.
        """
        super().__init__()
        self._param_constraints = param_constraints
        # may remain `None`; consumers treat `None` like "no joint constraints"
        self._joint_constraints = joint_constraints
        self._validate_defaults = validate_defaults or set()
        self._tailor_for_dataset = tailor_for_dataset or {}

    def joint_validation(self, params: Dict, on_error: str) -> Dict:
        """Higher-order validation considering multiple parameters at a time

        This method is called with all, individually validated, command
        parameters in keyword-argument form in the ``params`` dict argument.

        Arbitrary additional validation steps can be performed on the full
        set of parameters that may involve raising exceptions on validation
        errors, but also value transformation or replacements of individual
        parameters based on the setting of others.

        The parameter values returned by the method are passed on to the
        respective command implementation.

        The default implementation iterates over the ``joint_constraints``
        specification given to the constructor, in order to perform
        any number of validations. This is a mapping of a
        ``ParameterConstraintContext`` instance to a callable implementing a
        validation for a particular parameter set.

        Example::

            class MyValidator(EnsureCommandParameterization):
                def __init__(self):
                    super().__init__(
                        param_constraints={},
                        joint_constraints={
                            ParameterConstraintContext(('p1', 'p2'), 'sum'):
                                self._check_sum,
                        },
                    )

                def _check_sum(self, p1, p2):
                    if (p1 + p2) > 3:
                        self.raise_for(
                            dict(p1=p1, p2=p2),
                            'parameter sum is too large',
                        )

        The callable will be passed the arguments named in the
        ``ParameterConstraintContext`` as keyword arguments, using the same
        names as originally given to ``EnsureCommandParameterization``.

        Any raised ``ConstraintError`` is caught and reported together with the
        respective ``ParameterConstraintContext``. The violating value reported
        in such a ``ConstraintError`` must be a mapping of parameter name to
        value, comprising the full parameter set (i.e., keys matching the
        ``ParameterConstraintContext``). The use of ``self.raise_for()`` is
        encouraged.

        If the callable anyhow modifies the passed arguments, it must return
        them as a kwargs-like mapping. If nothing is modified, it is OK to
        return ``None``.

        Parameters
        ----------
        params: dict
          Mapping of parameter names to their individually validated values.
          It is not modified. When no joint constraints are declared it is
          returned as-is, otherwise an updated copy is returned.
        on_error: {'raise-early', 'raise-at-end'}
          Flag how to handle constraint violation. With ``'raise-early'``
          validation is stopped at the first error and an exception is
          raised. Otherwise an exhaustive validation is performed, and an
          eventual exception contains information on all constraint
          violations.

        Returns
        -------
        dict
          The returned dict must have a value for each item passed in via
          ``params``.

        Raises
        ------
        CommandParametrizationError
          Raised when one or more joint validators raised a
          ``ConstraintError``. With `on_error='raise-at-end'` the exception
          reports all collected higher-order violations.
        RuntimeError
          When a joint validator raised a ``ConstraintError`` whose value
          is not a mapping covering exactly the parameters of its context.
        """
        # if we have nothing, do nothing
        if not self._joint_constraints:
            return params

        exceptions = {}
        validated = params.copy()

        for ctx, validator in self._joint_constraints.items():
            # what the validator will produce
            res = None
            try:
                # call the validator with the parameters given in the context
                # and only with those, to make sure the context is valid
                # and not an underspecification.
                # pull the values from `validated` to be able to benefit
                # from incremental coercing done in individual checks
                res = validator(**{p: validated[p] for p in ctx.parameters})
            except ConstraintError as e:
                if not isinstance(e.value, dict) \
                        or set(ctx.parameters) != e.value.keys():  # pragma: no cover
                    raise RuntimeError(
                        'on raising a ConstraintError the joint validator '
                        f'{validator} did not report '
                        'a mapping of parameter name to (violating) value '
                        'comprising all constraint context parameters. '
                        'This is a software defect of the joint validator. '
                        'Please report!')
                exceptions[ctx] = e
                if on_error == 'raise-early':
                    raise CommandParametrizationError(exceptions)
            if res is not None:
                # a validator returning a mapping replaces the respective
                # values for all subsequently executed validators
                validated.update(**res)

        if exceptions:
            raise CommandParametrizationError(exceptions)

        return validated

    def __call__(
        self,
        kwargs,
        at_default=None,
        required=None,
        on_error='raise-early',
    ) -> Dict:
        """Validate a full parameter set

        Each parameter is validated individually first (parameters named as
        dataset providers in ``tailor_for_dataset`` are processed before
        all others), followed by the joint validation of the full set via
        ``joint_validation()``.

        Parameters
        ----------
        kwargs: dict
          Parameter name (``str``) to value (any) mapping of the parameter
          set.
        at_default: set or None
          Set of parameter names where the respective values in ``kwargs``
          match their respective defaults. This is used for deciding whether
          or not to process them with an associated value constraint (see the
          ``validate_defaults`` constructor argument).
        required: set or None
          Set of parameter names that are known to be required.
        on_error: {'raise-early', 'raise-at-end'}
          Flag how to handle constraint violation. By default, validation is
          stopped at the first error and an exception is raised. When an
          exhaustive validation is performed, an eventual exception contains
          information on all constraint violations. Regardless of this mode
          more than one error can be reported (in case (future) implementation
          perform independent validations in parallel).

        Returns
        -------
        dict
          Mapping of parameter names to validated values, as returned by
          ``joint_validation()``.

        Raises
        ------
        CommandParametrizationError
          Raised whenever one (or more) violations are detected. An
          exception of another type raised by the constraint of an
          individual parameter is wrapped in a ``ConstraintError`` and
          reported likewise. Such exceptions raised by joint validators are
          not caught and pass through.
        RuntimeError
          When ``joint_validation()`` does not return a mapping with an item
          for each validated parameter.
        """
        # sanity check of the calling code; kept as an `assert` to not alter
        # the exception type seen by callers (no-op under `python -O`)
        assert on_error in ('raise-early', 'raise-at-end')

        exceptions = {}
        missing_args = tuple(a for a in (required or []) if a not in kwargs)
        if missing_args:
            # one joint report for all missing arguments, each associated
            # with the same `NoValue` marker instance
            exceptions[ParameterConstraintContext(missing_args)] = \
                ConstraintError(
                    self,
                    dict.fromkeys(missing_args, NoValue()),
                    'missing required arguments',
                )
            if on_error == 'raise-early':
                raise CommandParametrizationError(exceptions)

        # validators to work with. make a copy of the dict to be able to tailor
        # them for this run only
        # TODO copy likely not needed
        param_constraints = self._param_constraints.copy()

        # names of parameters we need to process
        to_validate = set(kwargs)
        # check for any dataset that are required for tailoring other parameters
        ds_provider_params = set(self._tailor_for_dataset.values())
        # take these out of the set of parameters to validate, because we need
        # to process them first.
        # the approach is to simply sort them first, but otherwise apply standard
        # handling
        to_validate.difference_update(ds_provider_params)
        # strip all provider args that have not been provided
        ds_provider_params.intersection_update(kwargs)

        validated = {}
        # process all parameters. starts with those that are needed as
        # dependencies for others.
        # this dependency-based sorting is very crude for now. it does not
        # consider possible dependencies within `ds_provider_params` at all
        for argname in chain(ds_provider_params, to_validate):
            arg = kwargs[argname]
            if at_default \
                    and argname not in self._validate_defaults \
                    and argname in at_default:
                # do not validate any parameter where the value matches the
                # default declared in the signature. Often these are just
                # 'do-nothing' settings or have special meaning that need
                # not be communicated to a user. Not validating them has
                # two consequences:
                # - the condition can simply be referred to as "default
                #   behavior" regardless of complexity
                # - a command implementation must always be able to handle
                #   its own defaults directly, and cannot delegate a
                #   default value handling to a constraint
                #
                # we must nevertheless pass any such default value through
                # to make/keep them accessible to the general result handling
                # code
                validated[argname] = arg
                continue

            # look-up validator for this parameter, if there is none use
            # NoConstraint to avoid complex conditionals in the code below
            validator = param_constraints.get(argname, NoConstraint())

            # do we need to tailor this constraint for a specific dataset?
            # only do if instructed AND the respective other parameter
            # validated to a DatasetParameter instance. Any such parameter
            # was sorted to be validated first in this loop, so the outcome
            # of that is already available
            tailor_for = self._tailor_for_dataset.get(argname)
            if tailor_for and isinstance(validated.get(tailor_for),
                                         DatasetParameter):
                validator = validator.for_dataset(validated[tailor_for])

            try:
                validated[argname] = validator(arg)
            except ConstraintError as e:
                # standard exception type, it has everything we need for
                # reporting: record and proceed
                exceptions[ParameterConstraintContext((argname,))] = e
                if on_error == 'raise-early':
                    raise CommandParametrizationError(exceptions)
            except Exception as e:
                # non-standard exception type
                # we need to achieve uniform CommandParametrizationError
                # raising, so let's create a ConstraintError for this
                # exception
                exceptions[ParameterConstraintContext((argname,))] = \
                    ConstraintError(
                        validator, arg, '{__caused_by__}',
                        ctx=dict(__caused_by__=e),
                    )
                if on_error == 'raise-early':
                    raise CommandParametrizationError(exceptions)

        # do not bother with joint validation when the set of expected
        # arguments is not complete
        expected_for_joint_validation = set()
        for jv in self._joint_constraints or []:
            expected_for_joint_validation.update(jv.parameters)

        if not expected_for_joint_validation.issubset(validated):
            # NOTE(review): when a parameter of a joint constraint is absent
            # from `kwargs` without any recorded violation, this raises with
            # an empty error mapping -- confirm that callers always pass the
            # full parameter set
            raise CommandParametrizationError(exceptions)

        final = None
        joint_validation_failed = False
        try:
            # call (subclass) method to perform holistic, cross-parameter
            # validation of the full parameterization
            final = self.joint_validation(validated, on_error)
        # the underlying implementation is providing an exception with
        # detailed context info on possibly multiple errors
        except ParametrizationErrors as e:
            # we can simply suck in the reports, the context keys do not
            # overlap, unless the provided validators want that for some
            # reason
            exceptions.update(e.errors)
            joint_validation_failed = True
        else:
            # check requirements of .joint_validation(), a particular
            # implementation could be faulty, and we want to report this
            # problem in the right context.
            # this is an explicit comparison rather than an `assert`, such
            # that the check is also performed under `python -O`
            cause = None
            try:
                complete = final.keys() == validated.keys()
            except Exception as e:
                # e.g., nothing or not a mapping was returned
                complete = False
                cause = e
            if not complete:
                raise RuntimeError(
                    f"{self.__class__.__name__}.joint_validation() "
                    "did not return items for all passed parameters. "
                    "Invalid implementation.") from cause

        # a failed joint validation must never fall through to the return
        # below, even if it did not report any details
        if exceptions or joint_validation_failed:
            raise CommandParametrizationError(exceptions)

        return final