Source code for datalad_next.patches.interface_utils

"""Uniform pre-execution parameter validation for commands

With this patch, commands can opt in to receive fully validated parameters.
This can substantially reduce the implementation complexity of a command, at
the expense of a more elaborate specification of the structural and semantic
properties of its parameters.

For details on implementing validation for individual commands see
:class:`datalad_next.commands.ValidatedInterface`.
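
A minimal sketch of how a command opts in (the command name and the
``count`` parameter are purely illustrative; the imports assume the
constraint and interface classes exported by ``datalad_next.commands``
and ``datalad_next.constraints``)::

    from datalad_next.commands import (
        EnsureCommandParameterization,
        ValidatedInterface,
    )
    from datalad_next.constraints import EnsureInt

    class MyCommand(ValidatedInterface):
        # declare a validator that maps parameter names to constraints
        _validator_ = EnsureCommandParameterization(dict(
            count=EnsureInt(),
        ))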
"""

import logging
from typing import (
    Callable,
    Dict,
    Generator,
)

from datalad import cfg as dlcfg
from datalad.core.local.resulthooks import (
    get_jsonhooks_from_config,
    match_jsonhook2result,
    run_jsonhook,
)
from datalad.interface.results import known_result_xfms
from datalad.interface.utils import (
    anInterface,
    get_result_filter,
    keep_result,
    render_action_summary,
    xfm_result,
    _process_results,
)
from datalad_next.exceptions import IncompleteResultsError
from . import apply_patch
from datalad_next.constraints import DatasetParameter

# use same logger as -core
lgr = logging.getLogger('datalad.interface.utils')


# this is a replacement for datalad.interface.base.get_allargs_as_kwargs
# it reports which arguments were at their respective defaults
def get_allargs_as_kwargs(call, args, kwargs):
    """Generate a kwargs dict from a call signature and ``*args``, ``**kwargs``

    Basically resolving the argnames for all positional arguments, and
    resolving the defaults for all kwargs that are not given in a kwargs
    dict.

    Returns
    -------
    (dict, set, set)
      The first return value is a mapping of argument names to their
      respective values.
      The second return value in the tuple is a set of argument names for
      which the effective value is identical to the default declared in the
      signature of the callable.
      The third value is a set with names of all mandatory arguments, whether
      or not they are included in the returned mapping.
    """
    from datalad_next.utils import getargspec
    argspec = getargspec(call, include_kwonlyargs=True)
    defaults = argspec.defaults
    nargs = len(argspec.args)
    defaults = defaults or []  # ensure it is a list and not None
    assert (nargs >= len(defaults))
    # map any args to their name
    argmap = list(zip(argspec.args[:len(args)], args))
    kwargs_ = dict(argmap)
    # map defaults of kwargs to their names (update below)
    default_map = dict(zip(argspec.args[-len(defaults):], defaults))
    for k, v in default_map.items():
        if k not in kwargs_:
            kwargs_[k] = v
    # update with provided kwarg args
    kwargs_.update(kwargs)
    # determine which arguments still have values identical to their declared
    # defaults
    at_default = set(
        k for k in kwargs_
        if k in default_map and default_map[k] == kwargs_[k]
    )
    # XXX we cannot assert the following, because our own highlevel
    # API commands support more kwargs than what is discoverable
    # from their signature...
    #assert (nargs == len(kwargs_))
    return (
        # argument name/value mapping
        kwargs_,
        # names of arguments that are at their default
        at_default,
        # names of mandatory arguments (set for uniformity)
        set(argspec.args),
    )
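

# Illustration of the reporting above (hypothetical callable, not part of
# this patch): given
#
#     def f(a, b=1, c=2):
#         pass
#
# ``get_allargs_as_kwargs(f, (5,), {'c': 3})`` would evaluate to
#
#     ({'a': 5, 'b': 1, 'c': 3}, {'b'}, {'a', 'b', 'c'})
#
# i.e. only ``b`` is still at its declared default, and the last set
# reports all argument names from the signature.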


# This function interface is taken from
# datalad-core@209bc319db8f34cceae4fee86493bf41927676fd
def _execute_command_(
    *,
    interface: anInterface,
    cmd: Callable[..., Generator[Dict, None, None]],
    cmd_args: tuple,
    cmd_kwargs: Dict,
    exec_kwargs: Dict,
) -> Generator[Dict, None, None]:
    """Internal helper to drive a command execution generator-style

    Parameters
    ----------
    interface:
      Interface class associated with the `cmd` callable
    cmd:
      A DataLad command implementation. Typically the `__call__()` of
      the given `interface`.
    cmd_args:
      Positional arguments for `cmd`.
    cmd_kwargs:
      Keyword arguments for `cmd`.
    exec_kwargs:
      Keyword arguments affecting the result handling.
      See `datalad.interface.common_opts.eval_params`.
    """
    # for result filters and validation
    # we need to produce a dict with argname/argvalue pairs for all args
    # incl. defaults and args given as positionals
    allkwargs, at_default, required_args = get_allargs_as_kwargs(
        cmd,
        cmd_args,
        {**cmd_kwargs, **exec_kwargs},
    )
    # validate the complete parameterization
    param_validator = interface.get_parameter_validator() \
        if hasattr(interface, 'get_parameter_validator') else None
    if param_validator is None:
        lgr.debug(
            'Command parameter validation skipped. %s declares no validator',
            interface)
    else:
        lgr.debug('Command parameter validation for %s', interface)
        validator_kwargs = dict(
            at_default=at_default,
            required=required_args or None,
        )
        # make immediate vs exhaustive parameter validation
        # configurable
        raise_on_error = dlcfg.get(
            'datalad.runtime.parameter-violation', None)
        if raise_on_error:
            validator_kwargs['on_error'] = raise_on_error
        allkwargs = param_validator(
            allkwargs,
            **validator_kwargs,
        )
        lgr.debug('Command parameter validation ended for %s', interface)
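
    # Illustration of the switch above: with a configuration like
    #
    #     datalad.runtime.parameter-violation = raise-early
    #
    # the retrieved value is handed to the validator as its ``on_error``
    # mode. The set of accepted modes is defined by the validator
    # implementation (see ``EnsureCommandParameterization`` in
    # datalad-next); 'raise-early' is given here only as an assumed
    # example value.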

    # look for potential override of logging behavior
    result_log_level = dlcfg.get('datalad.log.result-level', 'debug')
    # resolve string labels for transformers too
    result_xfm = known_result_xfms.get(
        allkwargs['result_xfm'],
        # use verbatim, if not a known label
        allkwargs['result_xfm'])
    result_filter = get_result_filter(allkwargs['result_filter'])
    result_renderer = allkwargs['result_renderer']
    if result_renderer == 'tailored' \
            and not hasattr(interface, 'custom_result_renderer'):
        # a tailored result renderer is requested, but the class
        # does not provide any, fall back to the generic one
        result_renderer = 'generic'
    if result_renderer == 'default':
        # standardize on the new name 'generic' to avoid more complex
        # checking below
        result_renderer = 'generic'

    # figure out which hooks are relevant for this command execution
    # query cfg for defaults
    # .is_installed and .config can be costly, so ensure we do
    # it only once. See https://github.com/datalad/datalad/issues/3575
    dataset_arg = allkwargs.get('dataset', None)
    ds = None
    if dataset_arg is not None:
        from datalad_next.datasets import Dataset
        if isinstance(dataset_arg, Dataset):
            ds = dataset_arg
        elif isinstance(dataset_arg, DatasetParameter):
            ds = dataset_arg.ds
        else:
            try:
                ds = Dataset(dataset_arg)
            except ValueError:
                pass
    # look for hooks
    hooks = get_jsonhooks_from_config(ds.config if ds else dlcfg)
    # end of hooks discovery

    # flag whether to raise an exception
    incomplete_results = []
    # track what actions were performed how many times
    action_summary = {}

    # if a custom summary is to be provided, collect the results
    # of the command execution
    results = []
    do_custom_result_summary = result_renderer in (
        'tailored', 'generic', 'default') and hasattr(
            interface, 'custom_result_summary_renderer')
    pass_summary = do_custom_result_summary \
        and getattr(interface,
                    'custom_result_summary_renderer_pass_summary',
                    None)

    # process main results
    for r in _process_results(
            # execution, call with any arguments from the validated
            # set that are no result-handling related
            cmd(**{k: v for k, v in allkwargs.items()
                   if k not in exec_kwargs}),
            interface,
            allkwargs['on_failure'],
            # bookkeeping
            action_summary,
            incomplete_results,
            # communication
            result_renderer,
            result_log_level,
            # let renderers get to see how a command was called
            allkwargs):
        for hook, spec in hooks.items():
            # run the hooks before we yield the result
            # this ensures that they are executed before
            # a potential wrapper command gets to act
            # on them
            if match_jsonhook2result(hook, r, spec['match']):
                lgr.debug('Result %s matches hook %s', r, hook)
                # a hook is also a command that yields results
                # so yield them outside too
                # users need to pay attention to avoid infinite
                # loops, i.e. when a hook yields a result that
                # triggers that same hook again
                for hr in run_jsonhook(hook, spec, r, dataset_arg):
                    # apply same logic as for main results, otherwise
                    # any filters would only tackle the primary results
                    # and a mixture of return values could happen
                    if not keep_result(hr, result_filter, **allkwargs):
                        continue
                    hr = xfm_result(hr, result_xfm)
                    # rationale for conditional is a few lines down
                    if hr:
                        yield hr
        if not keep_result(r, result_filter, **allkwargs):
            continue
        r = xfm_result(r, result_xfm)
        # in case the result_xfm decided to not give us anything
        # exclude it from the results. There is no particular reason
        # to do so other than that it was established behavior when
        # this comment was written. This will not affect any real
        # result record
        if r:
            yield r

        # collect if summary is desired
        if do_custom_result_summary:
            results.append(r)

    # result summary before a potential exception
    # custom first
    if do_custom_result_summary:
        if pass_summary:
            summary_args = (results, action_summary)
        else:
            summary_args = (results,)
        interface.custom_result_summary_renderer(*summary_args)
    elif result_renderer in ('generic', 'default') \
            and action_summary \
            and sum(sum(s.values())
                    for s in action_summary.values()) > 1:
        # give a summary in generic mode, when there was more than one
        # action performed
        render_action_summary(action_summary)

    if incomplete_results:
        raise IncompleteResultsError(
            failed=incomplete_results,
            msg="Command did not complete successfully")


# apply patch
patch_msg = \
    'Apply datalad-next patch to interface.(utils|base).py:_execute_command_'
apply_patch(
    'datalad.interface.base', None, '_execute_command_', _execute_command_,
    msg=patch_msg)
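

# After this module has been imported (e.g., as part of loading the
# datalad-next extension), the patched location should resolve to the
# function defined above. A minimal sketch of verifying this, assuming
# ``apply_patch()`` simply (re)binds the named module attribute:
#
#     import datalad_next.patches.interface_utils  # noqa: F401
#     from datalad.interface.base import _execute_command_ as patched
#     assert patched is \
#         datalad_next.patches.interface_utils._execute_command_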