Source code for datalad_core.config.manager

from __future__ import annotations

from contextlib import contextmanager
from copy import copy
from typing import (
    TYPE_CHECKING,
    Any,
)

if TYPE_CHECKING:
    from collections.abc import (
        Generator,
        Hashable,
    )

    from datasalad.settings import Source

from datasalad.settings import (
    Setting,
    Settings,
    UnsetValue,
)

from datalad_core.config.defaults import (
    ImplementationDefaults,
    get_defaults,
)
from datalad_core.config.git import (
    GlobalGitConfig,
    SystemGitConfig,
)
from datalad_core.config.gitenv import GitEnvironment
from datalad_core.config.item import ConfigItem


class ConfigManager(Settings):
    """Multi-source (scope) configuration manager

    By default (``sources=None``), a manager utilizes a fixed (order)
    collection of sources (from highest to lowest precedence):

    - ``git-command``: :class:`GitEnvironment`
    - ``git-global``: :class:`GlobalGitConfig`
    - ``git-system``: :class:`SystemGitConfig`
    - ``defaults``: :class:`ImplementationDefaults`

    If a different source collection is desired, it can be given as
    ``sources``. The ``defaults`` source is always added under the key
    ``'defaults'``. This is done on a copy; the mapping passed by the caller
    is not modified.
    """

    def __init__(
        self,
        defaults: ImplementationDefaults,
        # TODO: should be Mapping[str, Source] | None
        sources: dict[str, Source] | None = None,
    ):
        """
        ``defaults`` is registered as the ``'defaults'`` source.
        ``sources`` maps source keys to source instances, ordered from
        highest to lowest precedence. When ``None``, the standard
        ``git-command``/``git-global``/``git-system`` collection is used.
        """
        if sources is None:
            sources = {
                # call this one 'command', because that is what Git calls the
                # scope of items pulled from the process environment
                'git-command': GitEnvironment(),
                'git-global': GlobalGitConfig(),
                'git-system': SystemGitConfig(),
            }
        # build a new dict rather than assigning into the given one, so a
        # caller-provided mapping is never modified as a side effect.
        # Key order is preserved: 'defaults' is appended last, unless the
        # key is already present, in which case it is replaced in place.
        sources = {**sources, 'defaults': defaults}
        super().__init__(sources)

        # set .item_type for all sources, such that any plain default
        # comes back wrapped in the same item type
        for s in self.sources.values():
            s.item_type = ConfigItem

        # explicit collection of source keys that will be considered for
        # get_from_protected_sources()
        # we are not doing self.sources.keys(), because that would auto-add
        # any new sources to this list, and we require an explicit decision,
        # because this is a choice with security impact (think of sources
        # that pull information from places that the executing user is not
        # trusting)
        self._protected_source_keys: set[str] = {
            # controlled by the executing user
            'git-command',
            # owned by the executing user
            'git-global',
            # controlled by super user (implicit trust)
            'git-system',
            # there is implicit trust, any code executed
            # in this process can, in principle, alter
            # any configuration
            'defaults',
        }

    def __str__(self) -> str:
        # aim for a compact display, leave out sources with no content
        non_empty = '<<'.join(
            str(s) for s in self.sources.values() if len(s)
        )
        return f'{self.__class__.__name__}({non_empty})'

    def __repr__(self) -> str:
        # just like __str__, but does not trim sources without content
        all_sources = '<<'.join(str(s) for s in self.sources.values())
        return f'{self.__class__.__name__}({all_sources})'

    @contextmanager
    def overrides(
        self,
        overrides: dict[Hashable, Setting | tuple[Setting, ...]],
    ) -> Generator[ConfigManager]:
        """Context manager to temporarily set configuration overrides

        Internally, these overrides are posted to the ``git-command`` scope,
        hence affect the process environment and newly spawned subprocesses.

        The manager itself is yielded. A source registered under the key
        ``git-command`` is required; it is looked up unconditionally.
        """
        gitcmdsrc = self.sources['git-command']
        with gitcmdsrc.overrides(overrides):
            yield self

    # TODO: migrate this implementation to datasalad.Settings
    def get(self, key: Hashable, default: Any = None) -> Setting:
        """Return a particular setting identified by its key, or a default

        The composition of the returned setting follows the same rules
        as the access via ``__getitem__``. However, if the effective
        ``pristine_value`` retrieved from any existing configuration is
        ``UnsetValue``, it is updated with the given default.

        When the ``default`` value is not given as an instance of
        :class:`~datasalad.settings.Setting`, it will be
        automatically wrapped into the one given by
        :attr:`Settings.item_type`.
        """
        try:
            val = self[key]
        except KeyError:
            # no source knows the key, report the (wrapped) default
            return self._get_default_setting(default)
        if val.pristine_value is UnsetValue:
            val.update(self._get_default_setting(default))
        return val

    # TODO: a generic variant could migrate to datasalad.Settings
    # (from select sources)
    def get_from_protected_sources(
        self,
        key: Hashable,
        default: Any = None,
    ) -> Setting:
        """Return a particular setting from protected sources only

        This method behaves like :meth:`get`, but it considers only
        sources that have been declared as ``protected``. Thereby, this
        method can be used to query for security-related configuration
        from a limited set of sources. By default, protected sources are:

        - ``git-command``
        - ``git-global``
        - ``git-system``
        - ``defaults``

        Additional sources can be declared ``protected`` via
        :meth:`declare_source_protected`. This should only be done for
        sources where the executing user (or an entity with elevated
        privileges) has exclusive write permissions, to prevent undesired
        and unvetted configuration changes.
        """
        # this will become the return item
        item: Setting | None = None
        # now go from the back
        # - start with the first Setting class instance we get
        # - update a copy of this particular instance with all information
        #   from sources with higher priority and flatten it across
        #   sources
        for sk in reversed(self._sources):
            if sk not in self._protected_source_keys:
                continue
            s = self._sources[sk]
            update_item = None
            try:
                update_item = s[key]
            except KeyError:
                # source does not have it, proceed
                continue
            if item is None:
                # work on a copy: an in-place modification would destroy
                # the original item's integrity
                item = copy(update_item)
                continue
            # we run the update() method of the first item we ever found.
            # this will practically make the type produced by the lowest
            # precedence source define the behavior. This is typically
            # some kind of implementation default
            item.update(update_item)
        if item is None:
            item = self._get_default_setting(default)
        if item.pristine_value is UnsetValue:
            item.update(self._get_default_setting(default))
        return item

    def declare_source_protected(self, key: str):
        """Add the source identified by `key` to the set of "protected" sources

        This qualifies them for being queried by
        :meth:`get_from_protected_sources`.

        Declaring sources as "protected" has to be done with care.
        :meth:`get_from_protected_sources` is queried for security-related
        decision making. It is important to not declare sources as
        protected that are not controlled by the executing user, or that
        the user does not already have to trust.

        An example of sources that should NOT be declared protected are any
        that update (automatically) from information not set explicitly by
        the executing user alone (requests to web service, dataset content
        possibly merged unverified from contributors, etc).

        Raises ``ValueError`` when `key` does not match any source of this
        manager.
        """
        if key not in self._sources:
            msg = f'{key} is not a known configuration source'
            raise ValueError(msg)
        self._protected_source_keys.add(key)
__the_manager: ConfigManager | None = None


def get_manager() -> ConfigManager:
    """Return a process-unique, global `ConfigManager` instance

    This function can be used to obtain a :class:`ConfigManager`
    instance for query and manipulation of settings. The instance is
    created on first use from :func:`get_defaults`, and the same object
    is handed out by every subsequent call.
    """
    global __the_manager  # noqa: PLW0603
    if __the_manager is not None:
        return __the_manager
    # first call in this process: create the shared instance
    __the_manager = ConfigManager(get_defaults())
    return __the_manager