Source code for datalad_next.constraints.formats

"""Constraints for particular formats or protocols"""

# allow for |-type UnionType declarations
from __future__ import annotations

from json import loads
import re
from urllib.parse import (
    urlparse,
    ParseResult,
)

from .base import Constraint


[docs] class EnsureJSON(Constraint): """Ensures that string is JSON formatted and can be deserialized. """ def __init__(self): super().__init__() def __call__(self, value: str): try: return loads(value) except Exception as e: self.raise_for( value, str(e), )
[docs] def short_description(self): return 'JSON'
[docs] class EnsureURL(Constraint): """Ensures that a string is a valid URL with a select set of components and/or: - does not contain certain components - matches a particular regular expression Given that a large variety of strings are also a valid URL, a typical use of this constraint would involve using a `required=['scheme']` setting. All URL attribute names supported by `urllib.parse.urlparse()` are also supported here: scheme, netloc, path, params, query, fragment, username, password, hostname, port. .. seealso:: https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse """ def __init__( self, required: list | None = None, forbidden: list | None = None, match: str | None = None, ): """ Parameters ---------- required: list, optional List of any URL component names as recognized by ``urlparse()``, such as ``scheme``, ``netloc``, ``path``, ``params``, ``query``, ``fragment``, ``username``, ``password``, ``hostname``, ``port`` forbidden: list, optional Like ``required`` but names URL components that must not be present match: str, optional Regular expression that the URL must match """ self._required = required self._forbidden = forbidden self._match_exp = re.compile(match) if match else None super().__init__() def __call__(self, value: str) -> str: self._validate_parsed(value) # return the str here, see EnsureParsedURL for an alternative return value def _validate_parsed(self, value: str) -> ParseResult: if not isinstance(value, str): self.raise_for(value, 'not a string') if self._match_exp and not self._match_exp.match(value): self.raise_for( value, 'does not match expression {match_expression!r}', match_expression=self._match_exp.pattern, ) parsed = urlparse(value, scheme='', allow_fragments=True) for r in (self._required or []): if not getattr(parsed, r, None): self.raise_for( value, 'URL is missing {component!r} component', component=r, ) for f in (self._forbidden or []): if getattr(parsed, f, None): self.raise_for( value, 'URL has forbidden {component!r} component', component=f, ) return parsed
[docs] def short_description(self): return 'URL{}{}{}{}'.format( f' with required {self._required}' if self._required else '', ' and' if self._required and self._forbidden else '', f' with no {self._forbidden}' if self._forbidden else '', ' component(s)' if self._required or self._forbidden else '', )
[docs] class EnsureParsedURL(EnsureURL): """Like `EnsureURL`, but returns a parsed URL""" def __call__(self, value: str) -> ParseResult: return self._validate_parsed(value)