"""Collection of fixtures for facilitating test implementations
.. currentmodule:: datalad_core.tests.fixtures
.. autosummary::
:toctree: generated
annexrepo
baregitrepo
bareannexrepo
cfgman
gitrepo
http_fileserver
modify_dataset
progress_handler
skip_when_symlinks_not_supported
symlinks_supported
test_datasets
verify_pristine_gitconfig_global
"""
from __future__ import annotations
# Public names of this module. `modify_dataset` is a helper re-exported
# from `datalad_core.tests.utils`; `modified_dataset` is the session-scope
# fixture defined in this module that is built with that helper.
__all__ = [
    'annexrepo',
    'bareannexrepo',
    'baregitrepo',
    'cfgman',
    'gitrepo',
    'http_fileserver',
    'modified_dataset',
    'modify_dataset',
    'progress_handler',
    'skip_when_symlinks_not_supported',
    'symlinks_supported',
    'test_datasets',
    'verify_pristine_gitconfig_global',
]
import base64
import contextlib
import os
import socket
import threading
from dataclasses import dataclass
from http.server import (
SimpleHTTPRequestHandler,
ThreadingHTTPServer,
)
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import (
TYPE_CHECKING,
)
import pytest
if TYPE_CHECKING:
from collections.abc import (
Generator,
Iterator,
)
from datalad_core.config import get_manager
from datalad_core.git_utils import (
apply_changeset,
)
from datalad_core.git_utils import (
types as gt,
)
from datalad_core.repo import Repo
from datalad_core.runners import (
CommandError,
call_git,
call_git_lines,
call_git_oneline,
)
from datalad_core.tests import AuditProgressHandler
from datalad_core.tests.utils import (
call_git_commit,
create_populated_dataset_annex_wt,
modify_dataset,
patched_env,
)
# Unique value placed in the test-specific global Git configuration.
# The `cfgman` fixture reads it back to confirm that the isolated
# configuration file is the one that is actually in effect.
magic_marker = 'c4d0de12-8008-11ef-86ea-3776083add61'
# Content of the temporary global Git configuration file that the
# `cfgman` fixture writes and activates via `GIT_CONFIG_GLOBAL`.
standard_gitconfig = f"""\
[datalad "magic"]
test-marker = {magic_marker}
[user]
name = DataLad Tester
email = test@example.com
[extensions]
worktreeConfig = false
[annex "security"]
# from annex 6.20180626 file:/// and http://localhost access isn't
# allowed by default
allowed-url-schemes = http https
allowed-http-addresses = all
allowed-ip-addresses = all
"""
# Identity used for the environment-based committer setup below
git_name = 'Datalad Tester'
git_email = 'test@example.com'
# this is different from the base configuration above.
# the `cfgman` fixture is function-scope, hence it cannot
# be used for higher-scope fixtures. This environment
# is for those higher-level fixtures that must make commits.
# It is applied temporarily via `patched_env(**test_committer_env)`.
test_committer_env = {
'GIT_AUTHOR_NAME': git_name,
'GIT_AUTHOR_EMAIL': git_email,
'GIT_COMMITTER_NAME': git_name,
'GIT_COMMITTER_EMAIL': git_email,
# one additional configuration item is passed via the environment:
# a non-standard name for the default branch
'GIT_CONFIG_COUNT': '1',
'GIT_CONFIG_KEY_0': 'init.defaultbranch',
'GIT_CONFIG_VALUE_0': 'unusualdefault',
}
@dataclass(kw_only=True)
class HttpFileServerInstance:
    """Description of a running test HTTP file server

    Instances are yielded by the `http_fileserver` fixture. Both fields
    must be given as keyword arguments.
    """

    # base URL under which the served directory is reachable
    url: str
    # local directory whose content is being served
    path: Path
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def cfgman(monkeypatch):
    """Yield a configuration manager with a test-specific global scope

    A temporary file with the content of ``standard_gitconfig`` is created
    and declared to be the global Git configuration via the
    ``GIT_CONFIG_GLOBAL`` environment variable for the duration of the test.

    Any test using this fixture will be skipped for Git versions earlier
    than 2.32, because the `GIT_CONFIG_GLOBAL` environment variable used
    here was only introduced with that version.

    On teardown (and also when the test is skipped) the ``git-global``
    source is reloaded, so that the previous configuration is in effect
    again.
    """
    manager = get_manager()
    ggc = manager.sources['git-global']
    try:
        with TemporaryDirectory(
            prefix='datalad_gitcfg_global_',
        ) as tmpdir:
            cfgpath = Path(tmpdir) / 'gitconfig'
            cfgpath.write_text(standard_gitconfig)
            with monkeypatch.context() as m:
                m.setenv('GIT_CONFIG_GLOBAL', str(cfgpath))
                ggc.reinit()
                ggc.load()
                if (
                    ggc['datalad.magic.test-marker'].pristine_value
                    != magic_marker
                ):  # pragma: no cover
                    pytest.skip(
                        'Cannot establish isolated global Git config scope '
                        '(possibly Git too old (needs v2.32)'
                    )
                yield manager
    finally:
        # reload to put the previous config in effect again. This must
        # happen after the environment patch was undone, and it must also
        # happen when the fixture bails out via `pytest.skip()` above --
        # otherwise the manager would keep the temporary config loaded.
        ggc.reinit()
        ggc.load()
@pytest.fixture(autouse=True, scope='function')  # noqa: PT003
def verify_pristine_gitconfig_global():
    """No test must modify a user's global Git config.

    If such modifications are needed, a custom configuration setup
    limited to the scope of the test requiring it must be arranged.

    The fixture is auto-used. It takes a snapshot of all global Git
    configuration items before a test, and compares it to a second
    snapshot taken after the test.

    Raises
    ------
    AssertionError
      When the two snapshots differ.
    """
    from datalad_core.config import GlobalGitConfig  # noqa: PLC0415

    def get_ggc_state():
        # a fresh instance is used for each snapshot
        ggc = GlobalGitConfig()
        return {k: ggc[k].pristine_value for k in ggc}

    pre = get_ggc_state()
    yield
    if pre != get_ggc_state():  # pragma: no cover
        # this is hard to test, because we are inside an autoused fixture.
        # FWIW: this has been tested manually when it was implemented
        # originally
        msg = (
            'Global Git config modification detected. '
            'Test must be modified to use a temporary configuration target. '
            'Hint: use the `cfgman` fixture.'
        )
        raise AssertionError(msg)
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def gitrepo(tmp_path_factory) -> Path:
    """Return the path to an initialized Git repository

    The repository is created with ``git init`` in a fresh temporary
    directory.
    """
    # must use the factory to get a unique path even when a concrete
    # test also uses `tmp_path`
    path = tmp_path_factory.mktemp('gitrepo')
    call_git(
        ['init'],
        cwd=path,
        capture_output=True,
    )
    return path
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def baregitrepo(tmp_path_factory) -> Path:
    """Return the path to an initialized, bare Git repository

    The repository is created with ``git init --bare`` in a fresh temporary
    directory.
    """
    # must use the factory to get a unique path even when a concrete
    # test also uses `tmp_path`
    path = tmp_path_factory.mktemp('gitrepo')
    call_git(
        ['init', '--bare'],
        cwd=path,
        capture_output=True,
    )
    return path
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def bareannexrepo(baregitrepo) -> Path:
    """Return the path to a bare Git repository with an initialized annex

    Runs ``git annex init`` in the repository provided by the
    `baregitrepo` fixture, and returns that same path.
    """
    call_git(
        ['annex', 'init'],
        cwd=baregitrepo,
        capture_output=True,
    )
    return baregitrepo
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def annexrepo(gitrepo) -> Path:
    """Return the path to a Git repository with an initialized annex

    Runs ``git annex init`` in the repository provided by the `gitrepo`
    fixture, and returns that same path.
    """
    call_git(
        ['annex', 'init'],
        cwd=gitrepo,
        capture_output=True,
    )
    return gitrepo
@pytest.fixture(autouse=False, scope='session')
def symlinks_supported(tmp_path_factory) -> bool:
    """Returns whether Git is ready to use symlinks

    This checks (on Windows) if `core.symlinks` is enabled in the global
    Git configuration, and whether creating symlinks is supported in test
    directories.

    Returns
    -------
    bool
      ``False`` if (on Windows) the setting cannot be read or is not
      ``true``, or if creating a symlink in a temporary test directory
      fails. ``True`` otherwise.
    """
    if os.name == 'nt':
        # on windows, whether or not symlinks are technically possible
        # Git needs to have support enabled too, or they won't be
        # used by Git
        try:
            output = call_git(
                ['config', '--global', 'core.symlinks'],
                capture_output=True,
            )
        except CommandError:
            return False
        # other code in this file treats captured `call_git()` output as
        # bytes; normalize to `str` so the comparison below is meaningful
        # for either return type
        if isinstance(output, bytes):
            output = output.decode()
        if not output or output.strip() != 'true':
            return False

    testdir = tmp_path_factory.mktemp('symlink_check')
    target = testdir / 'target'
    source = testdir / 'source'
    try:
        target.touch()
        source.symlink_to(target)
    except Exception:  # noqa: BLE001
        # any failure is taken as "not supported", this is a
        # best-effort probe
        return False
    return True
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def skip_when_symlinks_not_supported(symlinks_supported):
    """Skip the requesting test when `symlinks_supported` reports ``False``"""
    if symlinks_supported:
        # nothing to do, the test can run
        return
    reason = 'skipped, symlinks are not supported in the test directory'
    raise pytest.skip(reason)
@pytest.fixture(scope='session')
def modified_dataset(tmp_path_factory):
    """Produces a dataset with various modifications

    The fixture is session-scope, aiming to be reused by many tests focused
    on reporting. It does not support any further modification. The fixture
    will fail, if any such modification is detected. Use the helper
    ``modify_dataset()`` to apply these modifications to an existing
    repository to circumvent this restriction.

    ``git status`` will report::

        > git status -uall
        On branch dl-test-branch
        Changes to be committed:
          (use "git restore --staged <file>..." to unstage)
                new file:   dir_m/file_a
                new file:   file_a
                new file:   file_am

        Changes not staged for commit:
          (use "git add/rm <file>..." to update what will be committed)
          (use "git restore <file>..." to discard changes in working directory)
          (commit or discard the untracked or modified content in submodules)
                deleted:    dir_d/file_d
                deleted:    dir_m/file_d
                modified:   dir_m/file_m
                deleted:    dir_sm/sm_d
                modified:   dir_sm/sm_m (modified content)
                modified:   dir_sm/sm_mu (modified content, untracked content)
                modified:   dir_sm/sm_n (new commits)
                modified:   dir_sm/sm_nm (new commits, modified content)
                modified:   dir_sm/sm_nmu (new commits, modified content, untracked content)
                modified:   dir_sm/sm_u (untracked content)
                modified:   file_am
                deleted:    file_d
                modified:   file_m

        Untracked files:
          (use "git add <file>..." to include in what will be committed)
                dir_m/dir_u/file_u
                dir_m/file_u
                dir_u/file_u
                file_u

    Suffix indicates the ought-to state (multiple possible):

    a - added
    c - clean
    d - deleted
    n - new commits
    m - modified
    u - untracked content

    Prefix indicates the item type:

    file - file
    sm - submodule
    dir - directory
    """
    repo_path = tmp_path_factory.mktemp('gitrepo')
    call_git(['init'], cwd=repo_path, capture_output=True)
    # a Git identity is required for committing. `cfgman` cannot provide
    # it, because that is a function-scope fixture
    with patched_env(**test_committer_env):
        # the status report returned here is the reference for the
        # comparison on teardown
        reference_status = modify_dataset(repo_path).splitlines()

    yield repo_path

    # any difference from the initial git-status output could invalidate
    # the assumptions of a consuming test. The modifying code must be
    # found and fixed
    final_status = call_git_lines(
        ['status', '-uall', '--porcelain=v1'], cwd=repo_path
    )
    if reference_status == final_status:
        return
    msg = 'Unexpected modification of the testbed'
    raise AssertionError(msg)
@pytest.fixture(scope='session')
def test_datasets(http_fileserver) -> Iterator[HttpFileServerInstance]:
    """Populate the HTTP file server with a set of linked test datasets

    The following repositories are created in the served directory:

    - ds1-wt: repo with annex and checkout, no submodules
    - ds1.git: bare clone of ds1-wt
    - ds2.git: bare clone of ds1.git with submodules (also ds1) added
    - ds3.git: bare clone of ds2.git with ds2 as a submodule added

    In addition, the file ``protected/files/secret.txt`` is placed in the
    served directory, and is registered by URL as ``secret.txt`` in
    ``ds1-wt``.

    Yields the instance received from the `http_fileserver` fixture.

    Raises
    ------
    RuntimeError
      On teardown, when ``HEAD`` of any dataset has changed, or when any
      annex key other than that of ``secret.txt`` is not present in a
      dataset.
    """
    # tolerate a base URL with or without trailing slash, in order to
    # never produce `//` in any of the URLs generated below
    base_url = http_fileserver.url.rstrip('/')

    secret_file = http_fileserver.path / 'protected' / 'files' / 'secret.txt'
    secret_file.parent.mkdir(parents=True)
    secret_file.write_text('supersecret')

    datasets = []
    datasets.append(http_fileserver.path / 'ds1-wt')
    # we need a git identity to commit stuff. We cannot use `cfgman`,
    # because that is a function-scope fixture
    with patched_env(**test_committer_env):
        create_populated_dataset_annex_wt(datasets[-1])
        call_git(
            [
                '-c',
                'annex.security.allowed-ip-addresses=all',
                'annex',
                'addurl',
                '--quiet',
                '--relaxed',
                '--raw',
                '--fast',
                '--file',
                'secret.txt',
                f'{base_url}/protected/files/secret.txt',
            ],
            cwd=datasets[-1],
        )
        call_git_commit(datasets[-1], msg='add remote secret by URL')

        def bareclone(src, dest):
            call_git(['clone', '-q', '--bare', src, dest])
            call_git(['annex', 'init', '--quiet'], cwd=dest)
            # we get content from origin to not pull secret.txt via its
            # URL
            call_git(['annex', 'get', '--quiet', '-f', 'origin'], cwd=dest)

        datasets.append(http_fileserver.path / 'ds1.git')
        bareclone(datasets[-2], datasets[-1])
        ds1_head = call_git_oneline(['rev-parse', 'HEAD'], cwd=datasets[-1])
        if ds1_head is None:
            msg = 'Cannot determine repo HEAD'
            raise RuntimeError(msg)

        datasets.append(http_fileserver.path / 'ds2.git')
        bareclone(datasets[-2], datasets[-1])
        ds2_head = apply_changeset(
            Repo(datasets[-1]),
            {
                'subdir1/subm1': (gt.GitObjectMode.SUBMODULE, ds1_head),
                'subm2': (gt.GitObjectMode.SUBMODULE, ds1_head),
                '.gitmodules': (
                    '[submodule "subdir1/subm1"]\n'
                    '\tpath = subdir1/subm1\n'
                    f'\turl = {base_url}/ds1.git\n'
                    '[submodule "subm2"]\n'
                    '\tpath = subm2\n'
                    f'\turl = {base_url}/ds1.git\n'
                ),
            },
            message='register submodules',
        )
        if ds2_head is None:
            msg = 'Cannot determine repo HEAD'
            raise RuntimeError(msg)

        datasets.append(http_fileserver.path / 'ds3.git')
        bareclone(datasets[-2], datasets[-1])
        apply_changeset(
            Repo(datasets[-1]),
            {
                'subm3': (gt.GitObjectMode.SUBMODULE, ds2_head),
                '.gitmodules': (
                    '[submodule "subm3"]\n'
                    '\tpath = subm3\n'
                    f'\turl = {base_url}/ds2.git\n'
                ),
            },
            message='register submodule',
        )

    # finalize
    ds_states = []
    for ds in datasets:
        # more efficient access from here on
        call_git(['gc'], capture_output=True, cwd=ds)
        # generate the auxiliary info files for access via plain HTTP
        call_git(['update-server-info'], cwd=ds)
        # record HEAD as the reference for the teardown check
        ds_states.append(
            (ds, call_git_oneline(['rev-parse', 'HEAD'], cwd=ds)),
        )

    yield http_fileserver

    for ds, state in ds_states:
        if state != call_git_oneline(['rev-parse', 'HEAD'], cwd=ds):
            msg = 'Undesired modification of `test_datasets` fixture'
            raise RuntimeError(msg)
        # we do --branch HEAD to also function in bare repos
        missing_keys = call_git(
            ['annex', 'find', '--not', '--in', 'here', '--branch', 'HEAD'],
            capture_output=True,
            cwd=ds,
        )
        if TYPE_CHECKING:
            assert isinstance(missing_keys, bytes)
        # secret.txt is served from a dedicated URL, need not be in the annex
        missing_keys = missing_keys.replace(b'secret.txt', b'')
        if missing_keys.strip():
            msg = (
                'Undesired missing annex key in `test_datasets` fixture: '
                f'{ds}\n{missing_keys.decode()}'
            )
            raise RuntimeError(msg)
@pytest.fixture(scope='session')
def http_fileserver(
    tmp_path_factory,
    host: str = '127.0.0.1',
    port: int | None = None,
) -> Iterator[HttpFileServerInstance]:
    """Starts a ThreadingHTTPServer serving a temporary directory

    The server runs in a daemon thread for the lifetime of the fixture.
    Any request path starting with ``/protected`` requires HTTP basic
    authentication (user ``datalad``, password ``supersecret``).

    Parameters
    ----------
    host: str
      Address to bind the server to.
    port: int, optional
      Port to bind the server to. If ``None`` (or ``0``), a free port is
      picked by the operating system.

    Yields the base URL and served path as an
    :class:`HttpFileServerInstance`. The URL has a trailing slash.
    """
    tmp_path = tmp_path_factory.mktemp('http_serve')
    # create a password-protected partition
    protected_root = 'protected'
    (tmp_path / protected_root).mkdir()

    # a custom handler that serves the TMP directory
    class _ServeDirHandler(SimpleHTTPRequestHandler):
        USERNAME = 'datalad'
        PASSWORD = 'supersecret'  # noqa: S105

        def __init__(self, *args):
            super().__init__(*args, directory=str(tmp_path))

        def log_message(self, *args):
            # we do not want the noise of the file server log
            # disable completely here. Maybe a use case for
            # a debug switch at some point
            pass

        def is_protected(self) -> bool:
            # plain prefix match on the request path as received. This
            # covers the protected directory itself and anything
            # underneath it. No path normalization is performed.
            return self.path.startswith(f'/{protected_root}')

        def send_unauthorized(self):
            self.send_response(401)
            self.send_header('WWW-Authenticate', 'Basic realm="Protected"')
            self.send_header('Content-type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(b'401 Unauthorized\n')

        def _check_basic_auth(self, header_value: str | None) -> bool:
            if not header_value:
                return False
            try:
                scheme, cred = header_value.split(' ', 1)
                if scheme.lower() != 'basic':
                    return False
                decoded = base64.b64decode(cred).decode('utf-8')
                user, pwd = decoded.split(':', 1)
            except ValueError:
                # raised for a header without a space separator, invalid
                # base64 data, undecodable bytes (`UnicodeDecodeError` is
                # a `ValueError`), and credentials without a colon
                return False
            return (
                user == _ServeDirHandler.USERNAME
                and pwd == _ServeDirHandler.PASSWORD
            )

        def _authorized(self) -> bool:
            # common access gate for GET and HEAD: sends the 401 response
            # itself when access must be denied
            if not self.is_protected():
                return True
            if self._check_basic_auth(self.headers.get('Authorization')):
                return True
            self.send_unauthorized()
            return False

        def do_GET(self):
            if self._authorized():
                super().do_GET()

        def do_HEAD(self):
            if self._authorized():
                super().do_HEAD()

    # bind right away, rather than probing for a free port with a throwaway
    # socket first: between closing such a socket and binding the server,
    # the port could be taken by another process. With port 0 the
    # operating system picks a free port, which is read back afterwards.
    server = ThreadingHTTPServer(
        (host, 0 if port is None else port),
        _ServeDirHandler,
    )
    bound_port = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    base_url = f'http://{host}:{bound_port}/'
    try:
        yield HttpFileServerInstance(url=base_url, path=tmp_path)
    finally:
        server.shutdown()
        server.server_close()
        thread.join(timeout=1)
@pytest.fixture(autouse=False, scope='function')  # noqa: PT003
def progress_handler():
    """Provide a new :class:`~datalad_core.tests.AuditProgressHandler`

    The fixture is function-scope, hence each requesting test receives
    its own instance.
    """
    handler = AuditProgressHandler()
    return handler