"""Collection of fixtures for facilitation test implementations
"""
import getpass
import logging
import os
from pathlib import Path
import subprocess
import pytest
from tempfile import NamedTemporaryFile
from time import sleep
from urllib.request import urlopen
from datalad_next.datasets import Dataset
from datalad_next.runners import (
call_git_lines,
call_git_success,
)
from datalad_next.utils import patched_env
from .utils import (
HTTPPath,
WebDAVPath,
assert_ssh_access,
external_versions,
get_git_config_global_fpath,
md5sum,
rmtree,
)
lgr = logging.getLogger('datalad.next.tests.fixtures')
[docs]
@pytest.fixture(autouse=True, scope="session")
def reduce_logging():
"""Reduce the logging output during test runs
DataLad emits a large amount of repetitive INFO log messages that only
clutter the test output, and hardly ever help to identify an issue.
This fixture modifies the standard logger to throw away all INFO level
log messages.
With this approach, such messages are still fed to and processes by the
logger (in contrast to an apriori level setting).
"""
dllgr = logging.getLogger('datalad')
# leave a trace that this is happening
dllgr.info("Test fixture starts suppressing INFO level messages")
class NoInfo(logging.Filter):
def filter(self, record):
# it seems unnecessary to special case progress logs, moreover
# not filtering them out will make clone/fetch/push very visible
# in the logs with trivial messages
#if hasattr(record, 'dlm_progress'):
# # this is a progress log message that may trigger something
# # a test is looking for
# return True
if record.levelno == 20:
# this is a plain INFO message, ignore
return False
else:
return True
noinfo = NoInfo()
# we need to attach the filter to any handler to make it effective.
# adding to the logger only will not effect any log messages produced
# via descendant loggers
for hdlr in dllgr.handlers:
hdlr.addFilter(noinfo)
[docs]
@pytest.fixture(autouse=False, scope="function")
def no_result_rendering(monkeypatch):
"""Disable datalad command result rendering for all command calls
This is achieved by forcefully supplying `result_renderer='disabled'`
to any command call via a patch to internal argument normalizer
``get_allargs_as_kwargs()``.
"""
# we need to patch our patch function, because datalad-core's is no
# longer used
import datalad_next.patches.interface_utils as dnpiu
old_get_allargs_as_kwargs = dnpiu.get_allargs_as_kwargs
def no_render_get_allargs_as_kwargs(call, args, kwargs):
kwargs, one, two = old_get_allargs_as_kwargs(call, args, kwargs)
kwargs['result_renderer'] = 'disabled'
return kwargs, one, two
with monkeypatch.context() as m:
m.setattr(dnpiu,
'get_allargs_as_kwargs',
no_render_get_allargs_as_kwargs)
yield
[docs]
@pytest.fixture(autouse=False, scope="function")
def tmp_keyring():
"""Patch plaintext keyring to temporarily use a different storage
No credential read or write actions will impact any existing credential
store of any configured backend.
The patched backend is yielded by the fixture.
"""
import keyring
# the testsetup assumes this to be a plaintext backend.
# this backend is based on a filename and maintains no state.
# each operation opens, reads/writes, and then closes the file.
# hence we can simply point to a different file
backend = keyring.get_keyring()
prev_fpath = backend.file_path
# no tmp keyring yet, make one
with NamedTemporaryFile(
'r',
prefix='datalad_tmp_keyring_',
delete=True) as tf:
# we must close, because windows does not like the file being open
# already when ConfigManager would open it for reading
tf.close()
backend.file_path = tf.name
with patched_env(DATALAD_TESTS_TMP_KEYRING_PATH=tf.name):
yield backend
backend.file_path = prev_fpath
# the following is taken from datalad/conftest.py
# sadly, this is defined inline and cannot be reused directly
standard_gitconfig = """\
[user]
name = DataLad Tester
email = test@example.com
[core]
askPass =
[datalad "log"]
exc = 1
[datalad "extensions"]
# load the next extension to be able to test patches of annex remotes
# that run in subprocesses
load = next
[annex "security"]
# from annex 6.20180626 file:/// and http://localhost access isn't
# allowed by default
allowed-url-schemes = http https file
allowed-http-addresses = all
[protocol "file"]
# since git 2.38.1 cannot by default use local clones for submodules
# https://github.blog/2022-10-18-git-security-vulnerabilities-announced/#cve-2022-39253
allow = always
""" + os.environ.get('DATALAD_TESTS_GITCONFIG', '').replace('\\n', os.linesep)
[docs]
@pytest.fixture(autouse=False, scope="function")
def datalad_cfg():
"""Temporarily alter configuration to use a plain "global" configuration
The global configuration manager at `datalad.cfg` is reloaded after
adjusting `GIT_CONFIG_GLOBAL` to point to a new temporary `.gitconfig`
file.
After test execution the file is removed, and the global `ConfigManager`
is reloaded once more.
Any test using this fixture will be skipped for Git versions earlier
than 2.32, because the `GIT_CONFIG_GLOBAL` environment variable used
here was only introduced with that version.
"""
if external_versions['cmd:git'] < "2.32":
pytest.skip(
"Git configuration redirect via GIT_CONFIG_GLOBAL "
"only supported since Git v2.32"
)
from datalad import cfg
with NamedTemporaryFile(
'w',
prefix='datalad_gitcfg_global_',
delete=False) as tf:
tf.write(standard_gitconfig)
# we must close, because windows does not like the file being open
# already when ConfigManager would open it for reading
tf.close()
with patched_env(GIT_CONFIG_GLOBAL=tf.name):
cfg.reload(force=True)
yield cfg
# reload to put the previous config in effect again
cfg.reload(force=True)
[docs]
@pytest.fixture(autouse=True, scope="function")
def check_gitconfig_global():
"""No test must modify a user's global Git config.
If such modifications are needed, a custom configuration setup
limited to the scope of the test requiring it must be arranged.
"""
globalcfg_fname = get_git_config_global_fpath()
if not globalcfg_fname.exists():
lgr.warning(
'No global/user Git config file exists. This is an unexpected '
'test environment, no config modifications checks can be '
'performed. Proceeding nevertheless.')
# let the test run
yield
# and exit quietly
return
# we have a config file. hash it pre and post test. Fail is changed.
pre = md5sum(globalcfg_fname)
yield
post = md5sum(globalcfg_fname)
assert pre == post, \
"Global Git config modification detected. Test must be modified to use " \
"a temporary configuration target. Hint: use the `datalad_cfg` fixture."
[docs]
@pytest.fixture(autouse=True, scope="function")
def check_plaintext_keyring():
"""No test must modify a user's keyring.
If such modifications are needed, a custom keyring setup
limited to the scope of the test requiring it must be arranged.
The ``tmp_keyring`` fixture can be employed in such cases.
"""
# datalad-core configures keyring to use a plaintext backend
# we will look for the underlying file and verify that it is either
# no there, or remains unmodified
import keyring
kr = keyring.get_keyring()
if not hasattr(kr, 'file_path'):
# this is not the plain text keyring, nothing we can do here
# run as-is, but leave a message
lgr.warning('Running without the expected plain-text keyring')
yield
return
kr_fpath = Path(kr.file_path)
pre = md5sum(kr_fpath) if kr_fpath.exists() else ''
yield
post = md5sum(kr_fpath) if kr_fpath.exists() else ''
assert pre == post, \
"Keyring modification detected. Test must be modified to use " \
"a temporary keyring. Hint: use the `tmp_keyring` fixture."
[docs]
@pytest.fixture(autouse=False, scope="function")
def credman(datalad_cfg, tmp_keyring):
"""Provides a temporary credential manager
It comes with a temporary global datalad config and a temporary
keyring as well.
This manager can be used to deploy or manipulate credentials within the
scope of a single test.
"""
from datalad import cfg
from datalad_next.credman import CredentialManager
cm = CredentialManager(cfg)
yield cm
[docs]
@pytest.fixture(autouse=False, scope="function")
def dataset(datalad_cfg, tmp_path_factory):
"""Provides a ``Dataset`` instance for a not-yet-existing repository
The instance points to an existing temporary path, but ``create()``
has not been called on it yet.
"""
# must use the factory to get a unique path even when a concrete
# test also uses `tmp_path`
ds = Dataset(tmp_path_factory.mktemp("dataset"))
yield ds
[docs]
@pytest.fixture(autouse=False, scope="function")
def existing_dataset(dataset):
"""Provides a ``Dataset`` instance pointing to an existing dataset/repo
This fixture uses an instance provided by the ``dataset`` fixture and
calls ``create()`` on it, before it yields the ``Dataset`` instance.
"""
dataset.create(result_renderer='disabled')
yield dataset
[docs]
@pytest.fixture(autouse=False, scope="function")
def existing_noannex_dataset(dataset):
"""just like ``existing_dataset``, but created with ``annex=False``
"""
dataset.create(annex=False, result_renderer='disabled')
yield dataset
[docs]
@pytest.fixture(scope="session")
def modified_dataset(tmp_path_factory):
"""Produces a dataset with various modifications
The fixture is module-scope, aiming to be reused by many tests focused
on reporting. It does not support any further modification. The fixture
will fail, if any such modification is detected.
``git status`` will report::
❯ git status -uall
On branch dl-test-branch
Changes to be committed:
(use "git restore --staged <file>..." to unstage)
new file: dir_m/file_a
new file: file_a
Changes not staged for commit:
(use "git add/rm <file>..." to update what will be committed)
(use "git restore <file>..." to discard changes in working directory)
(commit or discard the untracked or modified content in submodules)
deleted: dir_d/file_d
deleted: dir_m/file_d
modified: dir_m/file_m
deleted: dir_sm/sm_d
modified: dir_sm/sm_m (modified content)
modified: dir_sm/sm_mu (modified content, untracked content)
modified: dir_sm/sm_n (new commits)
modified: dir_sm/sm_nm (new commits, modified content)
modified: dir_sm/sm_nmu (new commits, modified content, untracked content)
modified: dir_sm/sm_u (untracked content)
deleted: file_d
modified: file_m
Untracked files:
(use "git add <file>..." to include in what will be committed)
dir_m/dir_u/file_u
dir_m/file_u
dir_u/file_u
file_u
Suffix indicates the ought-to state (multiple possible):
a - added
c - clean
d - deleted
n - new commits
m - modified
u - untracked content
Prefix indicated the item type:
file - file
sm - submodule
dir - directory
"""
ds = Dataset(tmp_path_factory.mktemp("modified_dataset"))
ds.create(result_renderer='disabled')
ds_dir = ds.pathobj / 'dir_m'
ds_dir.mkdir()
ds_dir_d = ds.pathobj / 'dir_d'
ds_dir_d.mkdir()
(ds_dir / 'file_m').touch()
(ds.pathobj / 'file_m').touch()
dirsm = ds.pathobj / 'dir_sm'
dss = {}
for smname in (
'sm_d', 'sm_c', 'sm_n', 'sm_m', 'sm_nm', 'sm_u', 'sm_mu', 'sm_nmu',
'droppedsm_c',
):
sds = Dataset(dirsm / smname).create(result_renderer='disabled')
# for the plain modification, commit the reference right here
if smname in ('sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
(sds.pathobj / 'file_m').touch()
sds.save(to_git=True, result_renderer='disabled')
dss[smname] = sds
# files in superdataset to be deleted
for d in (ds_dir_d, ds_dir, ds.pathobj):
(d / 'file_d').touch()
dss['.'] = ds
dss['dir'] = ds_dir
ds.save(to_git=True, result_renderer='disabled')
ds.drop(dirsm / 'droppedsm_c', what='datasets', reckless='availability',
result_renderer='disabled')
# a new commit
for smname in ('.', 'sm_n', 'sm_nm', 'sm_nmu'):
sds = dss[smname]
(sds.pathobj / 'file_c').touch()
sds.save(to_git=True, result_renderer='disabled')
# modified file
for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
obj = dss[smname]
pobj = obj.pathobj if isinstance(obj, Dataset) else obj
(pobj / 'file_m').write_text('modify!')
# untracked
for smname in ('.', 'dir', 'sm_u', 'sm_mu', 'sm_nmu'):
obj = dss[smname]
pobj = obj.pathobj if isinstance(obj, Dataset) else obj
(pobj / 'file_u').touch()
(pobj / 'dirempty_u').mkdir()
(pobj / 'dir_u').mkdir()
(pobj / 'dir_u' / 'file_u').touch()
# delete items
rmtree(dss['sm_d'].pathobj)
rmtree(ds_dir_d)
(ds_dir / 'file_d').unlink()
(ds.pathobj / 'file_d').unlink()
# added items
for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
obj = dss[smname]
pobj = obj.pathobj if isinstance(obj, Dataset) else obj
(pobj / 'file_a').write_text('added')
assert call_git_success(['add', 'file_a'], cwd=pobj)
# record git-status output as a reference
status_start = call_git_lines(['status'], cwd=ds.pathobj)
yield ds
# compare with initial git-status output, if there are any
# differences the assumptions of any consuming test could be
# invalidated. The modifying code must be found and fixed
assert status_start == call_git_lines(['status'], cwd=ds.pathobj), \
"Unexpected modification of the testbed"
[docs]
@pytest.fixture(autouse=False, scope="session")
def webdav_credential():
"""Provides HTTP Basic authentication credential necessary to access the
server provided by the ``webdav_server`` fixture."""
yield dict(
name='dltest-my&=webdav',
user='datalad',
secret='secure',
type='user_password',
)
[docs]
@pytest.fixture(autouse=False, scope="function")
def webdav_server(tmp_path_factory, webdav_credential):
"""Provides a WebDAV server, serving a temporary directory
The fixtures yields an instance of ``WebDAVPath``, providing the
following essential attributes:
- ``path``: ``Path`` instance of the served temporary directory
- ``url``: HTTP URL to access the WebDAV server
Server access requires HTTP Basic authentication with the credential
provided by the ``webdav_credential`` fixture.
"""
auth = (webdav_credential['user'], webdav_credential['secret'])
# must use the factory to get a unique path even when a concrete
# test also uses `tmp_path`
path = tmp_path_factory.mktemp("webdav")
# this looks a little awkward, but is done to avoid a change in
# WebDAVPath.
server = WebDAVPath(path, auth=auth)
with server as server_url:
server.url = server_url
yield server
[docs]
@pytest.fixture(autouse=False, scope="session")
def http_credential():
"""Provides the HTTP Basic authentication credential necessary to access the
HTTP server provided by the ``http_server_with_basicauth`` fixture."""
yield dict(
name='dltest-my&=http',
user='datalad',
secret='secure',
type='user_password',
)
[docs]
@pytest.fixture(autouse=False, scope="function")
def http_server(tmp_path_factory):
"""Provides an HTTP server, serving a temporary directory
The fixtures yields an instance of ``HTTPPath``, providing the
following essential attributes:
- ``path``: ``Path`` instance of the served temporary directory
- ``url``: HTTP URL to access the HTTP server
"""
# must use the factory to get a unique path even when a concrete
# test also uses `tmp_path`
path = tmp_path_factory.mktemp("webdav")
server = HTTPPath(path, use_ssl=False, auth=None)
with server:
# overwrite path with Path object for convenience
server.path = path
yield server
[docs]
@pytest.fixture(autouse=False, scope="function")
def http_server_with_basicauth(tmp_path_factory, http_credential):
"""Like ``http_server`` but requiring authentication via ``http_credential``
"""
path = tmp_path_factory.mktemp("webdav")
server = HTTPPath(
path, use_ssl=False,
auth=(http_credential['user'], http_credential['secret']),
)
with server:
# overwrite path with Path object for convenience
server.path = path
yield server
[docs]
@pytest.fixture(scope="session")
def httpbin_service():
"""Return canonical access URLs for the HTTPBIN service
This fixture tries to spin up a httpbin Docker container at localhost:8765;
if successful, it returns this URL as the 'standard' URL. If the attempt
fails, a URL pointing to the canonical instance is returned.
For tests that need to have the service served via a specific
protocol (https vs http), the corresponding URLs are returned
too. They always point to the canonical deployment, as some
tests require both protocols simultaneously and a local deployment
generally won't have https.
"""
hburl = 'http://httpbin.org'
hbsurl = 'https://httpbin.org'
ciurl = 'http://localhost:8765'
if os.name == "posix":
try:
r = subprocess.run(
["docker", "run", "-d", "-p", "127.0.0.1:8765:80", "kennethreitz/httpbin"],
check=True,
stdout=subprocess.PIPE,
text=True,
)
except (OSError, subprocess.CalledProcessError):
lgr.warning("Failed to spin up httpbin Docker container:", exc_info=True)
container_id = None
else:
container_id = r.stdout.strip()
else:
container_id = None
try:
if container_id is not None:
# Wait for container to fully start:
for _ in range(25):
try:
urlopen(ciurl)
except Exception:
sleep(1)
else:
break
else:
raise RuntimeError("httpbin container did not start up in time")
yield {
"standard": ciurl if container_id is not None else hbsurl,
"http": hburl,
"https": hbsurl,
}
finally:
if container_id is not None:
subprocess.run(["docker", "rm", "-f", container_id], check=True)
[docs]
@pytest.fixture(scope="function")
def httpbin(httpbin_service):
"""Does the same thing as ``httpbin_service``, but skips on function-scope
``httpbin_service`` always returns access URLs for HTTPBIN. However,
in some cases it is simply not desirable to run a test. For example,
the appveyor workers are more or less constantly unable to access the
public service. This fixture is evaluated at function-scope and
skips the test whenever any of these undesired conditions is
detected. Otherwise it just relays ``httpbin_service``.
"""
if os.environ.get('DATALAD_TESTS_NONETWORK'):
pytest.skip(
'Not running httpbin-based test: NONETWORK flag set'
)
if 'APPVEYOR' in os.environ and 'DEPLOY_HTTPBIN_IMAGE' not in os.environ:
pytest.skip(
"Not running httpbin-based test on appveyor without "
"docker-deployed instance -- too unreliable"
)
yield httpbin_service
[docs]
@pytest.fixture(autouse=False, scope="function")
def datalad_interactive_ui(monkeypatch):
"""Yields a UI replacement to query for operations and stage responses
No output will be written to STDOUT/ERR by this UI.
A standard usage pattern is to stage one or more responses, run the
to-be-tested code, and verify that the desired user interaction
took place::
> datalad_interactive_ui.staged_responses.append('skip')
> ...
> assert ... datalad_interactive_ui.log
"""
from datalad_next.uis import ui_switcher
from .utils import InteractiveTestUI
with monkeypatch.context() as m:
m.setattr(ui_switcher, '_ui', InteractiveTestUI())
yield ui_switcher.ui
[docs]
@pytest.fixture(autouse=False, scope="function")
def datalad_noninteractive_ui(monkeypatch):
"""Yields a UI replacement to query for operations
No output will be written to STDOUT/ERR by this UI.
A standard usage pattern is to run the to-be-tested code, and verify that
the desired user messaging took place::
> ...
> assert ... datalad_interactive_ui.log
"""
from datalad_next.uis import ui_switcher
from .utils import TestUI
with monkeypatch.context() as m:
m.setattr(ui_switcher, '_ui', TestUI())
yield ui_switcher.ui
[docs]
@pytest.fixture(autouse=False, scope="session")
def sshserver_setup(tmp_path_factory):
if not os.environ.get('DATALAD_TESTS_SSH'):
pytest.skip(
"set DATALAD_TESTS_SSH=1 to enable")
# query a bunch of recognized configuration environment variables,
# fill in the blanks, then check if the given configuration is working,
# and post the full configuration again as ENV vars, to be picked up by
# the function-scope `datalad_cfg`
tmp_root = str(tmp_path_factory.mktemp("sshroot"))
host = os.environ.get('DATALAD_TESTS_SERVER_SSH_HOST', 'localhost')
port = os.environ.get('DATALAD_TESTS_SERVER_SSH_PORT', '22')
login = os.environ.get(
'DATALAD_TESTS_SERVER_SSH_LOGIN',
getpass.getuser())
seckey = os.environ.get(
'DATALAD_TESTS_SERVER_SSH_SECKEY',
str(Path.home() / '.ssh' / 'id_rsa'))
path = os.environ.get('DATALAD_TESTS_SERVER_SSH_PATH', tmp_root)
# TODO this should not use `tmp_root` unconditionally, but only if
# the SSH_PATH is known to be the same. This might not be if SSH_PATH
# is explicitly configured and LOCALPATH is not -- which could be
# an indication that there is none
localpath = os.environ.get('DATALAD_TESTS_SERVER_LOCALPATH', tmp_root)
assert_ssh_access(host, port, login, seckey, path, localpath)
info = {}
# as far as we can tell, this is good, post effective config in ENV
for v, e in (
(host, 'HOST'),
# this is SSH_*, because elsewhere we also have other properties
# for other services
(port, 'SSH_PORT'),
(login, 'SSH_LOGIN'),
(seckey, 'SSH_SECKEY'),
(path, 'SSH_PATH'),
(localpath, 'LOCALPATH'),
):
os.environ[f"DATALAD_TESTS_SERVER_{e}"] = v
info[e] = v
yield info
[docs]
@pytest.fixture(autouse=False, scope="function")
def sshserver(sshserver_setup, datalad_cfg, monkeypatch):
# strip any leading / from the path, we add one, and
# only one below
sshserver_path = sshserver_setup['SSH_PATH'].lstrip('/')
baseurl = f"ssh://{sshserver_setup['SSH_LOGIN']}" \
f"@{sshserver_setup['HOST']}" \
f":{sshserver_setup['SSH_PORT']}" \
f"/{sshserver_path}"
with monkeypatch.context() as m:
m.setenv("DATALAD_SSH_IDENTITYFILE", sshserver_setup['SSH_SECKEY'])
# force reload the config manager, to ensure the private key setting
# makes it into the active config
datalad_cfg.reload(force=True)
yield baseurl, Path(sshserver_setup['LOCALPATH'])