# Source code for datalad_next.tests.fixtures

"""Collection of fixtures for facilitation test implementations
"""
import getpass
import logging
import os
from pathlib import Path
import subprocess
import pytest
from tempfile import NamedTemporaryFile
from time import sleep
from urllib.request import urlopen

from datalad_next.datasets import Dataset
from datalad_next.runners import (
    call_git_lines,
    call_git_success,
)
from datalad_next.utils import patched_env
from .utils import (
    HTTPPath,
    WebDAVPath,
    assert_ssh_access,
    external_versions,
    get_git_config_global_fpath,
    md5sum,
    rmtree,
)

lgr = logging.getLogger('datalad.next.tests.fixtures')


@pytest.fixture(autouse=True, scope="session")
def reduce_logging():
    """Reduce the logging output during test runs

    DataLad emits a large amount of repetitive INFO log messages that only
    clutter the test output, and hardly ever help to identify an issue.
    This fixture attaches a filter to the ``datalad`` logger that discards
    all INFO-level records. With this approach, such messages are still fed
    to and processed by the logger (in contrast to an apriori level setting).
    """
    datalad_lgr = logging.getLogger('datalad')
    # leave a trace that this is happening
    datalad_lgr.info("Test fixture starts suppressing INFO level messages")

    class NoInfo(logging.Filter):
        """Reject plain INFO records, let everything else pass"""
        # progress log messages are deliberately NOT special-cased here:
        # filtering them out too keeps clone/fetch/push from becoming very
        # visible in the logs with trivial messages
        def filter(self, record):
            # logging.INFO == 20; reject exactly that level
            return record.levelno != logging.INFO

    info_blocker = NoInfo()
    # the filter must be attached to every handler to be effective.
    # adding it to the logger only would not affect any log messages
    # produced via descendant loggers
    for handler in datalad_lgr.handlers:
        handler.addFilter(info_blocker)
@pytest.fixture(autouse=False, scope="function")
def no_result_rendering(monkeypatch):
    """Disable datalad command result rendering for all command calls

    This is achieved by forcefully supplying `result_renderer='disabled'`
    to any command call via a patch to internal argument normalizer
    ``get_allargs_as_kwargs()``.
    """
    # we need to patch our patch function, because datalad-core's is no
    # longer used
    import datalad_next.patches.interface_utils as dnpiu

    orig_normalizer = dnpiu.get_allargs_as_kwargs

    def _disabled_rendering_normalizer(call, args, kwargs):
        # delegate to the original normalizer, then force-override
        # the renderer setting in the normalized kwargs
        kwargs, first, second = orig_normalizer(call, args, kwargs)
        kwargs['result_renderer'] = 'disabled'
        return kwargs, first, second

    with monkeypatch.context() as mp:
        mp.setattr(dnpiu, 'get_allargs_as_kwargs',
                   _disabled_rendering_normalizer)
        yield
@pytest.fixture(autouse=False, scope="function")
def tmp_keyring():
    """Patch plaintext keyring to temporarily use a different storage

    No credential read or write actions will impact any existing credential
    store of any configured backend.

    The patched backend is yielded by the fixture.
    """
    import keyring

    # the testsetup assumes this to be a plaintext backend.
    # this backend is based on a filename and maintains no state.
    # each operation opens, reads/writes, and then closes the file.
    # hence we can simply point to a different file
    backend = keyring.get_keyring()
    prev_fpath = backend.file_path

    # no tmp keyring yet, make one
    with NamedTemporaryFile(
            'r',
            prefix='datalad_tmp_keyring_',
            delete=True) as tf:
        # we must close, because windows does not like the file being open
        # already when ConfigManager would open it for reading
        # NOTE(review): with delete=True this close() also removes the file;
        # the keyring backend then re-creates it lazily at `tf.name`, and
        # that re-created file is not removed afterwards -- presumably
        # intentional, confirm
        tf.close()
        # redirect the backend to the temporary location
        backend.file_path = tf.name
        # also expose the location via ENV -- presumably so subprocesses
        # can locate the very same temporary keyring; verify against callers
        with patched_env(DATALAD_TESTS_TMP_KEYRING_PATH=tf.name):
            yield backend
        # restore the original keyring storage location
        backend.file_path = prev_fpath
# the following is taken from datalad/conftest.py # sadly, this is defined inline and cannot be reused directly standard_gitconfig = """\ [user] name = DataLad Tester email = test@example.com [core] askPass = [datalad "log"] exc = 1 [datalad "extensions"] # load the next extension to be able to test patches of annex remotes # that run in subprocesses load = next [annex "security"] # from annex 6.20180626 file:/// and http://localhost access isn't # allowed by default allowed-url-schemes = http https file allowed-http-addresses = all [protocol "file"] # since git 2.38.1 cannot by default use local clones for submodules # https://github.blog/2022-10-18-git-security-vulnerabilities-announced/#cve-2022-39253 allow = always """ + os.environ.get('DATALAD_TESTS_GITCONFIG', '').replace('\\n', os.linesep)
@pytest.fixture(autouse=False, scope="function")
def datalad_cfg():
    """Temporarily alter configuration to use a plain "global" configuration

    The global configuration manager at `datalad.cfg` is reloaded after
    adjusting `GIT_CONFIG_GLOBAL` to point to a new temporary `.gitconfig`
    file.

    After test execution the file is removed, and the global `ConfigManager`
    is reloaded once more.

    Any test using this fixture will be skipped for Git versions earlier
    than 2.32, because the `GIT_CONFIG_GLOBAL` environment variable used
    here was only introduced with that version.
    """
    if external_versions['cmd:git'] < "2.32":
        pytest.skip(
            "Git configuration redirect via GIT_CONFIG_GLOBAL "
            "only supported since Git v2.32"
        )
    from datalad import cfg
    with NamedTemporaryFile(
            'w',
            prefix='datalad_gitcfg_global_',
            delete=False) as tf:
        tf.write(standard_gitconfig)
        # we must close, because windows does not like the file being open
        # already when ConfigManager would open it for reading
        tf.close()
        try:
            with patched_env(GIT_CONFIG_GLOBAL=tf.name):
                cfg.reload(force=True)
                yield cfg
        finally:
            # delete=False was required above (windows), so we are
            # responsible for removing the file -- as promised by the
            # docstring. Previously the file was leaked.
            Path(tf.name).unlink(missing_ok=True)
    # reload to put the previous config in effect again
    cfg.reload(force=True)
@pytest.fixture(autouse=True, scope="function")
def check_gitconfig_global():
    """No test must modify a user's global Git config.

    If such modifications are needed, a custom configuration setup
    limited to the scope of the test requiring it must be arranged.
    """
    cfg_path = get_git_config_global_fpath()
    if not cfg_path.exists():
        lgr.warning(
            'No global/user Git config file exists. This is an unexpected '
            'test environment, no config modifications checks can be '
            'performed. Proceeding nevertheless.')
        # nothing to compare against: run the test, then exit quietly
        yield
        return

    # a config file is present: fingerprint it before and after the test,
    # and fail on any difference
    digest_before = md5sum(cfg_path)
    yield
    digest_after = md5sum(cfg_path)
    assert digest_before == digest_after, (
        "Global Git config modification detected. Test must be modified to use "
        "a temporary configuration target. Hint: use the `datalad_cfg` fixture."
    )
@pytest.fixture(autouse=True, scope="function")
def check_plaintext_keyring():
    """No test must modify a user's keyring.

    If such modifications are needed, a custom keyring setup
    limited to the scope of the test requiring it must be arranged.
    The ``tmp_keyring`` fixture can be employed in such cases.
    """
    # datalad-core configures keyring to use a plaintext backend
    # we will look for the underlying file and verify that it is either
    # not there, or remains unmodified
    import keyring
    backend = keyring.get_keyring()
    if not hasattr(backend, 'file_path'):
        # this is not the plain text keyring, nothing we can do here
        # run as-is, but leave a message
        lgr.warning('Running without the expected plain-text keyring')
        yield
        return

    storage = Path(backend.file_path)

    def _digest():
        # an absent storage file is represented by an empty digest
        return md5sum(storage) if storage.exists() else ''

    digest_before = _digest()
    yield
    assert digest_before == _digest(), (
        "Keyring modification detected. Test must be modified to use "
        "a temporary keyring. Hint: use the `tmp_keyring` fixture."
    )
@pytest.fixture(autouse=False, scope="function")
def credman(datalad_cfg, tmp_keyring):
    """Provides a temporary credential manager

    It comes with a temporary global datalad config and a temporary
    keyring as well. This manager can be used to deploy or manipulate
    credentials within the scope of a single test.
    """
    from datalad import cfg
    from datalad_next.credman import CredentialManager
    yield CredentialManager(cfg)
@pytest.fixture(autouse=False, scope="function")
def dataset(datalad_cfg, tmp_path_factory):
    """Provides a ``Dataset`` instance for a not-yet-existing repository

    The instance points to an existing temporary path, but ``create()``
    has not been called on it yet.
    """
    # must use the factory to get a unique path even when a concrete
    # test also uses `tmp_path`
    yield Dataset(tmp_path_factory.mktemp("dataset"))
@pytest.fixture(autouse=False, scope="function")
def existing_dataset(dataset):
    """Provides a ``Dataset`` instance pointing to an existing dataset/repo

    This fixture uses an instance provided by the ``dataset`` fixture and
    calls ``create()`` on it, before it yields the ``Dataset`` instance.
    """
    ds = dataset
    ds.create(result_renderer='disabled')
    yield ds
@pytest.fixture(autouse=False, scope="function")
def existing_noannex_dataset(dataset):
    """Just like ``existing_dataset``, but created with ``annex=False``
    """
    ds = dataset
    ds.create(annex=False, result_renderer='disabled')
    yield ds
@pytest.fixture(scope="session")
def modified_dataset(tmp_path_factory):
    """Produces a dataset with various modifications

    The fixture is session-scope, aiming to be reused by many tests focused
    on reporting. It does not support any further modification. The fixture
    will fail, if any such modification is detected.

    ``git status`` will report::

        ❯ git status -uall
        On branch dl-test-branch
        Changes to be committed:
          (use "git restore --staged <file>..." to unstage)
                new file:   dir_m/file_a
                new file:   file_a

        Changes not staged for commit:
          (use "git add/rm <file>..." to update what will be committed)
          (use "git restore <file>..." to discard changes in working directory)
          (commit or discard the untracked or modified content in submodules)
                deleted:    dir_d/file_d
                deleted:    dir_m/file_d
                modified:   dir_m/file_m
                deleted:    dir_sm/sm_d
                modified:   dir_sm/sm_m (modified content)
                modified:   dir_sm/sm_mu (modified content, untracked content)
                modified:   dir_sm/sm_n (new commits)
                modified:   dir_sm/sm_nm (new commits, modified content)
                modified:   dir_sm/sm_nmu (new commits, modified content, untracked content)
                modified:   dir_sm/sm_u (untracked content)
                deleted:    file_d
                modified:   file_m

        Untracked files:
          (use "git add <file>..." to include in what will be committed)
                dir_m/dir_u/file_u
                dir_m/file_u
                dir_u/file_u
                file_u

    Suffix indicates the ought-to state (multiple possible):

    a - added
    c - clean
    d - deleted
    n - new commits
    m - modified
    u - untracked content

    Prefix indicated the item type:

    file - file
    sm - submodule
    dir - directory
    """
    ds = Dataset(tmp_path_factory.mktemp("modified_dataset"))
    ds.create(result_renderer='disabled')
    ds_dir = ds.pathobj / 'dir_m'
    ds_dir.mkdir()
    ds_dir_d = ds.pathobj / 'dir_d'
    ds_dir_d.mkdir()
    # to-be-modified files in the superdataset, and in dir_m
    (ds_dir / 'file_m').touch()
    (ds.pathobj / 'file_m').touch()
    # all submodules live under dir_sm; `dss` maps the suffix-coded names
    # to their Dataset instances (plus '.' for the superdataset and 'dir'
    # for the plain dir_m directory, filled in below)
    dirsm = ds.pathobj / 'dir_sm'
    dss = {}
    for smname in (
        'sm_d', 'sm_c', 'sm_n', 'sm_m', 'sm_nm', 'sm_u', 'sm_mu', 'sm_nmu',
        'droppedsm_c',
    ):
        sds = Dataset(dirsm / smname).create(result_renderer='disabled')
        # for the plain modification, commit the reference right here
        if smname in ('sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
            (sds.pathobj / 'file_m').touch()
            sds.save(to_git=True, result_renderer='disabled')
        dss[smname] = sds
    # files in superdataset to be deleted
    for d in (ds_dir_d, ds_dir, ds.pathobj):
        (d / 'file_d').touch()
    dss['.'] = ds
    dss['dir'] = ds_dir
    ds.save(to_git=True, result_renderer='disabled')
    # one submodule is dropped to get the 'clean-but-unavailable' state
    ds.drop(dirsm / 'droppedsm_c', what='datasets',
            reckless='availability', result_renderer='disabled')
    # a new commit
    for smname in ('.', 'sm_n', 'sm_nm', 'sm_nmu'):
        sds = dss[smname]
        (sds.pathobj / 'file_c').touch()
        sds.save(to_git=True, result_renderer='disabled')
    # modified file
    for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
        obj = dss[smname]
        # entries in `dss` are either Dataset instances or plain paths
        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
        (pobj / 'file_m').write_text('modify!')
    # untracked
    for smname in ('.', 'dir', 'sm_u', 'sm_mu', 'sm_nmu'):
        obj = dss[smname]
        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
        (pobj / 'file_u').touch()
        (pobj / 'dirempty_u').mkdir()
        (pobj / 'dir_u').mkdir()
        (pobj / 'dir_u' / 'file_u').touch()
    # delete items
    rmtree(dss['sm_d'].pathobj)
    rmtree(ds_dir_d)
    (ds_dir / 'file_d').unlink()
    (ds.pathobj / 'file_d').unlink()
    # added items
    for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
        obj = dss[smname]
        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
        (pobj / 'file_a').write_text('added')
        assert call_git_success(['add', 'file_a'], cwd=pobj)
    # record git-status output as a reference
    status_start = call_git_lines(['status'], cwd=ds.pathobj)
    yield ds
    # compare with initial git-status output, if there are any
    # differences the assumptions of any consuming test could be
    # invalidated. The modifying code must be found and fixed
    assert status_start == call_git_lines(['status'], cwd=ds.pathobj), \
        "Unexpected modification of the testbed"
@pytest.fixture(autouse=False, scope="session")
def webdav_credential():
    """Provides HTTP Basic authentication credential necessary to access the
    server provided by the ``webdav_server`` fixture."""
    yield {
        'name': 'dltest-my&=webdav',
        'user': 'datalad',
        'secret': 'secure',
        'type': 'user_password',
    }
@pytest.fixture(autouse=False, scope="function")
def webdav_server(tmp_path_factory, webdav_credential):
    """Provides a WebDAV server, serving a temporary directory

    The fixtures yields an instance of ``WebDAVPath``, providing the
    following essential attributes:

    - ``path``: ``Path`` instance of the served temporary directory
    - ``url``: HTTP URL to access the WebDAV server

    Server access requires HTTP Basic authentication with the credential
    provided by the ``webdav_credential`` fixture.
    """
    credentials = (webdav_credential['user'], webdav_credential['secret'])
    # must use the factory to get a unique path even when a concrete
    # test also uses `tmp_path`
    serve_path = tmp_path_factory.mktemp("webdav")
    # this looks a little awkward, but is done to avoid a change in
    # WebDAVPath.
    server = WebDAVPath(serve_path, auth=credentials)
    with server as server_url:
        server.url = server_url
        yield server
@pytest.fixture(autouse=False, scope="session")
def http_credential():
    """Provides the HTTP Basic authentication credential necessary to access
    the HTTP server provided by the ``http_server_with_basicauth`` fixture."""
    yield {
        'name': 'dltest-my&=http',
        'user': 'datalad',
        'secret': 'secure',
        'type': 'user_password',
    }
@pytest.fixture(autouse=False, scope="function")
def http_server(tmp_path_factory):
    """Provides an HTTP server, serving a temporary directory

    The fixtures yields an instance of ``HTTPPath``, providing the
    following essential attributes:

    - ``path``: ``Path`` instance of the served temporary directory
    - ``url``: HTTP URL to access the HTTP server
    """
    # must use the factory to get a unique path even when a concrete
    # test also uses `tmp_path`.
    # label the tempdir 'http' (previously 'webdav', a copy-paste leftover
    # from the webdav_server fixture) so leftover dirs are attributable
    path = tmp_path_factory.mktemp("http")
    server = HTTPPath(path, use_ssl=False, auth=None)
    with server:
        # overwrite path with Path object for convenience
        server.path = path
        yield server
@pytest.fixture(autouse=False, scope="function")
def http_server_with_basicauth(tmp_path_factory, http_credential):
    """Like ``http_server`` but requiring authentication via ``http_credential``
    """
    # label the tempdir 'http' (previously 'webdav', a copy-paste leftover
    # from the webdav_server fixture) so leftover dirs are attributable
    path = tmp_path_factory.mktemp("http")
    server = HTTPPath(
        path, use_ssl=False,
        auth=(http_credential['user'], http_credential['secret']),
    )
    with server:
        # overwrite path with Path object for convenience
        server.path = path
        yield server
@pytest.fixture(scope="session")
def httpbin_service():
    """Return canonical access URLs for the HTTPBIN service

    This fixture tries to spin up a httpbin Docker container at
    localhost:8765; if successful, it returns this URL as the 'standard'
    URL.  If the attempt fails, a URL pointing to the canonical instance
    is returned.

    For tests that need to have the service served via a specific
    protocol (https vs http), the corresponding URLs are returned
    too. They always point to the canonical deployment, as some tests
    require both protocols simultaneously and a local deployment
    generally won't have https.
    """
    hburl = 'http://httpbin.org'
    hbsurl = 'https://httpbin.org'
    ciurl = 'http://localhost:8765'
    # docker is only attempted on POSIX systems
    if os.name == "posix":
        try:
            r = subprocess.run(
                ["docker", "run", "-d", "-p", "127.0.0.1:8765:80",
                 "kennethreitz/httpbin"],
                check=True,
                stdout=subprocess.PIPE,
                text=True,
            )
        except (OSError, subprocess.CalledProcessError):
            lgr.warning("Failed to spin up httpbin Docker container:",
                        exc_info=True)
            container_id = None
        else:
            container_id = r.stdout.strip()
    else:
        container_id = None
    try:
        if container_id is not None:
            # Wait for container to fully start:
            for _ in range(25):
                try:
                    # probe the service; close the response immediately to
                    # avoid leaking a socket per polling attempt (the
                    # response object was previously left open)
                    with urlopen(ciurl):
                        pass
                except Exception:
                    sleep(1)
                else:
                    break
            else:
                raise RuntimeError(
                    "httpbin container did not start up in time")
        yield {
            "standard": ciurl if container_id is not None else hbsurl,
            "http": hburl,
            "https": hbsurl,
        }
    finally:
        # always remove a container we started ourselves
        if container_id is not None:
            subprocess.run(["docker", "rm", "-f", container_id], check=True)
@pytest.fixture(scope="function")
def httpbin(httpbin_service):
    """Does the same thing as ``httpbin_service``, but skips on function-scope

    ``httpbin_service`` always returns access URLs for HTTPBIN. However,
    in some cases it is simply not desirable to run a test. For example,
    the appveyor workers are more or less constantly unable to access the
    public service. This fixture is evaluated at function-scope and skips
    the test whenever any of these undesired conditions is detected.
    Otherwise it just relays ``httpbin_service``.
    """
    env = os.environ
    if env.get('DATALAD_TESTS_NONETWORK'):
        pytest.skip(
            'Not running httpbin-based test: NONETWORK flag set'
        )
    # on appveyor, only run against a locally deployed docker instance
    appveyor_without_docker = (
        'APPVEYOR' in env and 'DEPLOY_HTTPBIN_IMAGE' not in env
    )
    if appveyor_without_docker:
        pytest.skip(
            "Not running httpbin-based test on appveyor without "
            "docker-deployed instance -- too unreliable"
        )
    yield httpbin_service
@pytest.fixture(autouse=False, scope="function")
def datalad_interactive_ui(monkeypatch):
    """Yields a UI replacement to query for operations and stage responses

    No output will be written to STDOUT/ERR by this UI.

    A standard usage pattern is to stage one or more responses, run the
    to-be-tested code, and verify that the desired user interaction
    took place::

        > datalad_interactive_ui.staged_responses.append('skip')
        > ...
        > assert ... datalad_interactive_ui.log
    """
    from datalad_next.uis import ui_switcher
    from .utils import InteractiveTestUI

    replacement_ui = InteractiveTestUI()
    with monkeypatch.context() as mp:
        mp.setattr(ui_switcher, '_ui', replacement_ui)
        yield ui_switcher.ui
@pytest.fixture(autouse=False, scope="function")
def datalad_noninteractive_ui(monkeypatch):
    """Yields a UI replacement to query for operations

    No output will be written to STDOUT/ERR by this UI.

    A standard usage pattern is to run the to-be-tested code, and verify
    that the desired user messaging took place::

        > ...
        > assert ... datalad_noninteractive_ui.log
    """
    from datalad_next.uis import ui_switcher
    from .utils import TestUI

    replacement_ui = TestUI()
    with monkeypatch.context() as mp:
        mp.setattr(ui_switcher, '_ui', replacement_ui)
        yield ui_switcher.ui
@pytest.fixture(autouse=False, scope="session")
def sshserver_setup(tmp_path_factory):
    """Validate and publish the SSH-server test configuration

    Skips unless ``DATALAD_TESTS_SSH`` is set. Reads the recognized
    ``DATALAD_TESTS_SERVER_*`` environment variables, fills in defaults,
    verifies SSH access with them, re-posts the effective configuration
    as environment variables, and yields it as a dict (keys: ``HOST``,
    ``SSH_PORT``, ``SSH_LOGIN``, ``SSH_SECKEY``, ``SSH_PATH``,
    ``LOCALPATH``).
    """
    if not os.environ.get('DATALAD_TESTS_SSH'):
        pytest.skip(
            "set DATALAD_TESTS_SSH=1 to enable")
    # query a bunch of recognized configuration environment variables,
    # fill in the blanks, then check if the given configuration is working,
    # and post the full configuration again as ENV vars, to be picked up by
    # the function-scope `datalad_cfg`
    tmp_root = str(tmp_path_factory.mktemp("sshroot"))
    host = os.environ.get('DATALAD_TESTS_SERVER_SSH_HOST', 'localhost')
    port = os.environ.get('DATALAD_TESTS_SERVER_SSH_PORT', '22')
    login = os.environ.get(
        'DATALAD_TESTS_SERVER_SSH_LOGIN',
        getpass.getuser())
    seckey = os.environ.get(
        'DATALAD_TESTS_SERVER_SSH_SECKEY',
        str(Path.home() / '.ssh' / 'id_rsa'))
    path = os.environ.get('DATALAD_TESTS_SERVER_SSH_PATH', tmp_root)
    # TODO this should not use `tmp_root` unconditionally, but only if
    # the SSH_PATH is known to be the same. This might not be if SSH_PATH
    # is explicitly configured and LOCALPATH is not -- which could be
    # an indication that there is none
    localpath = os.environ.get('DATALAD_TESTS_SERVER_LOCALPATH', tmp_root)
    # fails (and thereby fails the fixture) if the configuration does
    # not actually provide SSH access
    assert_ssh_access(host, port, login, seckey, path, localpath)
    info = {}
    # as far as we can tell, this is good, post effective config in ENV
    for v, e in (
            (host, 'HOST'),
            # this is SSH_*, because elsewhere we also have other properties
            # for other services
            (port, 'SSH_PORT'),
            (login, 'SSH_LOGIN'),
            (seckey, 'SSH_SECKEY'),
            (path, 'SSH_PATH'),
            (localpath, 'LOCALPATH'),
    ):
        os.environ[f"DATALAD_TESTS_SERVER_{e}"] = v
        info[e] = v
    yield info
@pytest.fixture(autouse=False, scope="function")
def sshserver(sshserver_setup, datalad_cfg, monkeypatch):
    """Yield ``(base_url, local_path)`` for the configured SSH test server

    Builds an ``ssh://`` base URL from the configuration published by
    ``sshserver_setup``, and points ``DATALAD_SSH_IDENTITYFILE`` at the
    configured private key for the duration of the test.
    """
    login = sshserver_setup['SSH_LOGIN']
    host = sshserver_setup['HOST']
    port = sshserver_setup['SSH_PORT']
    # strip any leading / from the path, we add one, and only one, below
    remote_path = sshserver_setup['SSH_PATH'].lstrip('/')
    baseurl = f"ssh://{login}@{host}:{port}/{remote_path}"
    with monkeypatch.context() as mp:
        mp.setenv("DATALAD_SSH_IDENTITYFILE", sshserver_setup['SSH_SECKEY'])
        # force reload the config manager, to ensure the private key setting
        # makes it into the active config
        datalad_cfg.reload(force=True)
        yield baseurl, Path(sshserver_setup['LOCALPATH'])