# Source code for datalad_core.runners.git

from __future__ import annotations

import logging
import os
import subprocess
from functools import wraps
from typing import (
    TYPE_CHECKING,
)

if TYPE_CHECKING:
    from collections.abc import Iterator, Mapping
    from pathlib import Path

from datasalad.itertools import (
    decode_bytes,
    itemize,
)

from datalad_core.runners.imports import (
    CommandError,
    iter_subproc,
)

lgr = logging.getLogger('datalad.runners')


def _call_git(
    args: list[str],
    *,
    capture_output: bool = False,
    cwd: Path | None = None,
    check: bool = False,
    env: Mapping[str, str] | None = None,
    text: bool | None = None,
    inputs: str | bytes | None = None,
    force_c_locale: bool = False,
) -> subprocess.CompletedProcess:
    """Wrapper around ``subprocess.run`` for calling a Git command

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.
    The adjusted environment is a copy of ``env`` (or of ``os.environ`` if
    ``env`` is ``None``) with ``LC_ALL=C`` set; the mapping passed by the
    caller is not modified.

    ``inputs`` is handed to ``subprocess.run()`` as its ``input`` argument.
    All other arguments are passed on to ``subprocess.run()`` verbatim.

    Returns
    -------
    The ``subprocess.CompletedProcess`` instance produced by
    ``subprocess.run()``.

    Raises
    ------
    CommandError if ``check`` is enabled and ``subprocess.run()`` raised
    ``CalledProcessError``. The original exception is attached as the cause.
    """
    if force_c_locale:
        # Only fall back to the parent environment when no environment was
        # given at all. Testing for `None` (rather than truthiness) keeps an
        # explicitly empty mapping empty, consistent with what
        # `subprocess.run()` does with it when `force_c_locale` is off.
        env = dict(os.environ if env is None else env, LC_ALL='C')

    # make configurable
    git_executable = 'git'
    cmd = [git_executable, *args]
    try:
        return subprocess.run(
            cmd,
            capture_output=capture_output,
            cwd=cwd,
            check=check,
            text=text,
            input=inputs,
            env=env,
        )
    except subprocess.CalledProcessError as e:
        # TODO: we could support post-error forensics, but some client
        # might call this knowing that it could fail, and may not
        # appreciate the slow-down. Add option `expect_fail=False`?
        #
        # normalize exception to datalad-wide standard
        # TODO: CommandError.from_callprocesserror
        exc = CommandError(
            cmd=cmd,
            returncode=e.returncode,
            stdout=e.stdout,
            stderr=e.stderr,
            cwd=cwd,
        )
        raise exc from e



[docs]
def call_git(
    args: list[str],
    *,
    cwd: Path | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
    inputs: str | bytes | None = None,
    text: bool | None = None,
    capture_output: bool = False,
) -> str | bytes | None:
    """Call Git, raise on non-zero exit, optionally return captured STDOUT.

    ``args`` holds the arguments for the Git command, without the Git
    executable itself.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    If ``capture_output`` is ``True``, process output is captured (and not
    relayed to the parent process/terminal). This is necessary for reporting
    any error messaging via a ``CommandError`` exception. By default process
    output is not captured.

    ``env``, ``inputs`` and ``text`` are forwarded unchanged to the
    underlying runner.

    Returns
    -------
    The captured STDOUT (``str`` or ``bytes``, depending on the value of
    ``text``) when ``capture_output`` is enabled. Otherwise ``None``, to
    indicate that no output was captured.
    """
    # `check=True` is fixed here: this variant always fails loudly
    completed = _call_git(
        args,
        check=True,
        capture_output=capture_output,
        text=text,
        inputs=inputs,
        cwd=cwd,
        env=env,
        force_c_locale=force_c_locale,
    )
    if not capture_output:
        return None
    return completed.stdout




[docs]
def call_git_success(
    args: list[str],
    *,
    cwd: Path | None = None,
    capture_output: bool = False,
    env: Mapping[str, str] | None = None,
) -> bool:
    """Run a Git command and tell whether it succeeded.

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``capture_output`` is ``True``, process output is captured, but not
    returned. By default process output is not captured.

    Returns
    -------
    ``True`` if the call completed without a ``CommandError``, ``False``
    otherwise. A failure is logged at debug level, including the exception.
    """
    try:
        _call_git(
            args,
            check=True,
            cwd=cwd,
            env=env,
            capture_output=capture_output,
        )
    except CommandError:
        # exc_info=True replaces CapturedException from legacy datalad
        lgr.debug('call_git_success() failed with exception', exc_info=True)
        succeeded = False
    else:
        succeeded = True
    return succeeded




[docs]
def call_git_lines(
    args: list[str],
    *,
    cwd: Path | None = None,
    inputs: str | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
) -> list[str]:
    """Run a Git command and return its (small) output as a list of lines.

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``inputs`` is not None, the argument becomes the subprocess's stdin.
    This is intended for small-scale inputs. For calls that require
    processing large inputs, ``iter_git_subproc()`` is to be preferred.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    Returns
    -------
    The captured STDOUT, split with ``str.splitlines()``.

    Raises
    ------
    CommandError if the call exits with a non-zero status.
    """
    # output is always captured and handled in text mode, so that
    # `stdout` is a `str` that can be split into lines
    completed = _call_git(
        args,
        check=True,
        capture_output=True,
        text=True,
        inputs=inputs,
        cwd=cwd,
        env=env,
        force_c_locale=force_c_locale,
    )
    stdout: str = completed.stdout
    return stdout.splitlines()




[docs]
def call_git_oneline(
    args: list[str],
    *,
    cwd: Path | None = None,
    inputs: str | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
) -> str:
    """Run a Git command that must produce exactly one line of output.

    ``args`` holds the arguments for the Git command, without the Git
    executable itself.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``inputs`` is not None, the argument becomes the subprocess's stdin.
    This is intended for small-scale inputs. For calls that require
    processing large inputs, ``iter_git_subproc()`` is to be preferred.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    Returns
    -------
    The single output line, as reported by ``call_git_lines()``.

    Raises
    ------
    CommandError if the call exits with a non-zero status.
    AssertionError if there is not exactly one line of output.
    """
    output_lines = call_git_lines(
        args,
        cwd=cwd,
        inputs=inputs,
        env=env,
        force_c_locale=force_c_locale,
    )
    if len(output_lines) == 1:
        return output_lines[0]
    # zero or multiple lines violate the contract of this helper
    msg = f'Expected Git {args} to return a single line, but got {output_lines}'
    raise AssertionError(msg)



@wraps(iter_subproc)
def iter_git_subproc(args: list[str], **kwargs):
    """Call a Git command via ``iter_subproc()``

    ``args`` must contain only the arguments for Git, not the Git binary
    itself: ``git`` is put in front of ``args`` internally. Apart from
    that, the argument semantics are identical to those of
    ``iter_subproc()``, and all keyword arguments are handed to it
    unchanged.
    """
    # build a new list, the caller's `args` is left untouched
    return iter_subproc(['git', *args], **kwargs)


@wraps(iter_git_subproc)
def iter_git_subproc_zlines(args: list[str], **kwargs) -> Iterator[str]:
    """Yield the NULL-separated items produced by a ``git <args>`` call

    The command is executed verbatim. It is the caller's responsibility
    to set it up to produce NULL-separated output (e.g., add -z option).

    The process output is decoded to text (with ``backslash_replace``
    enabled) and split on null-bytes. No yielded item carries a trailing
    null-byte separator.
    """
    with iter_git_subproc(args, **kwargs) as git_output:
        # bytes -> str first, then cut the text stream at each '\0'
        decoded = decode_bytes(git_output, backslash_replace=True)
        yield from itemize(decoded, sep='\0', keep_ends=False)