Source code for datalad_core.runners.git

from __future__ import annotations

import logging
import os
import subprocess
from functools import wraps
from typing import (
    TYPE_CHECKING,
)

if TYPE_CHECKING:
    from collections.abc import Iterator, Mapping
    from pathlib import Path

from datasalad.itertools import (
    decode_bytes,
    itemize,
)

from datalad_core.runners.imports import (
    CommandError,
    iter_subproc,
)

lgr = logging.getLogger('datalad.runners')


def _call_git(
    args: list[str],
    *,
    capture_output: bool = False,
    cwd: Path | None = None,
    check: bool = False,
    env: Mapping[str, str] | None = None,
    text: bool | None = None,
    inputs: str | bytes | None = None,
    force_c_locale: bool = False,
) -> subprocess.CompletedProcess:
    """Wrapper around ``subprocess.run`` for calling Git command

    ``args`` is a list of argument for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    All other argument are pass on to ``subprocess.run()`` verbatim.
    """
    if force_c_locale:
        env = dict(env or os.environ, LC_ALL='C')

    # make configurable
    git_executable = 'git'
    cmd = [git_executable, *args]
    try:
        return subprocess.run(
            cmd,
            capture_output=capture_output,
            cwd=cwd,
            check=check,
            text=text,
            input=inputs,
            env=env,
        )
    except subprocess.CalledProcessError as e:
        # TODO: we could support post-error forensics, but some client
        # might call this knowing that it could fail, and may not
        # appreciate the slow-down. Add option `expect_fail=False`?
        #
        # normalize exception to datalad-wide standard
        # TODO: CommandError.from_callprocesserror
        exc = CommandError(
            cmd=cmd,
            returncode=e.returncode,
            stdout=e.stdout,
            stderr=e.stderr,
            cwd=cwd,
        )
        raise exc from e


def call_git(
    args: list[str],
    *,
    cwd: Path | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
    inputs: str | bytes | None = None,
    text: bool | None = None,
    capture_output: bool = False,
) -> str | bytes | None:
    """Call Git, optionally capturing its output; raises on non-zero exit.

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    If ``capture_output`` is ``True``, process output is captured (and not
    relayed to the parent process/terminal). This is necessary for reporting
    any error messaging via a ``CommandError`` exception. By default process
    output is not captured.

    All other arguments are passed on to ``subprocess.run()`` verbatim.

    Returns
    -------
    str or bytes or None
      With ``capture_output`` enabled, the captured STDOUT, as ``str`` or
      ``bytes`` depending on the value of ``text``. Otherwise ``None``, to
      indicate that no output was captured.

    Raises
    ------
    CommandError
      if the call exits with a non-zero status.
    """
    completed = _call_git(
        args,
        check=True,
        cwd=cwd,
        env=env,
        inputs=inputs,
        text=text,
        capture_output=capture_output,
        force_c_locale=force_c_locale,
    )
    if not capture_output:
        # nothing was captured, signal that explicitly
        return None
    return completed.stdout
def call_git_success(
    args: list[str],
    *,
    cwd: Path | None = None,
    capture_output: bool = False,
    env: Mapping[str, str] | None = None,
) -> bool:
    """Run a Git command and tell whether it succeeded

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``capture_output`` is ``True``, process output is captured, but not
    returned. By default process output is not captured.

    Returns
    -------
    bool
      ``True`` if the call did not raise ``CommandError``, ``False``
      otherwise. A failure is logged at debug level together with the
      exception information.
    """
    try:
        _call_git(
            args,
            check=True,
            cwd=cwd,
            env=env,
            capture_output=capture_output,
        )
    except CommandError:
        # exc_info=True replaces CapturedException from legacy datalad
        lgr.debug('call_git_success() failed with exception', exc_info=True)
        succeeded = False
    else:
        succeeded = True
    return succeeded
def call_git_lines(
    args: list[str],
    *,
    cwd: Path | None = None,
    inputs: str | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
) -> list[str]:
    """Run a Git command and return its (small) output split into lines

    ``args`` is a list of arguments for the Git command. This list must not
    contain the Git executable itself. It will be prepended (unconditionally)
    to the arguments before passing them on.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``inputs`` is not None, the argument becomes the subprocess's stdin.
    This is intended for small-scale inputs. For calls that require
    processing large inputs, ``iter_git_subproc()`` is to be preferred.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    Output is always captured, and processed in text mode.

    Returns
    -------
    list of str
      The captured STDOUT, split with ``str.splitlines()``.

    Raises
    ------
    CommandError
      if the call exits with a non-zero status.
    """
    completed = _call_git(
        args,
        check=True,
        # text-mode capture is required to be able to split into `str` lines
        capture_output=True,
        text=True,
        cwd=cwd,
        env=env,
        inputs=inputs,
        force_c_locale=force_c_locale,
    )
    captured = completed.stdout
    return captured.splitlines()
def call_git_oneline(
    args: list[str],
    *,
    cwd: Path | None = None,
    inputs: str | None = None,
    env: Mapping[str, str] | None = None,
    force_c_locale: bool = False,
) -> str:
    """Run a Git command that must produce exactly one line of output

    ``args`` is a list of arguments for the Git command, without the Git
    executable itself. All arguments are handed to ``call_git_lines()``.

    If ``cwd`` is not None, the command is executed with ``cwd`` as its
    working directory.

    If ``inputs`` is not None, the argument becomes the subprocess's stdin.
    This is intended for small-scale inputs. For calls that require
    processing large inputs, ``iter_git_subproc()`` is to be preferred.

    If ``force_c_locale`` is ``True`` the environment of the Git process
    is altered to ensure output according to the C locale. This is useful
    when output has to be processed in a locale invariant fashion.

    Returns
    -------
    str
      The single output line.

    Raises
    ------
    CommandError
      if the call exits with a non-zero status.
    AssertionError
      if there is not exactly one line of output.
    """
    lines = call_git_lines(
        args,
        cwd=cwd,
        inputs=inputs,
        env=env,
        force_c_locale=force_c_locale,
    )
    if len(lines) == 1:
        return lines[0]
    # zero lines, or more than one: the caller's expectation is violated
    msg = f'Expected Git {args} to return a single line, but got {lines}'
    raise AssertionError(msg)
# NOTE(review): ``functools.wraps`` copies ``__name__``/``__doc__`` (among
# others) from the wrapped callable, so at runtime the docstrings written
# below are replaced by the one of ``iter_subproc`` -- confirm this is intended.
@wraps(iter_subproc)
def iter_git_subproc(args: list[str], **kwargs):
    """Call a Git command via ``iter_subproc()``

    All argument semantics are identical to those of ``iter_subproc()``,
    except that ``args`` must not contain the Git binary, but need to be
    exclusively arguments to it. The respective `git` command/binary is
    automatically added internally.
    """
    # prepend the executable to a fresh list, the caller's list is untouched
    return iter_subproc(['git', *args], **kwargs)


@wraps(iter_git_subproc)
def iter_git_subproc_zlines(args: list[str], **kwargs) -> Iterator[str]:
    """Yield the NULL-separated items produced by a ``git <args>`` command

    The command is executed verbatim. It is the caller's responsibility
    to set it up to produce NULL-separated output (e.g., add -z option).

    The process output is decoded with ``decode_bytes()`` (using
    ``backslash_replace=True``) and split on null bytes. No yielded item
    will have any trailing null-byte separators.
    """
    with iter_git_subproc(args, **kwargs) as proc_output:
        decoded = decode_bytes(proc_output, backslash_replace=True)
        yield from itemize(decoded, sep='\0', keep_ends=False)