Source code for datalad_ria.patches.sshremoteio

"""Correct flaws in `SSHRemoteIO` operation

The original code has a number of problems.

1. The ``cmd``-argument for the shell ssh-process, which is created by:
   ``self.shell = subprocess.Popen(cmd, ...)`` is not correct, if ``self.ssh``i
   is an instance of ``NoMultiplexSSHConnection``.

   The changes in this patch build the correct ``cmd``-argument by adding
   additional arguments to ``cmd``, if `self.ssh` is an instance of
   ``NoMultiplexSSHConnection``. More precisely, the arguments that are
   required to open a "shell" in a ``NoMultiplexSSHConnection`` are stored in
   ``NoMultiplexSSHConnection._ssh_open_args`` and not in
   ``NoMultiplexSSHConnection._ssh_args``. This patch therefore provides
   arguments from both lists, i.e. from ``_ssh_args`` and ``_ssh_open_args`` in
   the call that opens a "shell", if ``self.ssh`` is an instance of
   ``NoMultiplexSSHConnection``.

2. The while-loop that waits to read ``b"RIA-REMOTE-LOGIN-END\\n"`` from the
   shell ssh-process did not contain any error handling. That led to an
   infinite loop in case that the shell ssh-process terminates without writing
   ``b"RIA-REMOTE-LOGIN-END\\n"`` to its stdout, or in the case that the
   stdout-pipeline of the shell ssh-process is closed.

   This patch introduces two checks into the while loop. One check for
   termination of the ssh shell-process, and one check for a closed
   stdout-pipeline of the ssh shell-process, i.e. reading an EOF from the
   stdout-pipeline. If any of those two cases appears, an exception is raised.

3. The output endmarker handling in ``SSHRemoteIO._run()`` could not reliably
   handle commands that would yield output without a final newline (e.g.,
   ``cat`` of a file without a trailing newline). This patch changes to
   endmarker handling to guarantee that they appear on a dedicated line,
   by prefixing the marker itself with a newline (which is withheld form the
   actual output).

4. ``SSHRemoteIO.remove_dir()`` would not fail (unlike ``FileIO.remove_dir()``)
   when ran on a non-empty directory. Despite not failing, it would also not
   remove that directory.

In addition, this patch modifies two comments. It adds a missing description of
the ``buffer_size``-parameter of ``SSHRemoteIO.__init__``to the doc-string, and
fixes the description of the condition in the comment on the use of
``DEFAULT_BUFFER_SIZE``.
"""

import logging
import subprocess

from datalad.distributed.ora_remote import (
    ssh_manager,
    sh_quote,
    RemoteCommandFailedError,
    RIARemoteError,
)
# we need to get this from elsewhere, the orginal code does local imports
from datalad.support.exceptions import CommandError
# we need this for a conditional that is not part of the original code
from datalad.support.sshconnector import NoMultiplexSSHConnection

from datalad_next.utils.consts import COPY_BUFSIZE
from datalad_next.patches import apply_patch

# use same logger as -core
lgr = logging.getLogger('datalad.customremotes.ria_remote')


DEFAULT_BUFFER_SIZE = COPY_BUFSIZE


# The method 'SSHRemoteIO__init__' is a patched version of
# 'datalad/distributed/ora-remote.py:SSHRemoteIO.__init___'
# from datalad@8a145bf432ae8931be7039c97ff602e53813d238
[docs] def SSHRemoteIO__init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): """ Parameters ---------- host : str SSH-accessible host(name) to perform remote IO operations on. buffer_size: int or None The preferred buffer size """ # the connection to the remote # we don't open it yet, not yet clear if needed self.ssh = ssh_manager.get_connection( host, use_remote_annex_bundle=False, ) self.ssh.open() # This is a PATCH: it extends ssh_args to contain all # necessary parameters ssh_args = self.ssh._ssh_args if isinstance(self.ssh, NoMultiplexSSHConnection): ssh_args.extend(self.ssh._ssh_open_args) cmd = ['ssh'] + ssh_args + [self.ssh.sshri.as_str()] # open a remote shell self.shell = subprocess.Popen(cmd, stderr=subprocess.DEVNULL, stdout=subprocess.PIPE, stdin=subprocess.PIPE) # swallow login message(s): self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n") self.shell.stdin.flush() while True: # This is a PATCH: detect a terminated shell-process status = self.shell.poll() if status not in (0, None): raise CommandError(f'ssh shell process exited with {status}') line = self.shell.stdout.readline() if line == b"RIA-REMOTE-LOGIN-END\n": break # This is a PATCH: detect closing of stdout of the shell-process if line == '': raise RuntimeError(f'ssh shell process close stdout unexpectedly') # TODO: Same for stderr? # make sure default is used if 0 or None was passed, too. self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE # lazy property to store the remote unix name self._remote_uname = None
# The method 'SSHRemoteIO_append_end_markers' is a patched version of # 'datalad/distributed/ora-remote.py:SSHRemoteIO._append_end_markers' # from datalad@58b8e06317fe1a03290aed80526bff1e2d5b7797
[docs] def SSHRemoteIO_append_end_markers(self, cmd): """Append end markers to remote command""" # THE PATCH: the addition of the leading newline char return "{} && printf '\\n%s\\n' {} || printf '\\n%s\\n' {}\n".format( cmd, sh_quote(self.REMOTE_CMD_OK), sh_quote(self.REMOTE_CMD_FAIL), )
# The method 'SSHRemoteIO_run' is a patched version of # 'datalad/distributed/ora-remote.py:SSHRemoteIO._run' # from datalad@58b8e06317fe1a03290aed80526bff1e2d5b7797
[docs] def SSHRemoteIO_run(self, cmd, no_output=True, check=False): # TODO: we might want to redirect stderr to stdout here (or have # additional end marker in stderr) otherwise we can't empty stderr # to be ready for next command. We also can't read stderr for # better error messages (RemoteError) without making sure there's # something to read in any case (it's blocking!). # However, if we are sure stderr can only ever happen if we would # raise RemoteError anyway, it might be okay. call = self._append_end_markers(cmd) self.shell.stdin.write(call.encode()) self.shell.stdin.flush() # PATCH: helper to strip the endmarker newline def _strip_endmarker_newline(lines): if lines[-1] == '\n': lines = lines[:-1] else: lines[-1] = lines[-1][:-1] return lines lines = [] while True: line = self.shell.stdout.readline().decode() if line == self.REMOTE_CMD_OK + '\n': # PATCH remove leading newline that also belongs to the endmarker lines = _strip_endmarker_newline(lines) # end reading break elif line == self.REMOTE_CMD_FAIL + '\n': # PATCH remove leading newline that also belongs to the endmarker lines = _strip_endmarker_newline(lines) if check: raise RemoteCommandFailedError( "{cmd} failed: {msg}".format(cmd=cmd, msg="".join(lines[:-1])) ) else: break # PATCH add line only here, to skip end markers alltogether lines.append(line) if no_output and len(lines) > 1: raise RIARemoteError("{}: {}".format(call, "".join(lines))) return "".join(lines)
# The method 'SSHRemoteIO_run' is a patched version of # 'datalad/distributed/ora-remote.py:SSHRemoteIO._run' # from datalad@58b8e06317fe1a03290aed80526bff1e2d5b7797
[docs] def SSHRemoteIO_remove_dir(self, path): with self.ensure_writeable(path.parent): self._run('rmdir {}'.format(sh_quote(str(path))), # THIS IS THE PATCH # we want it to fail, like rmdir() would fail # on non-empty dirs check=True)
for target, patch in ( ('__init__', SSHRemoteIO__init__), ('_append_end_markers', SSHRemoteIO_append_end_markers), ('_run', SSHRemoteIO_run), ('remove_dir', SSHRemoteIO_remove_dir), ): apply_patch('datalad.distributed.ora_remote', 'SSHRemoteIO', target, patch)