Source code for datalad_core.clone.utils

def git_url_basename(
    repo: str,
    *,
    is_bundle: bool = False,
    is_bare: bool = False,
) -> str:
    """Guess directory name from Git repository URL/path

    Aims to replicate the behaviour of the ``git_url_basename()`` C
    function, which is part of the Git sources (``dir.c``).

    The parameter ``repo`` is the repository URL or path. When
    ``is_bundle`` is ``True``, ``repo`` is treated as a bundle (a trailing
    ``.bundle`` instead of ``.git`` is removed from the last component).
    When ``is_bare`` is ``True`` the repository (target) is treated as bare
    (adds '.git' suffix).

    A guessed directory name is returned. ``ValueError`` is raised, if no
    directory name could be guessed.
    """
    noguess_msg = 'No directory name could be guessed.'
    # Port numbers are plain ASCII digits. ``str.isdigit()`` is deliberately
    # not used: it also accepts characters such as superscript or
    # Arabic-Indic digits, which must not be mistaken for a port.
    ascii_digits = '0123456789'

    # Helper: treat both '/' and '\\' as directory separators
    def is_dir_sep(ch: str) -> bool:
        return ch in ('/', '\\')

    # ------------------------------------------------------------------
    # 1. Skip scheme (e.g. "https://")
    scheme_pos = repo.find('://')
    start = 0 if scheme_pos == -1 else scheme_pos + 3
    end = len(repo)

    # 2. Skip authentication data up to the last '@' before a separator.
    #    The scan is greedy: every '@' seen moves ``start`` forward.
    ptr = start
    while ptr < end and not is_dir_sep(repo[ptr]):
        if repo[ptr] == '@':
            start = ptr + 1
        ptr += 1

    # 3. Strip trailing spaces, slashes and optional "/.git"
    while start < end and (
        is_dir_sep(repo[end - 1]) or repo[end - 1].isspace()
    ):
        end -= 1
    git_suffix = '.git'
    # length of the suffix plus the directory separator in front of it
    suffix_skip = len(git_suffix) + 1
    if (
        end - start > suffix_skip
        and is_dir_sep(repo[end - suffix_skip])
        and repo[end - len(git_suffix):end] == git_suffix
    ):
        end -= suffix_skip
        while start < end and is_dir_sep(repo[end - 1]):
            end -= 1

    # 4. Guard against negative length (mirrors the C `die` call)
    #    should never happen
    if end - start < 0:
        raise ValueError(noguess_msg)  # pragma: no cover

    # 5. Strip trailing port number if we have only a hostname, i.e. there
    #    is no '/' but a ':' in the remaining text. Paths such as
    #    '/foo/bar:2222.git' are intentionally left alone here, so that
    #    '2222' is picked up as the last component in step 6.
    remainder = repo[start:end]
    if '/' not in remainder and ':' in remainder:
        ptr = end
        while ptr > start and repo[ptr - 1] in ascii_digits:
            ptr -= 1
        if ptr > start and repo[ptr - 1] == ':':
            end = ptr - 1

    # 6. Find last component (treat ':' as a separator as well)
    ptr = end
    while (
        ptr > start
        and not is_dir_sep(repo[ptr - 1])
        and repo[ptr - 1] != ':'
    ):
        ptr -= 1
    start = ptr

    # 7. Strip suffix ".bundle" or ".git". Note that this match is
    #    case-insensitive (e.g. ".GIT" is removed as well), unlike the
    #    exact-case "/.git" check in step 3.
    suffix = '.bundle' if is_bundle else '.git'
    name = repo[start:end]
    if name.lower().endswith(suffix):
        name = name[: -len(suffix)]
    if not name or name == '/':
        raise ValueError(noguess_msg)

    # 8. Append ".git" for bare repositories
    if is_bare:
        name = f'{name}.git'

    # 9. Collapse control characters / whitespace to a single space,
    #    strip leading/trailing spaces.
    cleaned = []
    prev_space = True  # strip leading whitespace
    for raw in name:
        # replace control chars (<0x20) with space
        ch = ' ' if ord(raw) < 0x20 else raw  # noqa: PLR2004
        if not ch.isspace():
            prev_space = False
            cleaned.append(ch)
        elif not prev_space:
            # first whitespace of a run: keep exactly one ASCII space
            prev_space = True
            cleaned.append(' ')
    return ''.join(cleaned).strip()