[docs]
def git_url_basename(
    repo: str,
    *,
    is_bundle: bool = False,
    is_bare: bool = False,
) -> str:
    """Guess directory name from Git repository URL/path

    Aims to replicate the behaviour of the ``git_url_basename()`` C function,
    which is part of the Git sources (``dir.c``).

    The parameter ``repo`` is the repository URL or path.
    When ``is_bundle`` is ``True``, ``repo`` is treated as a bundle, i.e. a
    trailing ``.bundle`` (rather than ``.git``) is removed from the name.
    When ``is_bare`` is ``True`` the repository (target) is treated
    as bare (adds '.git' suffix).

    Both ``/`` and ``\\`` are treated as directory separators, regardless
    of the platform. Suffixes are matched case-sensitively and only ASCII
    digits are recognized as a port number, like Git does.

    A guessed directory name is returned. ``ValueError`` is raised, if no
    directory name could be guessed (including the case where nothing is
    left after whitespace/control characters were cleaned up).
    """
    noguess_msg = 'No directory name could be guessed.'
    dir_seps = ('/', '\\')
    ascii_digits = '0123456789'

    # 1. Skip scheme (e.g. "https://")
    scheme_pos = repo.find('://')
    start = 0 if scheme_pos == -1 else scheme_pos + 3
    end = len(repo)

    # 2. Skip authentication data. This is greedy: everything up to the
    #    last '@' within the host part (before the first separator) goes.
    ptr = start
    while ptr < end and repo[ptr] not in dir_seps:
        if repo[ptr] == '@':
            start = ptr + 1
        ptr += 1

    # 3. Strip trailing spaces, slashes and a trailing "/.git" component
    while start < end and (
        repo[end - 1] in dir_seps or repo[end - 1].isspace()
    ):
        end -= 1
    git_suffix = '.git'
    suffix_skip = len(git_suffix) + 1  # the suffix plus its separator
    if (
        end - start > suffix_skip
        and repo[end - suffix_skip] in dir_seps
        and repo[end - len(git_suffix) : end] == git_suffix
    ):
        end -= suffix_skip
        while start < end and repo[end - 1] in dir_seps:
            end -= 1

    # 4. Strip trailing port number if we have only a hostname (no '/',
    #    but a colon). Paths like '/foo/bar:2222.git' must keep yielding
    #    '2222', hence the restriction to separator-free remainders.
    #    Only ASCII digits count; `str.isdigit()` would also accept other
    #    Unicode digits, which Git does not treat as a port.
    remainder = repo[start:end]
    if '/' not in remainder and ':' in remainder:
        ptr = end
        while ptr > start and repo[ptr - 1] in ascii_digits:
            ptr -= 1
        if ptr > start and repo[ptr - 1] == ':':
            end = ptr - 1

    # 5. Find last component. A colon is regarded as a separator too, such
    #    that 'foo:bar.git' yields 'bar'.
    ptr = end
    while ptr > start and repo[ptr - 1] not in dir_seps and repo[ptr - 1] != ':':
        ptr -= 1
    start = ptr

    # 6. Strip suffix ".bundle" or ".git" (case-sensitive, as in Git)
    suffix = '.bundle' if is_bundle else git_suffix
    name = repo[start:end]
    if name.endswith(suffix):
        name = name[: -len(suffix)]
    if not name:
        raise ValueError(noguess_msg)

    # 7. Append ".git" for bare repositories
    if is_bare:
        name = f'{name}.git'

    # 8. Replace control characters (<0x20) with a space, then collapse
    #    each whitespace run into a single space and drop leading/trailing
    #    whitespace (`split()` without arguments does both).
    spaced = ''.join(
        ' ' if ord(ch) < 0x20 else ch  # noqa: PLR2004
        for ch in name
    )
    cleaned = ' '.join(spaced.split())
    if not cleaned:
        # nothing but control characters/whitespace: not a usable name
        raise ValueError(noguess_msg)
    return cleaned