Source code for datalad_next.iter_collections.zipfile

"""Report on the content of ZIP file

The main functionality is provided by the :func:`iter_zip()` function.
"""

from __future__ import annotations

import datetime
from functools import cached_property
import time
import zipfile
from dataclasses import dataclass
from pathlib import (
    Path,
    PurePosixPath,
)
from typing import Generator

from .utils import (
    FileSystemItem,
    FileSystemItemType,
)


@dataclass
class ZipfileItem(FileSystemItem):
    """File system item describing a single member of a ZIP archive"""
    # identifier of the archive member, kept as a plain string
    name: str

    @cached_property
    def path(self) -> PurePosixPath:
        """Returns the item name as a ``PurePosixPath`` instance

        ZIP uses POSIX paths as item identifiers from version 6.3.3 onwards.
        Not all POSIX paths are legal paths on non-POSIX file systems or
        platforms. Therefore we cannot use a platform-dependent
        ``PurePath``-instance to address ZIP-file items, and we use
        ``PurePosixPath``-instances instead.

        The value is computed on first access and cached afterwards.
        """
        posix_path = PurePosixPath(self.name)
        return posix_path
def iter_zip(
    path: Path,
    *,
    fp: bool = False,
) -> Generator[ZipfileItem, None, None]:
    """Uses the standard library ``zipfile`` module to report on ZIP-files

    A ZIP archive can represent more or less the full bandwidth of file system
    properties, therefore reporting on archive members is implemented
    similar to :func:`~datalad_next.iter_collections.directory.iter_dir()`.

    The iterator produces a :class:`ZipfileItem` instance with standard
    information on file system elements, such as ``size``, or ``mtime``.

    Parameters
    ----------
    path: Path
      Path of the ZIP archive to report content for (iterate over).
    fp: bool, optional
      If ``True``, each file-type item includes a file-like object
      to access the file's content. This file handle will be closed
      automatically when the next item is yielded or the function
      returns.

    Yields
    ------
    :class:`ZipfileItem`
      The ``name`` attribute of an item is a ``str`` with the corresponding
      archive member name (in POSIX conventions).
    """
    with zipfile.ZipFile(path, mode='r') as archive:
        for member in archive.infolist():
            item = _get_zipfile_item(member)
            if not (fp and item.type == FileSystemItemType.file):
                # no file handle requested, or the member is not a file
                yield item
                continue
            # the member stays open only while the consumer holds this item;
            # leaving the ``with`` block on resumption closes the handle
            with archive.open(member) as member_fp:
                item.fp = member_fp
                yield item
def _get_zipfile_item(zip_info: zipfile.ZipInfo) -> ZipfileItem:
    """Build a ``ZipfileItem`` from the ``ZipInfo`` record of a member

    The member's ``filename`` and ``file_size`` are taken over as ``name``
    and ``size``. The type is ``directory`` if ``zip_info.is_dir()`` holds
    and ``file`` otherwise. ``mtime`` is derived from ``zip_info.date_time``
    via ``time.mktime()``.
    """
    if zip_info.is_dir():
        item_type = FileSystemItemType.directory
    else:
        item_type = FileSystemItemType.file

    # ``date_time`` carries no timezone; ``time.mktime()`` interprets the
    # resulting struct_time as local time.
    # NOTE(review): ``datetime.datetime()`` raises ``ValueError`` for
    # out-of-range fields (e.g. month 0) -- confirm whether such member
    # timestamps can occur and need handling.
    modified = datetime.datetime(*zip_info.date_time)

    return ZipfileItem(
        name=zip_info.filename,
        type=item_type,
        size=zip_info.file_size,
        mtime=time.mktime(modified.timetuple()),
    )