Source code for datalad_metalad.extractors.legacy.image

# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""generic image metadata extractor"""

from os.path import join as opj
import logging
lgr = logging.getLogger('datalad.metadata.extractors.image')
from datalad.log import log_progress

from PIL import Image
from datalad.support.exceptions import CapturedException

from ..base import BaseMetadataExtractor


vocabulary = {
    "spatial_resolution(dpi)": {
        '@id': "idqa:0000162",
        'unit': "uo:0000240",  # DPI
        'unit_label': 'dpi',
        'description': "spatial resolution in dot-per-inch"},
    "color_mode": {
        '@id': 'idqa:0000160',
        'description': 'color resolution/mode'},
}


mode_map = {
    '1': '1-bit pixels, black and white, stored with one pixel per byte',
    'L': '8-bit pixels, black and white',
    'P': '8-bit pixels, mapped to any other mode using a color palette',
    'RGB': '3x8-bit pixels, true color',
    'RGBA': '4x8-bit pixels, true color with transparency mask',
    'CMYK': '4x8-bit pixels, color separation',
    'YCbCr': '3x8-bit pixels, color video format',
    'LAB': '3x8-bit pixels, the L*a*b color space',
    'HSV': '3x8-bit pixels, Hue, Saturation, Value color space',
    'I': '32-bit signed integer pixels',
    'F': '32-bit floating point pixels',
}


[docs]class ImageMetadataExtractor(BaseMetadataExtractor): _extractors = { 'format': lambda x: x.format_description, 'dcterms:SizeOrDuration': lambda x: x.size, 'spatial_resolution(dpi)': lambda x: x.info.get('dpi', ''), 'color_mode': lambda x: mode_map.get(x.mode, ''), }
[docs] def get_metadata(self, dataset, content): if not content: return {}, [] contentmeta = [] log_progress( lgr.info, 'extractorimage', 'Start image metadata extraction from %s', self.ds, total=len(self.paths), label='image metadata extraction', unit=' Files', ) for f in self.paths: absfp = opj(self.ds.path, f) log_progress( lgr.info, 'extractorimage', 'Extract image metadata from %s', absfp, update=1, increment=True) try: img = Image.open(absfp) except Exception as e: lgr.debug("Image metadata extractor failed to load %s: %s", absfp, CapturedException(e)) continue meta = { 'type': 'dctype:Image', } # run all extractors meta.update({k: v(img) for k, v in self._extractors.items()}) # filter useless fields (empty strings and NaNs) meta = {k: v for k, v in meta.items() if not (hasattr(v, '__len__') and not len(v))} contentmeta.append((f, meta)) log_progress( lgr.info, 'extractorimage', 'Finished image metadata extraction from %s', self.ds ) return { '@context': vocabulary, }, \ contentmeta