# Source code for datalad_metalad.extractors.legacy.frictionless_datapackage
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Extractor for friction-less data packages
(http://specs.frictionlessdata.io/data-packages)
"""
import logging
lgr = logging.getLogger('datalad.metadata.extractors.frictionless_datapackage')
from os.path import join as opj, exists
from datalad.support.json_py import load as jsonload
from ..base import BaseMetadataExtractor
def _compact_author(obj):
if isinstance(obj, dict):
bits = []
if 'name' in obj:
bits.append(obj['name'])
if 'email' in obj:
bits.append('<{}>'.format(obj['email']))
if 'web' in obj:
bits.append('({})'.format(obj['web']))
return ' '.join(bits)
else:
return obj
def _compact_license(obj):
if isinstance(obj, dict):
# With obj itself if no url or type
obj = obj.get('path', obj.get('type', obj))
if isinstance(obj, dict) and len(obj) == 1:
# didn't get lucky with compacting, try one more
obj = obj.popitem()[1]
return obj
else:
return obj
class FRDPMetadataExtractor(BaseMetadataExtractor):
    """Metadata extractor reading a ``datapackage.json`` descriptor."""

    # File name of the descriptor; it is looked up directly under the
    # dataset path.
    metadatasrc_fname = 'datapackage.json'

    # Descriptor keys whose values are copied unchanged, mapped to the key
    # under which they are reported.
    _key2stdkey = {
        'name': 'name',
        'title': 'shortdescription',
        'description': 'description',
        'keywords': 'tag',
        'version': 'version',
        'homepage': 'homepage',
    }

    def _get_dataset_metadata(self):
        """Build the dataset-level metadata dict from the descriptor file.

        Returns
        -------
        dict
            Empty if the descriptor file does not exist under
            ``self.ds.path``. Otherwise it holds the mapped plain keys,
            compacted 'author', 'contributors' and 'license' entries where
            the descriptor provides them, and a 'conformsto' marker.
        """
        meta = {}
        metadata_path = opj(self.ds.path, self.metadatasrc_fname)
        if not exists(metadata_path):
            return meta

        foreign = jsonload(metadata_path)

        # Plain one-to-one key translations.
        for term, stdkey in self._key2stdkey.items():
            if term in foreign:
                meta[stdkey] = foreign[term]

        if 'author' in foreign:
            meta['author'] = _compact_author(foreign['author'])
        if 'contributors' in foreign:
            meta['contributors'] = [
                _compact_author(c) for c in foreign['contributors']
            ]

        # two license terms were supported at some point; when both are
        # present, 'licenses' overrides 'license'
        if 'license' in foreign:
            meta['license'] = _compact_license(foreign['license'])
        if 'licenses' in foreign:
            meta['license'] = [
                _compact_license(lic) for lic in foreign['licenses']
            ]

        meta['conformsto'] = 'http://specs.frictionlessdata.io/data-packages'
        return meta

    def _get_content_metadata(self):
        """Return an empty list: no per-file metadata is produced here."""
        return []