Source code for datalad_osf.create_sibling_osf

# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See LICENSE file distributed along with the datalad_osf package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##

import logging
from datalad.interface.base import (
    Interface,
    build_doc,
)
from datalad.support.annexrepo import AnnexRepo
from datalad.support.param import Parameter
from datalad.distribution.dataset import (
    datasetmethod,
    EnsureDataset,
    require_dataset
)
from datalad.interface.base import eval_results
from datalad.support.constraints import (
    EnsureChoice,
    EnsureNone,
    EnsureStr,
    EnsureBool,
)
from datalad.interface.results import get_status_dict
from datalad.interface.utils import ac
from osfclient import OSF
from datalad_osf.utils import (
    create_node,
    update_node,
    get_credentials,
)
from datalad.utils import ensure_list
from datalad.log import log_progress

lgr = logging.getLogger('datalad.osf.create_sibling_osf')


@build_doc
class CreateSiblingOSF(Interface):
    """Create a dataset representation at OSF.

    This will create a node on OSF and initialize
    an osf special remote to point to it. There are two modes
    this can operate in: 'annex' and 'export'.
    The former uses the OSF node as a key-value store, that
    can be used by git-annex to copy data to and retrieve
    data from (potentially by any clone of the original dataset).
    The latter allows to use 'git annex export' to publish a
    snapshot of a particular version of the dataset. Such an OSF
    node will - in opposition to the 'annex' - be
    human-readable.

    For authentication with OSF, you can define environment variables: Either
    'OSF_TOKEN', or both 'OSF_USERNAME' and 'OSF_PASSWORD'. If neither of these
    is defined, the tool will fall back to the datalad credential manager and
    inquire for credentials interactively.

    """

    result_renderer = 'tailored'

    _params_ = dict(
        dataset=Parameter(
            args=("-d", "--dataset"),
            doc="""Dataset to create a sibling for.""",
            constraints=EnsureDataset() | EnsureNone()
        ),
        title=Parameter(
            args=("--title",),
            doc="""title of the to-be created OSF node that is displayed
            on the OSF website. Defaults to the basename of the root directory
            of the local dataset.""",
            constraints=EnsureStr() | EnsureNone(),
        ),
        name=Parameter(
            args=("-s", "--name",),
            doc="""Name of the to-be initialized osf-special-remote""",
            constraints=EnsureStr()
        ),
        storage_name=Parameter(
            args=("--storage-name",),
            metavar="NAME",
            doc="""Name of the storage sibling (git-annex special remote).
            Must not be identical to the sibling name. If not specified,
            defaults to the sibling name plus '-storage' suffix.""",
            constraints=EnsureStr() | EnsureNone()),
        existing=Parameter(
            args=("--existing",),
            constraints=EnsureChoice(
                'skip', 'error') | EnsureNone(),
            metavar='MODE',
            doc="""Action to perform, if a (storage) sibling is already
            configured under the given name and/or a target already exists.
            In this case, a dataset can be skipped ('skip'), or the command
            be instructed to fail ('error').""", ),
        trust_level=Parameter(
            args=("--trust-level",),
            metavar="TRUST-LEVEL",
            constraints=EnsureChoice(
                'trust', 'semitrust', 'untrust') | EnsureNone(),
            doc="""specify a trust level for the storage sibling. If not
            specified, the default git-annex trust level is used.""",),
        mode=Parameter(
            args=("--mode",),
            doc=""" """,
            constraints=EnsureChoice(
                "annex", "export", "exportonly", "gitonly")
        ),
        tags=Parameter(
            args=('--tag',),
            dest='tags',
            metavar='TAG',
            doc="""specific one or more tags for the to-be-create OSF node.
            A tag 'DataLad dataset' and the dataset ID (if there is any)
            will be automatically added as additional tags.
            [CMD: This option can be given more than once CMD].""",
            action='append',
        ),
        public=Parameter(
            args=("--public",),
            doc="""make OSF node public""",
            action='store_true',
        ),
        category=Parameter(
            args=("--category",),
            doc="""specific the OSF node category to be used for the
            node. The categorization determines what icon is displayed
            with the node on the OSF, and helps with search organization""",
            # all presently supported categories
            constraints=EnsureChoice(
                "analysis", "communication", "data", "hypothesis",
                "instrumentation", "methods and measures", "procedure",
                "project", "software", "other")
        ),
        description=Parameter(
            args=("--description",),
            metavar="TEXT",
            doc="""Description of the OSF node that will be displayed on
            the associated project page. By default a description will be
            generated based on the mode the sibling is put into.""",
            constraints=EnsureStr() | EnsureNone()),
    )

    @staticmethod
    @datasetmethod(name='create_sibling_osf')
    @eval_results
    def __call__(title=None,
                 name="osf",
                 storage_name=None,
                 dataset=None,
                 mode="annex",
                 existing='error',
                 trust_level=None,
                 tags=None,
                 public=False,
                 category='data',
                 description=None,
                 ):
        ds = require_dataset(dataset,
                             purpose="create OSF remote",
                             check_installed=True)
        res_kwargs = dict(
            ds=ds,
            action="create-sibling-osf",
            logger=lgr,
        )
        # we need an annex
        if not isinstance(ds.repo, AnnexRepo):
            yield get_status_dict(
                type="dataset",
                status="impossible",
                message="dataset has no annex",
                **res_kwargs)
            return

        # NOTES:
        # - we prob. should check osf-special-remote availability upfront to
        #   fail early
        # - add --recursive option
        #       - recursive won't work easily. Need to think that through.
        #       - would need a naming scheme for subdatasets
        #       - flat on OSF or a tree?
        #       - how do we detect something is there already, so we can skip
        #         rather than duplicate (with a new name)?
        #         osf-type-special-remote sufficient to decide it's not needed?
        # - adapt to conclusions in issue #30
        #   -> create those subcomponents
        # - results need to report URL for created projects suitable for datalad
        #   output formatting!
        #   -> result_renderer
        #   -> needs to ne returned by create_node

        if not storage_name:
            storage_name = "{}-storage".format(name)

        sibling_conflicts = sibling_exists(
            ds, [name, storage_name],
            # TODO pass through
            recursive=False, recursion_limit=None,
            # fail fast, if error is desired
            exhaustive=existing == 'error',
        )
        if existing == 'error' and sibling_conflicts:
            # we only asked for one
            conflict = sibling_conflicts[0]
            yield get_status_dict(
                status='error',
                message=(
                    "a sibling '%s' is already configured in dataset %s",
                    conflict[1], conflict[0]),
                **res_kwargs,
            )
            return

        if title is None:
            # use dataset root basename
            title = ds.pathobj.name

        tags = ensure_list(tags)
        if 'DataLad dataset' not in tags:
            tags.append('DataLad dataset')
        if ds.id and ds.id not in tags:
            tags.append(ds.id)

        if not description:
            description = \
                "This component was built from a DataLad dataset using the " \
                "datalad-osf extension " \
                "(https://github.com/datalad/datalad-osf)."
            if mode != 'exportonly':
                description += \
                    " With this extension installed, this component can be " \
                    "git or datalad cloned from a 'osf://ID' URL, where " \
                    "'ID' is the OSF node ID that shown in the OSF HTTP " \
                    "URL, e.g. https://osf.io/q8xnk can be cloned from " \
                    "osf://q8xnk. "
        cred = get_credentials(allow_interactive=True)
        osf = OSF(**cred)
        node_id, node_url = create_node(
            osf_session=osf.session,
            title=title,
            category=category,
            tags=tags if tags else None,
            public=EnsureBool()(public),
            description=description,
        )
        if mode != 'gitonly':
            init_opts = ["encryption=none",
                         "type=external",
                         "externaltype=osf",
                         "autoenable=true",
                         "node={}".format(node_id)]

            if mode in ("export", "exportonly"):
                init_opts += ["exporttree=yes"]

            ds.repo.init_remote(storage_name, options=init_opts)
            if trust_level:
                ds.repo.call_git(['annex', trust_level, storage_name])

            yield get_status_dict(
                type="dataset",
                url=node_url,
                id=node_id,
                name=storage_name,
                status="ok",
                **res_kwargs
            )

        if mode == 'exportonly':
            return

        # append how to clone this specific dataset to the description
        description += "This particular project can be cloned using" \
                       " 'datalad clone osf://{}'".format(node_id)
        update_node(osf_session=osf.session,
                    id_=node_id,
                    description=description)

        ds.config.set(
            'remote.{}.annex-ignore'.format(name), 'true',
            where='local')
        yield from ds.siblings(
            # use configure, not add, to not trip over the config that
            # we just made
            action='configure',
            name=name,
            url='osf://{}'.format(node_id),
            fetch=False,
            publish_depends=storage_name if mode != 'gitonly' else None,
            recursive=False,
            result_renderer=None,
        )

    @staticmethod
    def custom_result_renderer(res, **kwargs):
        from datalad.ui import ui
        if res['action'] == "create-sibling-osf":
            msg = res.get('message', None)
            ui.message("{action}({status}): {url}{msg}".format(
                action=ac.color_word(res['action'], ac.BOLD),
                status=ac.color_status(res['status']),
                url=res.get('url', ''),
                msg=' [{}]'.format(msg[0] % msg[1:]
                                   if isinstance(msg, tuple)
                                   else res['message'])
                if msg else '')
            )
        elif res['action'] == "add-sibling-osf":
            ui.message("{action}({status})".format(
                action=ac.color_word(res['action'], ac.BOLD),
                status=ac.color_status(res['status']))
            )
        else:
            from datalad.interface.utils import default_result_renderer
            default_result_renderer(res)


# TODO this could was originally taken from create_sibling_ria() and
# subsequently turned into a standalone function to facilitate re-use by other
# `create_sibling()` implementations
def sibling_exists(ds, names, recursive=False,
                   recursion_limit=None, exhaustive=False):
    # in recursive mode this check could take a substantial amount of
    # time: employ a progress bar (or rather a counter, because we don't
    # know the total in advance
    pbar_id = 'sibling-exists-{}'.format(id(ds))
    if recursive:
        log_progress(
            lgr.info, pbar_id,
            'Start checking pre-existing sibling configuration %s', ds,
            label='Query siblings',
            unit=' Siblings',
        )
    conflicts = []
    for r in ds.siblings(result_renderer=None,
                         recursive=recursive,
                         recursion_limit=recursion_limit):
        if recursive:
            log_progress(
                lgr.info, pbar_id,
                'Discovered sibling %s in dataset at %s',
                r['name'], r['path'],
                update=1,
                increment=True)
        if not r['type'] == 'sibling' or r['status'] != 'ok':
            # this is an internal status query that has no consequences
            continue
        for name in names:
            if r['name'] == name:
                conflicts.append((r['path'], name))
                if not exhaustive:
                    return conflicts
    if recursive:
        log_progress(
            lgr.info, pbar_id,
            'Finished checking pre-existing sibling configuration %s', ds,
        )
    return conflicts