Source code for feets.datasets.base

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# The MIT License (MIT)

# Copyright (c) 2017 Juan Cabral

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# =============================================================================
# DOCS
# =============================================================================

"""Base code for IO dataset retrieval


"""


# =============================================================================
# IMPORTS
# =============================================================================

import os
import shutil

try:
    from collections.abc import Mapping
except ImportError:  # Python 2 has no ``collections.abc``
    from collections import Mapping

import numpy as np

import requests

import attr

from ..extractors.core import DATAS


# =============================================================================
# FUNCTIONS
# =============================================================================

def get_data_home(data_home=None):
    """Return the path of the feets data dir, creating it when needed.

    This folder is used by some large dataset loaders to avoid downloading the
    data several times.

    By default the data dir is set to a folder named 'feets_data' in the
    user home folder.

    Alternatively, it can be set by the 'feets_DATA' environment
    variable or programmatically by giving an explicit folder path. The '~'
    symbol is expanded to the user home folder.

    If the folder does not already exist, it is automatically created.

    Parameters
    ----------
    data_home : str | None
        The path to feets data dir. When None, the value of the
        'feets_DATA' environment variable is used, falling back to
        '~/feets_data'.

    Returns
    -------
    data_home : str
        The user-expanded path of the data dir.

    Raises
    ------
    OSError
        If the folder cannot be created, including the case where the path
        already exists but is not a directory.

    """
    if data_home is None:
        data_home = os.environ.get(
            'feets_DATA', os.path.join('~', 'feets_data'))
    data_home = os.path.expanduser(data_home)
    try:
        os.makedirs(data_home)
    except OSError:
        # Creating first and checking afterwards avoids the race between an
        # "exists" test and the creation: a folder that was already there (or
        # that another process created meanwhile) is fine, anything else is a
        # real failure and must be reported.
        if not os.path.isdir(data_home):
            raise
    return data_home


def clear_data_home(data_home=None):
    """Delete the data home cache folder together with all its content.

    The folder is resolved with ``get_data_home`` (which creates it when it
    is missing) and is then removed recursively, so after this call the
    folder itself no longer exists.

    Parameters
    ----------
    data_home : str | None
        The path to feets data dir. None selects the default location used
        by ``get_data_home``.

    """
    shutil.rmtree(get_data_home(data_home))


def fetch(url, dest, force=False):
    """Retrieve data from an url and store it into dest.

    Parameters
    ----------
    url: str
        Link to the remote data
    dest: str
        Path where the file must be stored
    force: bool (default=False)
        Download again and overwrite even if the file exists

    Returns
    -------
    cached: bool
        True if the file already existed and no download was attempted
    dest: str
        The same string of the parameter

    Notes
    -----
    A response whose status code is not 200 is ignored: nothing is written
    and ``(False, dest)`` is still returned.

    If the download is interrupted, the partially written file is removed
    before the error is re-raised, so a truncated file is never mistaken
    for a cached one by a later call.

    """
    if not force and os.path.exists(dest):
        return True, dest

    response = requests.get(url, stream=True)
    try:
        if response.status_code == 200:
            try:
                with open(dest, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
            except BaseException:
                # also covers KeyboardInterrupt: never leave a truncated
                # file behind, it would look like a valid cache entry
                if os.path.isfile(dest):
                    os.remove(dest)
                raise
    finally:
        # a streamed response keeps its connection until it is closed
        response.close()
    return False, dest


# =============================================================================
# CLASSES
# =============================================================================

class Bunch(Mapping):  # THANKS SKLEARN
    """Read-only container object for datasets.

    Dictionary-like object that exposes its keys as attributes.

    >>> b = Bunch(a=1, b=2)
    >>> b['b']
    2
    >>> b.b
    2
    >>> sorted(b)
    ['a', 'b']
    >>> len(b)
    2

    Parameters
    ----------
    data : mapping or iterable of pairs, optional
        Initial content; anything accepted by ``dict``.
    **kwargs
        Initial content given as keywords. Cannot be combined with a
        non-empty ``data``.

    Raises
    ------
    ValueError
        If both a non-empty ``data`` and keyword arguments are given.

    Notes
    -----
    The stored items cannot be modified through this object: assigning
    ``b.a = 3`` only creates a regular instance attribute and leaves
    ``b['a']`` untouched.

    """

    def __init__(self, data=None, **kwargs):
        if data and kwargs:
            raise ValueError(
                "If 'data' is not none keywords aguments are not allowed")
        self._data = dict(data) if data else kwargs

    def __repr__(self):
        # str() keeps the representation usable with non-string keys
        keys_str = ", ".join(str(k) for k in self._data)
        return "Bunch({})".format(keys_str)

    def __getitem__(self, key):
        return self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def __dir__(self):
        return list(self._data)

    def __getattr__(self, key):
        # __getattr__ only runs when the normal lookup failed. If '_data'
        # itself is missing (instance created without __init__, e.g. while
        # being copied or unpickled) reading self._data below would call
        # this method again and recurse forever.
        if key == "_data":
            raise AttributeError(key)
        try:
            return self._data[key]
        except KeyError:
            raise AttributeError(key)

    def __setstate__(self, state):
        # Restore the instance attributes ('_data' among them); without
        # this, copies and unpickled objects would come back empty.
        self.__dict__.update(state)


# This ugly code creates a LightCurve object based on the extractor constants
# and adds some validations and a custom repr.

# One attribute is declared per name in DATAS. The first two names get no
# default (they are mandatory); every other one defaults to None. Values
# that are not None go through np.asarray. The resulting class is frozen.
LightCurveBase = attr.make_class(
    'LightCurveBase',
    dict(
        (name, attr.ib(
            default=None if name not in DATAS[:2] else attr.NOTHING,
            converter=attr.converters.optional(np.asarray)))
        for name in DATAS),
    frozen=True)


class LightCurve(LightCurveBase, Mapping):
    """Light curve whose fields can also be read through the mapping API.

    The fields are the attributes declared by ``LightCurveBase``. Iterating
    yields only the names of the fields whose value is not None, and
    ``len`` reports that same number, so the keys, the length and the
    views inherited from ``Mapping`` agree with each other.

    Indexing delegates to attribute lookup: a declared field that was left
    unset is still retrievable and returns None, while an unknown name
    raises ``KeyError``.

    """

    def __repr__(self):
        fields = []
        for a in attr.fields(LightCurveBase):
            v = getattr(self, a.name)
            if v is not None:
                # only the size of every stored array is shown
                fields.append("{}[{}]".format(a.name, len(v)))
        fields_str = ", ".join(fields)
        return "LightCurve({})".format(fields_str)

    def __getitem__(self, k):
        try:
            return getattr(self, k)
        except AttributeError:
            raise KeyError(k)

    def __iter__(self):
        # walk the declared fields directly, in declaration order
        for a in attr.fields(LightCurveBase):
            if getattr(self, a.name) is not None:
                yield a.name

    def __len__(self):
        # must match __iter__: count only the fields that are set
        return sum(1 for _ in self)


# The real dataset object

@attr.s(frozen=True)
class Data(Mapping):
    """This object encapsulates a full data with their metadata.

    The attributes can also be read through the mapping API. Iterating
    yields only the names of the attributes whose value is not None and
    ``len`` reports that same number. Indexing delegates to attribute
    lookup and raises ``KeyError`` for unknown names.

    Attributes
    ----------

    id : any object or None
        the id of the lightcurve or None
    ds_name : str
        The name of the dataset (converted with ``str``)
    description : str
        description about the dataset (converted with ``str``)
    bands : tuple
        the names of the attributes inside data (converted with ``tuple``)
    metadata : dict-like
        arbitrary data; wrapped in a ``Bunch`` unless it is None.
    data : dict-like
        lightcurves collection in a dict-like object. Every value of the
        given mapping is expanded as keyword arguments of ``LightCurve``
        and the results are stored in a ``Bunch`` under the same keys.

    """
    id = attr.ib()
    ds_name = attr.ib(converter=str)
    description = attr.ib(converter=str, repr=False)
    bands = attr.ib(converter=tuple)
    metadata = attr.ib(
        repr=False, converter=attr.converters.optional(Bunch))
    data = attr.ib(
        repr=False, converter=lambda value: Bunch({
            k: LightCurve(**v) for k, v in value.items()}))

    def __getitem__(self, k):
        try:
            return getattr(self, k)
        except AttributeError:
            raise KeyError(k)

    def __iter__(self):
        # walk the declared attributes directly, in declaration order
        for a in attr.fields(Data):
            if getattr(self, a.name) is not None:
                yield a.name

    def __len__(self):
        # must match __iter__: count only the attributes that are set
        return sum(1 for _ in self)