Source code for feets.core

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# The MIT License (MIT)

# Copyright (c) 2017 Juan Cabral

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# =============================================================================
# FUTURE
# =============================================================================

from __future__ import unicode_literals, print_function


# =============================================================================
# DOCS
# =============================================================================

__doc__ = """core functionalities of feets"""

__all__ = [
    "FeatureNotFound",
    "DataRequiredError",
    "FeatureSpace"]


# =============================================================================
# IMPORTS
# =============================================================================

import logging

import numpy as np

from . import extractors
from .extractors.core import (
    DATA_MAGNITUDE,
    DATA_TIME,
    DATA_ERROR,
    DATA_MAGNITUDE2,
    DATA_ALIGNED_MAGNITUDE,
    DATA_ALIGNED_MAGNITUDE2,
    DATA_ALIGNED_TIME,
    DATA_ALIGNED_ERROR,
    DATA_ALIGNED_ERROR2)


# =============================================================================
# CONSTANTS
# =============================================================================

TABULATE_PARAMS = {
    "headers": "firstrow",
    "numalign": "center",
    "stralign": "center",
}


# =============================================================================
# LOG
# =============================================================================

logger = logging.getLogger("feets")
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.WARNING)


# =============================================================================
# EXCEPTIONS
# =============================================================================

[docs]class FeatureNotFound(ValueError):
    pass


[docs]class DataRequiredError(ValueError):
    pass


# =============================================================================
# FEATURE EXTRACTORS
# =============================================================================


[docs]class FeatureSpace(object):
    """Wrapper class, to allow user select the
    features based on the available time series vectors (magnitude, time,
    error, second magnitude, etc.) or specify a list of features.
    The finally selected features for the execution plan are are those that
    satisfy all the filters.

    Parameters
    ----------

    data : array-like, optional, default ``None``
        available time series vectors, which will
        output all the features that need this data to be calculated.

    only : array-like, optional, default ``None``
        List of features, which will output
        all the features in the list.

    exclude : array-like, optional, default ``None``
        List of features, which will not output

    kwargs
        Extra configuration for the feature extractors.
        format is ``Feature_name={param1: value, param2: value, ...}``

    Examples
    --------

    **List of features as an input:**

    .. code-block:: pycon

        >>> fs = feets.FeatureSpace(only=['Std'])
        >>> features, values = fs.extract(*lc)
        >>> dict(zip(features, values))
        {"Std": .42}

    **Available data as an input:**

    .. code-block:: pycon

        >>> fs = feets.FeatureSpace(data=['magnitude','time'])
        >>> features, values = fs.extract(*lc)
        >>> dict(zip(features, values))
        {...}

    **List of features and available data as an input:**

    .. code-block:: pycon

        >>> fs = feets.FeatureSpace(
        ...     only=['Mean','Beyond1Std', 'CAR_sigma','Color'],
        ...     data=['magnitude', 'error'])
        >>> features, values = fs.extract(*lc)
        >>> dict(zip(features, values))
        {"Beyond1Std": ..., "Mean": ...}

    **Excluding list as an input**

    .. code-block:: pycon

        >>> fs = feets.FeatureSpace(
        ...     only=['Mean','Beyond1Std','CAR_sigma','Color'],
        ...     data=['magnitude', 'error'],
        ...     exclude=["Beyond1Std"])
        >>> features, values = fs.extract(**lc)
        >>> dict(zip(features, values))
        {"Mean": 23}

    """
    def __init__(self, data=None, only=None, exclude=None, **kwargs):
        # retrieve all the extractors
        exts = extractors.registered_extractors()

        # store all the parameters for the extractors
        self._kwargs = kwargs

        # get all posible features by data
        if data:
            fbdata = []
            for fname, f in exts.items():
                if not f.get_data().difference(data):
                    fbdata.append(fname)
        else:
            fbdata = exts.keys()
        self._data = frozenset(data or extractors.DATAS)
        self._features_by_data = frozenset(fbdata)

        # validate the list of features or select all of them
        if only:
            for f in only:
                if f not in exts:
                    raise FeatureNotFound(f)
        self._only = frozenset(only or exts.keys())

        # select the features to exclude or not exclude anything
        if exclude:
            for f in exclude:
                if f not in exts:
                    raise FeatureNotFound(f)
        self._exclude = frozenset(exclude or ())

        # the candidate to be the features to be extracted
        candidates = self._features_by_data.intersection(
            self._only).difference(self._exclude)

        # remove by dependencies
        if only or exclude:
            final = set()
            for f in candidates:
                fcls = exts[f]
                dependencies = fcls.get_dependencies()
                if dependencies.issubset(candidates):
                    final.add(f)
        else:
            final = candidates

        # the final features
        self._features = frozenset(final)

        # create a ndarray for all the results
        self._features_as_array = np.array(sorted(self._features))

        # initialize the extractors and determine the required data only
        features_extractors, features_extractors_names = set(), set()
        required_data = set()
        for fcls in set(exts.values()):
            if fcls.get_features().intersection(self._features):

                params = self._kwargs.get(fcls.__name__, {})
                fext = fcls(**params)

                features_extractors.add(fext)
                features_extractors_names.add(fext.name)
                required_data.update(fext.get_data())

        self._features_extractors = frozenset(features_extractors)
        self._features_extractors_names = frozenset(features_extractors_names)
        self._required_data = frozenset(required_data)

        # excecution order by dependencies
        self._execution_plan = extractors.sort_by_dependencies(
            features_extractors)

        not_found = set(self._kwargs).difference(
            self._features_extractors_names)
        if not_found:
            msg = (
                "This space not found feature(s) extractor(s) {} "
                "to assign the given parameter(s)"
            ).format(", ".join(not_found))
            raise FeatureNotFound(msg)

    def __repr__(self):
        return str(self)

    def __str__(self):
        if not hasattr(self, "__str"):
            extractors = [str(extractor) for extractor in self._execution_plan]
            space = ", ".join(extractors)
            self.__str = "<FeatureSpace: {}>".format(space)
        return self.__str

[docs]    def dict_data_as_array(self, d):
        array_data = {}
        for k, v in d.items():
            if k in self._required_data and v is None:
                raise DataRequiredError(k)
            array_data[k] = v if v is None else np.asarray(v)
        return array_data

[docs]    def extract(self, time=None, magnitude=None, error=None,
                magnitude2=None, aligned_time=None,
                aligned_magnitude=None, aligned_magnitude2=None,
                aligned_error=None, aligned_error2=None):

        kwargs = self.dict_data_as_array({
            DATA_TIME: time,
            DATA_MAGNITUDE: magnitude,
            DATA_ERROR: error,
            DATA_MAGNITUDE2: magnitude2,
            DATA_ALIGNED_TIME: aligned_time,
            DATA_ALIGNED_MAGNITUDE: aligned_magnitude,
            DATA_ALIGNED_MAGNITUDE2: aligned_magnitude2,
            DATA_ALIGNED_ERROR: aligned_error,
            DATA_ALIGNED_ERROR2: aligned_error2})

        features = {}
        for fextractor in self._execution_plan:
            result = fextractor.extract(features=features, **kwargs)
            features.update(result)

        fvalues = np.array([
            features[fname] for fname in self._features_as_array])

        return self._features_as_array, fvalues

    @property
    def kwargs(self):
        return dict(self._kwargs)

    @property
    def data(self):
        return self._data

    @property
    def only(self):
        return self._only

    @property
    def exclude(self):
        return self._exclude

    @property
    def features_by_data_(self):
        return self._features_by_data

    @property
    def features_(self):
        return self._features

    @property
    def features_extractors_(self):
        return self._features_extractors

    @property
    def features_as_array_(self):
        return self._features_as_array

    @property
    def excecution_plan_(self):
        return self._execution_plan

    @property
    def required_data_(self):
        return self._required_data