Source code for feets.features

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2024, Cabral, Juan
# Copyright (c) 2025, QuatroPe; Clariá, Felipe
# License: MIT
# Full Text:
#     https://github.com/quatrope/feets/blob/master/LICENSE


# =============================================================================
# DOCS
# =============================================================================

"""Manage and manipulate feature extraction results."""

# =============================================================================
# IMPORTS
# =============================================================================

from collections.abc import Sequence

import joblib

import numpy as np

import pandas as pd


# =============================================================================
# FEATURE SET
# =============================================================================



[docs]
class Features(Sequence):
    """Class to manage and manipulate feature extraction results.

    The `Features` class encapsulates the results of feature extraction
    performed on multiple light curves. It provides an interface to access
    the extracted features either by feature name or by light curve index.

    Parameters
    ----------
    features : array_like of dict
        The results of the feature extraction for each of the light curves.
    extractors : array_like of Extractor
        The extractor instances used to compute the features.

    Attributes
    ----------
    features : np.ndarray
        The extracted features by light curve.
    extractors : np.ndarray
        The extractor instances used to compute the features.
    feature_names : frozenset
        The names of the extracted features. Empty when there are no
        light curves.
    length : int
        The number of light curves.

    Examples
    --------
    >>> from feets import FeatureSpace
    >>> fs = FeatureSpace(only=["Std", "Mean"])
    >>> results = fs.extract_many(
    ...     {"magnitude": [1, 1.5, 2]},
    ...     {"magnitude": [1, 2, 3]}
    ... )
    >>> results
    <Features feature_names={'Std', 'Mean'}, length=2>

    Accessing results by feature name:

    >>> results.Mean
    array([1.5, 2. ])
    >>> results.Std
    array([0.5, 1. ])

    Accessing results by light curve index:

    >>> results[0]
    {'Std': np.float64(0.5), 'Mean': np.float64(1.5)}
    >>> results[1]
    {'Std': np.float64(1.0), 'Mean': np.float64(2.0)}
    """

    # CONSTRUCTOR =============================================================

    def __init__(self, features, extractors):
        # Both containers hold arbitrary Python objects (dicts / extractor
        # instances), hence the explicit ``object`` dtype.
        self.features = np.array(features, dtype=object)
        self.extractors = np.array(extractors, dtype=object)

    # PROPERTIES ==============================================================

    @property
    def feature_names(self):
        """frozenset: The names of the extracted features."""
        # The names are read from the first light curve; with no light
        # curves there is nothing to read, so report an empty set instead
        # of failing with an IndexError.
        if len(self.features) == 0:
            return frozenset()
        return frozenset(self.features[0])

    @property
    def length(self):
        """int: The number of light curves."""
        return len(self.features)

    # MAGIC ===================================================================

    def __repr__(self):
        """String representation of the `Features` object."""
        return (
            f"<Features feature_names={set(self.feature_names)}, "
            f"length={self.length}>"
        )

    def __getattr__(self, feature_name):
        """Access feature results by feature name.

        Only called when regular attribute lookup fails.

        Parameters
        ----------
        feature_name : str
            The name of the feature to retrieve.

        Returns
        -------
        np.ndarray
            The value of the feature for every light curve.

        Raises
        ------
        AttributeError
            If the name is a special (dunder) name, if the instance has not
            been initialized yet, or if any light curve lacks the feature.
        """
        message = (
            f"{type(self).__name__!r} object has no feature by the name "
            f"{feature_name!r}"
        )

        # Special names are protocol probes (copy, pickle, ...) and never
        # feature names; answering them with data would confuse the caller.
        is_special = feature_name.startswith("__") and feature_name.endswith(
            "__"
        )

        # Read the instance dict directly: going through ``self.features``
        # on an instance created without ``__init__`` (as copy and pickle
        # do) would re-enter this method and recurse without end.
        features = self.__dict__.get("features")

        if is_special or features is None:
            raise AttributeError(message)

        try:
            return np.array([feat[feature_name] for feat in features])
        except KeyError:
            raise AttributeError(message) from None

    def __getitem__(self, slicer):
        """Access light curve results by index or slice.

        Raises
        ------
        IndexError
            If the underlying array rejects the given index.
        """
        try:
            return self.features[slicer]
        except IndexError as err:
            message = (
                f"index {slicer} is out of bounds for "
                f"{type(self).__name__!r} object with length {self.length}"
            )
            raise IndexError(message) from err

    def __len__(self):
        """Get the number of light curves."""
        return self.length

    def __dir__(self):
        """Get the list of attributes of the `Features` object.

        Combines the regular attributes (class, inherited and instance
        ones) with the feature names reachable through attribute access.
        """
        return sorted(set(super().__dir__()) | self.feature_names)

    # API =====================================================================

    def _extractors_by_feature(self):
        """Map every feature name to the extractor that declares it.

        If several extractors declare the same feature, the last one wins.
        """
        extractors_by_feature = {}
        for extractor in self.extractors:
            extractors_by_feature.update(
                dict.fromkeys(extractor.get_features(), extractor)
            )
        return extractors_by_feature

    def _get_default_jobs(self):
        """Get the default number of parallel jobs.

        One job per light curve, capped by the CPU count and never below
        one, since `joblib.Parallel` rejects ``n_jobs=0``.
        """
        return max(1, min(len(self.features), joblib.cpu_count()))

    @staticmethod
    def _features_as_serie(features, extractors_by_feature):
        """Flatten the features of one light curve into a `pandas.Series`.

        Parameters
        ----------
        features : dict
            Feature values of a single light curve, keyed by feature name.
        extractors_by_feature : dict
            The extractor responsible for each feature name.

        Returns
        -------
        pandas.Series
            The flattened values produced by the extractors.
        """
        data = {}
        for fname, fvalue in features.items():
            extractor = extractors_by_feature[fname]
            flattened = extractor.flatten_feature(fname, fvalue)
            extractor.validate_flatten(fname, flattened)
            data.update(flattened)
        return pd.Series(data)

    def as_frame(self, **kwargs):
        """Convert the extraction results into a `pandas.DataFrame`.

        This method transforms the extracted features into a `pandas.DataFrame`,
        where each row corresponds to a light curve and each column represents
        a feature.

        The conversion process can be parallelized to improve performance on
        large datasets.

        Parameters
        ----------
        **kwargs
            Keyword arguments passed to the `joblib.Parallel` constructor,
            used when parallel processing the `pandas.DataFrame` conversion.
            When not given, ``prefer`` defaults to ``"processes"`` and
            ``n_jobs`` to the number of light curves capped by the CPU
            count (at least one).

        Returns
        -------
        pandas.DataFrame
            A `pandas.DataFrame` representation of the extracted features.
            Each row corresponds to a light curve and each column represents
            a feature.

        Examples
        --------
        >>> from feets import FeatureSpace
        >>> fs = FeatureSpace(only=["Std", "Mean"])
        >>> results = fs.extract_many(
        ...     {"magnitude": [1, 1.5, 2]},
        ...     {"magnitude": [1, 2, 3]}
        ... )
        >>> results.as_frame()
        Features     Std  Mean
        Light Curve
        0            0.5   1.5
        1            1.0   2.0
        """
        extractors_by_feature = self._extractors_by_feature()

        kwargs.setdefault("prefer", "processes")
        kwargs.setdefault("n_jobs", self._get_default_jobs())

        with joblib.Parallel(**kwargs) as parallel:
            features_as_serie = joblib.delayed(self._features_as_serie)
            all_series = parallel(
                features_as_serie(features, extractors_by_feature)
                for features in self.features
            )

        df = pd.DataFrame(all_series)
        df.index.name = "Light Curve"
        df.columns.name = "Features"
        return df
feets

Navigation

Related Topics

Source code for feets.features