Source code for feets.datasets.base

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2024, Cabral, Juan
# Copyright (c) 2025, QuatroPe; Clariá, Felipe
# License: MIT
# Full Text:
#     https://github.com/quatrope/feets/blob/master/LICENSE


# =============================================================================
# DOCS
# =============================================================================

"""Base code for IO dataset retrieval."""


# =============================================================================
# IMPORTS
# =============================================================================

import os
import pathlib
import shutil
from collections.abc import Mapping

import attr

import numpy as np

import requests

from ..extractors.extractor import DATAS, DATA_TIME
from ..libs.bunch import Bunch


# =============================================================================
# CONSTANTS
# =============================================================================

HOME_PATH = pathlib.Path.home()

FEETS_DATA_DIR = "feets_data"

FEETS_DATA_DIR_ENV_VAR = "feets_DATA"

# =============================================================================
# FUNCTIONS
# =============================================================================


def get_data_home(data_home=None):
    """Return the path of the feets data directory.

    This directory is used by some large dataset loaders to avoid
    downloading the same data several times.

    By default, this is a directory named 'feets_data' in the user's home
    directory. Alternatively, it can be programmatically set with the
    `data_home` variable, or by the 'feets_DATA' environment variable. A
    relative value of the environment variable is resolved against the
    user's home directory, an absolute one is used as is. In every case
    the '~' prefix will be expanded to the user's home directory.

    If the directory does not already exist, it will be automatically
    created.

    Parameters
    ----------
    data_home : str, pathlib.Path or None, optional
        The path to the feets data directory.

    Returns
    -------
    pathlib.Path
        The path to the feets data directory.

    """
    if data_home is None:
        env_value = os.environ.get(FEETS_DATA_DIR_ENV_VAR, FEETS_DATA_DIR)
        # Expand '~' *before* anchoring the value at the home directory:
        # joining first would leave the tilde in the middle of the path,
        # where ``expanduser()`` no longer recognizes it.
        data_home = HOME_PATH / pathlib.Path(env_value).expanduser()

    data_home = pathlib.Path(data_home).expanduser()
    data_home.mkdir(parents=True, exist_ok=True)
    return data_home
def clear_data_home(data_home=None):
    """Remove the feets data directory and every file cached inside it.

    The directory is resolved with `get_data_home` and then deleted
    recursively.

    Parameters
    ----------
    data_home : str, pathlib.Path or None, optional
        The path to the feets data directory.

    """
    shutil.rmtree(get_data_home(data_home))
def fetch(url, dest, force=False):
    """Retrieve data from `url` and store it into `dest`.

    The payload is streamed into a temporary ``<dest>.part`` file that is
    moved over `dest` only once the download is complete, so an interrupted
    transfer never leaves a truncated file that a later call would treat as
    cached.

    If the server does not answer with HTTP status 200, nothing is written
    and `dest` is left untouched (it may therefore not exist).

    Parameters
    ----------
    url: str
        Link to the remote data
    dest: str or pathlib.Path
        Path where the file must be stored
    force: bool, default=False
        Overwrite if the file already exists

    Returns
    -------
    cached: bool
        True if the file already existed and no download was attempted
    dest: pathlib.Path
        The path to the downloaded file

    """
    dest = pathlib.Path(dest)
    if dest.exists() and not force:
        return True, dest

    # The context manager releases the streamed connection even when the
    # status is not 200 or writing to disk fails.
    with requests.get(url, stream=True) as response:
        if response.status_code == 200:
            partial = dest.parent / (dest.name + ".part")
            try:
                with open(partial, "wb") as fp:
                    for chunk in response.iter_content(1024):
                        fp.write(chunk)
                # Atomic on the same filesystem: `dest` is either the old
                # content or the complete new one, never a partial file.
                os.replace(partial, dest)
            except BaseException:
                partial.unlink(missing_ok=True)
                raise

    return False, dest
# =============================================================================
# CLASSES
# =============================================================================

# The LightCurve base class is generated dynamically from the extractor
# constants: one attribute per data vector name listed in DATAS. Only the
# DATA_TIME vector is mandatory (it has no default); every other vector
# defaults to None. Values other than None are converted with `np.asarray`.
_LightCurveBase = attr.make_class(
    "LightCurveBase",
    dict(
        (
            vector_name,
            attr.ib(
                default=None if vector_name != DATA_TIME else attr.NOTHING,
                converter=attr.converters.optional(np.asarray),
            ),
        )
        for vector_name in DATAS
    ),
    frozen=True,
)
class LightCurve(_LightCurveBase, Mapping):
    """Time series data for a single photometric band.

    The data vectors declared on the base class (e.g. time, magnitude and
    their associated errors) can be read either as attributes or through
    the read-only mapping interface. Vectors that are None are left out of
    `len`, iteration and the repr.

    """

    def __getitem__(self, key):
        """Return the attribute named `key`; raise `KeyError` if missing."""
        try:
            vector = getattr(self, key)
        except AttributeError:
            raise KeyError(key)
        return vector

    def __len__(self):
        """Count the data vectors that are not None."""
        return sum(
            1 for vector in attr.asdict(self).values() if vector is not None
        )

    def __iter__(self):
        """Iterate over the names of the data vectors that are not None."""
        vectors = attr.asdict(self)
        return (name for name in vectors if vectors[name] is not None)

    def __repr__(self):
        """Return ``<LightCurve name[size], ...>`` for the set vectors."""
        summary = ", ".join(
            f"{name}[{len(vector)}]"
            for name, vector in attr.asdict(self).items()
            if vector is not None
        )
        return f"<LightCurve {summary}>"
@attr.s(frozen=True)
class LightCurveDataset(Mapping):
    """An immutable container for a single light curve dataset.

    This object encapsulates the time series data for one astronomical
    object, potentially across multiple photometric bands, along with its
    corresponding metadata.

    It behaves like a dictionary, allowing access to its main attributes
    (`_id`, `name`, `description`, etc.) via key-based lookup. Attributes
    whose value is None are left out of `len` and iteration. The actual time
    series data is stored in the `data` attribute as a
    `feets.libs.bunch.Bunch` of `LightCurve` objects, one for each band.

    Attributes
    ----------
    _id : str or None
        A unique identifier for the light curve dataset object. Any value
        other than None is converted with `str`; None is kept as None.
    name : str
        The human-readable name of the dataset from which this object
        originates.
    description : str
        A detailed description of the dataset's origin, content, or purpose.
    bands : tuple of str
        An ordered collection of the band names for which time series data
        is available. These names correspond to the keys in the `data`
        attribute.
    data : dict-like
        A dictionary-like object mapping band names (from `bands`) to their
        corresponding time series data. This is internally converted to a
        `feets.libs.bunch.Bunch` of `LightCurve` objects for convenient
        attribute-style access.
    metadata : dict-like or None
        A dictionary-like object containing arbitrary metadata about the
        light curve object (e.g., celestial coordinates, redshift). It is
        internally converted to a `feets.libs.bunch.Bunch` for convenient
        attribute-style access. Empty or otherwise falsy values are stored
        as None.

    See Also
    --------
    feets.libs.bunch.Bunch : Container object exposing keys as attributes.
    LightCurve

    """

    # `optional` keeps a missing identifier as None; a bare `str` converter
    # would silently store the string "None" instead.
    _id: str | None = attr.ib(converter=attr.converters.optional(str))
    name: str = attr.ib(converter=str)
    description: str = attr.ib(converter=str, repr=False)
    bands: tuple = attr.ib(converter=tuple)
    data: Bunch = attr.ib(
        converter=lambda data: Bunch(
            "LightCurve", {band: LightCurve(**ts) for band, ts in data.items()}
        ),
        repr=False,
    )
    metadata: Bunch | None = attr.ib(
        converter=lambda metadata: (
            Bunch("Metadata", metadata) if metadata else None
        ),
        repr=False,
        default=None,
    )

    def __getitem__(self, key):
        """Get an attribute by name."""
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def __len__(self):
        """Get the number of available attributes."""
        # recurse=False: only the top-level values are inspected, so there
        # is no need to convert the nested containers.
        return sum(
            1
            for value in attr.asdict(self, recurse=False).values()
            if value is not None
        )

    def __iter__(self):
        """Iterate over the available attributes."""
        return iter(
            key
            for key, value in attr.asdict(self, recurse=False).items()
            if value is not None
        )