Source code for nested_pandas.series.nestedseries

from functools import wraps

import pandas as pd
from deprecated import deprecated

from nested_pandas.series.dtype import NestedDtype

__all__ = ["NestedSeries"]


def nested_only(func):
    """Restrict a callable to objects whose ``dtype`` is a ``NestedDtype``.

    The decorated callable is expected to receive the object to check as its
    first positional argument (``self`` for methods and properties).

    Parameters
    ----------
    func : callable
        The function to guard.

    Returns
    -------
    callable
        A wrapper that forwards all arguments to ``func`` and returns its
        result unchanged when the dtype check passes.

    Raises
    ------
    TypeError
        Raised by the wrapper when the first positional argument's ``dtype``
        is not a ``NestedDtype`` instance.
    """

    @wraps(func)  # Carry over the name and docstring of the guarded function
    def wrapper(*args, **kwargs):
        # Happy path first: a nested dtype simply delegates to the original.
        if isinstance(args[0].dtype, NestedDtype):
            return func(*args, **kwargs)

        raise TypeError(f"'{func.__name__}' can only be used with a NestedDtype, not '{args[0].dtype}'.")

    return wrapper


class NestedSeries(pd.Series):
    """
    A Series that can contain nested data structures, such as lists or data-frames.

    This class extends the functionality of a standard pandas Series to handle nested data.

    Members decorated with ``nested_only`` raise ``TypeError`` when the series
    dtype is not a ``NestedDtype``. Item access (``__getitem__`` /
    ``__setitem__``) instead falls back to plain pandas behavior in that case.
    All nested-specific work is delegated to the ``nest`` accessor.
    """

    def __init__(self, *args, **kwargs):
        """Construct the series; all arguments are forwarded to ``pd.Series``."""
        super().__init__(*args, **kwargs)

    @property
    @nested_only
    @deprecated(
        version="0.6.0",
        # The trailing space matters: the two literals are concatenated as-is.
        reason="The `fields` property is deprecated and will be removed in version 0.7.0, "
        "use `columns` instead.",
    )
    def fields(self):
        """Returns the fields of the nested series as a list.

        Deprecated alias of `columns`.
        """
        return self.columns

    @property
    @nested_only
    def columns(self):
        """Returns the names of the nested columns of the nested series as a list."""
        return self.nest.columns

    @property
    @nested_only
    def flat_length(self):
        """Returns the length of the flattened nested series."""
        return self.nest.flat_length

    @property
    @nested_only
    @deprecated(
        version="0.6.10",
        reason="`list_lengths` is deprecated and will be removed in version 0.8.0, use `len()` instead.",
    )
    def list_lengths(self):
        """Returns the lengths of the list-packed nested series.

        Deprecated alias of `len()`.
        """
        return self.len()

    @nested_only
    def len(self):
        """Returns the lengths of the list-packed nested series."""
        return self.nest.len()

    def __getitem__(self, key):
        """Equip getitem with ability to handle nested data.

        Parameters
        ----------
        key : object
            A nested column name (``str``), a ``list``/``tuple`` of strings,
            a boolean ``pd.Series`` mask, or any key accepted by
            ``pd.Series.__getitem__``.

        Returns
        -------
        object
            Whatever ``self.nest[key]`` returns for the nested-aware cases,
            otherwise the result of ``pd.Series.__getitem__``.
        """
        # Pure pandas Series behavior if not a NestedDtype
        if not isinstance(self.dtype, NestedDtype):
            return super().__getitem__(key)

        # Return a flattened series for a single column
        if isinstance(key, str) and key in self.columns:
            return self.nest[key]

        # For list-like keys, perform sub-column selection
        if isinstance(key, list | tuple) and all(isinstance(k, str) for k in key):
            return self.nest[key]

        # Handle boolean masking
        if isinstance(key, pd.Series) and pd.api.types.is_bool_dtype(key.dtype):
            return self.nest[key]

        # Otherwise, fall back to the default behavior
        return super().__getitem__(key)

    def __setitem__(self, key, value):
        """Equip setitem with ability to handle nested data.

        Parameters
        ----------
        key : object
            The name of an existing nested column (``str``), or any key
            accepted by ``pd.Series.__setitem__``.
        value : object
            The value to assign.
        """
        # Pure pandas Series behavior if not a NestedDtype
        if not isinstance(self.dtype, NestedDtype):
            return super().__setitem__(key, value)

        # Use nest setitem when setting on a single field
        if isinstance(key, str) and key in self.columns:
            self.nest[key] = value
            return

        return super().__setitem__(key, value)

    @nested_only
    @deprecated(version="0.6.0", reason="`to_flat` will be removed in version 0.7.0, use `explode` instead.")
    def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:
        """Convert nested series into dataframe of flat arrays.

        Deprecated: this simply forwards to `explode`.

        Parameters
        ----------
        fields : list[str] or None, optional
            Names of the fields to include. Default is None, which means all fields.

        Returns
        -------
        pd.DataFrame
            Dataframe of flat arrays.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].to_flat()
                   t       flux  flux_error  band
        0    8.38389  80.074457         1.0     r
        0   13.40935  89.460666         1.0     g
        1   13.70439  96.826158         1.0     g
        1   8.346096   8.504421         1.0     g
        2   4.089045  31.342418         1.0     g
        2  11.173797   3.905478         1.0     g
        3  17.562349  69.232262         1.0     r
        3   2.807739  16.983042         1.0     r
        4   0.547752  87.638915         1.0     g
        4    3.96203   87.81425         1.0     r
        """
        return self.explode(columns=fields)

    @nested_only
    def explode(self, columns: list[str] | str | None = None) -> pd.DataFrame:
        """Unpack nested series into dataframe of flat arrays.

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.

        Returns
        -------
        pd.DataFrame
            Dataframe of flat arrays.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].explode()
                   t       flux  flux_error  band
        0    8.38389  80.074457         1.0     r
        0   13.40935  89.460666         1.0     g
        1   13.70439  96.826158         1.0     g
        1   8.346096   8.504421         1.0     g
        2   4.089045  31.342418         1.0     g
        2  11.173797   3.905478         1.0     g
        3  17.562349  69.232262         1.0     r
        3   2.807739  16.983042         1.0     r
        4   0.547752  87.638915         1.0     g
        4    3.96203   87.81425         1.0     r
        """
        return self.nest.to_flat(columns=columns)

    @nested_only
    def to_lists(self, columns: list[str] | str | None = None) -> pd.DataFrame:
        """Convert nested series into dataframe of list-array columns.

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.

        Returns
        -------
        pd.DataFrame
            Dataframe of list-arrays.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].to_lists()
                                   t                       flux  flux_error       band
        0  [ 8.38389029 13.4093502 ]  [80.07445687 89.46066635]     [1. 1.]  ['r' 'g']
        1  [13.70439001  8.34609605]  [96.82615757  8.50442114]     [1. 1.]  ['g' 'g']
        2  [ 4.08904499 11.17379657]  [31.34241782  3.90547832]     [1. 1.]  ['g' 'g']
        3  [17.56234873  2.80773877]  [69.23226157 16.98304196]     [1. 1.]  ['r' 'r']
        4    [0.54775186 3.96202978]  [87.63891523 87.81425034]     [1. 1.]  ['g' 'r']
        """
        return self.nest.to_lists(columns=columns)