Source code for nested_pandas.series.nestedseries
from functools import wraps
import pandas as pd
from deprecated import deprecated
from nested_pandas.series.dtype import NestedDtype
__all__ = ["NestedSeries"]
def nested_only(func):
    """Restrict ``func`` to objects whose ``dtype`` is a ``NestedDtype``.

    The returned wrapper inspects the ``dtype`` attribute of its first
    positional argument (``self`` when decorating a method). When that dtype
    is a ``NestedDtype`` the call is forwarded unchanged; otherwise a
    ``TypeError`` naming the decorated function and the offending dtype is
    raised.
    """

    @wraps(func)  # keep the decorated function's name, docstring, etc.
    def wrapper(*args, **kwargs):
        owner = args[0]
        if isinstance(owner.dtype, NestedDtype):
            return func(*args, **kwargs)
        raise TypeError(f"'{func.__name__}' can only be used with a NestedDtype, not '{owner.dtype}'.")

    return wrapper
[docs]
class NestedSeries(pd.Series):
    """
    A Series that can contain nested data structures, such as lists or data-frames.

    This class extends the functionality of a standard pandas Series to handle nested data.
    Members decorated with ``nested_only`` raise ``TypeError`` unless the series
    dtype is a ``NestedDtype``; they delegate their work to the ``.nest`` accessor.
    """

    def __init__(self, *args, **kwargs):
        """Create the series by forwarding all arguments to ``pd.Series``."""
        super().__init__(*args, **kwargs)

    @property
    @nested_only
    @deprecated(
        version="0.6.0",
        # The trailing space matters: the two literals are concatenated, and
        # without it the message reads "...0.7.0,use `columns` instead.".
        reason="The `fields` property is deprecated and will be removed in version 0.7.0, "
        "use `columns` instead.",
    )
    def fields(self):
        """Returns the fields of the nested series as a list.

        .. deprecated:: 0.6.0
            Use `columns` instead.
        """
        return self.columns

    @property
    @nested_only
    def columns(self):
        """Returns the names of the nested columns of the nested series as a list."""
        return self.nest.columns

    @property
    @nested_only
    def flat_length(self):
        """Returns the length of the flattened nested series."""
        return self.nest.flat_length

    @property
    @nested_only
    @deprecated(
        version="0.6.10",
        reason="`list_lengths` is deprecated and will be removed in version 0.8.0, use `len()` instead.",
    )
    def list_lengths(self):
        """Returns the lengths of the list-packed nested series.

        .. deprecated:: 0.6.10
            Use `len()` instead.
        """
        return self.len()

    @nested_only
    def len(self):
        """Returns the lengths of the list-packed nested series."""
        return self.nest.len()

    def __getitem__(self, key):
        """Equip getitem with ability to handle nested data.

        For a ``NestedDtype`` series, the following keys are routed to the
        ``.nest`` accessor: a string naming one of ``self.columns``, a list or
        tuple made only of strings, and a boolean-dtype ``pd.Series`` mask.
        Every other key, and every key on a non-nested series, uses the
        regular ``pd.Series`` lookup.
        """
        # Pure pandas Series behavior if not a NestedDtype
        if not isinstance(self.dtype, NestedDtype):
            return super().__getitem__(key)

        # Return a flattened series for a single column
        if isinstance(key, str) and key in self.columns:
            return self.nest[key]
        # For list-like keys, perform sub-column selection
        elif isinstance(key, list | tuple) and all(isinstance(k, str) for k in key):
            return self.nest[key]

        # Handle boolean masking
        if isinstance(key, pd.Series) and pd.api.types.is_bool_dtype(key.dtype):
            return self.nest[key]

        # Otherwise, fall back to the default behavior
        return super().__getitem__(key)

    def __setitem__(self, key, value):
        """Equip setitem with ability to handle nested data.

        For a ``NestedDtype`` series, a string key naming one of
        ``self.columns`` is assigned through the ``.nest`` accessor. Every
        other key, and every key on a non-nested series, uses the regular
        ``pd.Series`` assignment.
        """
        # Pure pandas Series behavior if not a NestedDtype
        if not isinstance(self.dtype, NestedDtype):
            return super().__setitem__(key, value)

        # Use nest setitem when setting on a single field
        if isinstance(key, str) and key in self.columns:
            self.nest[key] = value
            return

        return super().__setitem__(key, value)

    @nested_only
    @deprecated(version="0.6.0", reason="`to_flat` will be removed in version 0.7.0, use `explode` instead.")
    def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:
        """Convert nested series into dataframe of flat arrays.

        .. deprecated:: 0.6.0
            Use `explode` instead; this method forwards to it.

        Parameters
        ----------
        fields : list[str] or None, optional
            Names of the fields to include. Default is None, which means all fields.

        Returns
        -------
        pd.DataFrame
            Dataframe of flat arrays.

        Examples
        --------
        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)
        >>> nf["nested"].to_flat()
                   t       flux  flux_error band
        0    8.38389  80.074457         1.0    r
        0   13.40935  89.460666         1.0    g
        1   13.70439  96.826158         1.0    g
        1   8.346096   8.504421         1.0    g
        2   4.089045  31.342418         1.0    g
        2  11.173797   3.905478         1.0    g
        3  17.562349  69.232262         1.0    r
        3   2.807739  16.983042         1.0    r
        4   0.547752  87.638915         1.0    g
        4    3.96203   87.81425         1.0    r
        """
        return self.explode(columns=fields)

    @nested_only
    def explode(self, columns: list[str] | str | None = None) -> pd.DataFrame:
        """Unpack nested series into dataframe of flat arrays.

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.

        Returns
        -------
        pd.DataFrame
            Dataframe of flat arrays.

        Examples
        --------
        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)
        >>> nf["nested"].explode()
                   t       flux  flux_error band
        0    8.38389  80.074457         1.0    r
        0   13.40935  89.460666         1.0    g
        1   13.70439  96.826158         1.0    g
        1   8.346096   8.504421         1.0    g
        2   4.089045  31.342418         1.0    g
        2  11.173797   3.905478         1.0    g
        3  17.562349  69.232262         1.0    r
        3   2.807739  16.983042         1.0    r
        4   0.547752  87.638915         1.0    g
        4    3.96203   87.81425         1.0    r
        """
        return self.nest.to_flat(columns=columns)

    @nested_only
    def to_lists(self, columns: list[str] | str | None = None) -> pd.DataFrame:
        """Convert nested series into dataframe of list-array columns.

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.

        Returns
        -------
        pd.DataFrame
            Dataframe of list-arrays.

        Examples
        --------
        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)
        >>> nf["nested"].to_lists()
                                   t                       flux flux_error       band
        0  [ 8.38389029 13.4093502 ]  [80.07445687 89.46066635]    [1. 1.]  ['r' 'g']
        1  [13.70439001  8.34609605]  [96.82615757  8.50442114]    [1. 1.]  ['g' 'g']
        2  [ 4.08904499 11.17379657]  [31.34241782  3.90547832]    [1. 1.]  ['g' 'g']
        3  [17.56234873  2.80773877]  [69.23226157 16.98304196]    [1. 1.]  ['r' 'r']
        4    [0.54775186 3.96202978]  [87.63891523 87.81425034]    [1. 1.]  ['g' 'r']
        """
        return self.nest.to_lists(columns=columns)