Source code for nested_pandas.series.accessor

# Python 3.9 doesn't support "|" for types
from __future__ import annotations

from collections.abc import Generator, Mapping
from typing import cast

import numpy as np
import pandas as pd
import pyarrow as pa
from deprecated import deprecated
from numpy.typing import ArrayLike
from pandas.api.extensions import register_series_accessor

from nested_pandas.series.dtype import NestedDtype
from nested_pandas.series.nestedseries import NestedSeries
from nested_pandas.series.packer import pack_flat, pack_sorted_df_into_struct
from nested_pandas.series.utils import downcast_large_list_array, nested_types_mapper

__all__ = ["NestSeriesAccessor"]



[docs]
@register_series_accessor("nest")
class NestSeriesAccessor(Mapping):
    """Accessor for operations on Series of NestedDtype

    Available as ".nest" property of a Series with NestedDtype.

    This accessor implements `MutableMapping` interface over the fields of the
    struct, so you can access, change and delete the fields as if it was a
    dictionary, with `[]`, `[] =` and `del` operators.
    """


[docs]
    def __init__(self, series):
        """Create the accessor for a Series of NestedDtype

        Parameters
        ----------
        series : pd.Series
            Series to wrap, it is validated with `_check_series`.

        Raises
        ------
        AttributeError
            If the dtype of `series` is not a NestedDtype.
        """
        # Validate before storing, so the attribute is only set for a supported series.
        self._check_series(series)

        self._series = series


    @staticmethod
    def _check_series(series):
        """Validate that the series can be used with the `.nest` accessor

        Parameters
        ----------
        series : pd.Series
            Series to check.

        Raises
        ------
        AttributeError
            If the dtype of the series is not an instance of NestedDtype.
        """
        series_dtype = series.dtype
        if isinstance(series_dtype, NestedDtype):
            return
        raise AttributeError(f"Can only use .nest accessor with a Series of NestedDtype, got {series_dtype}")


[docs]
    def to_lists(self, columns: list[str] | str | None = None, large_list: bool = False) -> pd.DataFrame:
        """Convert nested series into dataframe of list-array columns

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.
        large_list : bool, optional
            If False (default), use regular ``list_`` (int32 offsets). Set to True to
            use ``large_list`` (int64 offsets), which is required when the total number
            of nested elements across all rows exceeds ~2.1 billion (int32 max).

        Returns
        -------
        pd.DataFrame
            Dataframe of list-arrays.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.to_lists()
                                   t                       flux flux_error       band
        0  [ 8.38389029 13.4093502 ]  [80.07445687 89.46066635]    [1. 1.]  ['r' 'g']
        1  [13.70439001  8.34609605]  [96.82615757  8.50442114]    [1. 1.]  ['g' 'g']
        2  [ 4.08904499 11.17379657]  [31.34241782  3.90547832]    [1. 1.]  ['g' 'g']
        3  [17.56234873  2.80773877]  [69.23226157 16.98304196]    [1. 1.]  ['r' 'r']
        4    [0.54775186 3.96202978]  [87.63891523 87.81425034]    [1. 1.]  ['g' 'r']
        """
        columns = columns if columns is not None else list(self._series.array.field_names)

        if isinstance(columns, str):
            columns = [columns]

        if len(columns) == 0:
            raise ValueError("Cannot convert a struct with no fields to lists")

        list_table = self._series.array.pa_table.select(columns)
        if not large_list:
            list_table = pa.table(
                {col: downcast_large_list_array(list_table.column(col)) for col in list_table.column_names}
            )
        list_df = list_table.to_pandas(types_mapper=nested_types_mapper)
        list_df.index = self._series.index

        return list_df



[docs]
    def to_flat(self, columns: list[str] | str | None = None) -> pd.DataFrame:
        """Convert nested series into dataframe of flat arrays

        Parameters
        ----------
        columns : list[str] or str or None, optional
            Names of the column(s) to include. Default is None, which means all columns.

        Returns
        -------
        pd.DataFrame
            Dataframe of flat arrays.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.to_flat()
                   t       flux  flux_error band
        0    8.38389  80.074457         1.0    r
        0   13.40935  89.460666         1.0    g
        1   13.70439  96.826158         1.0    g
        1   8.346096   8.504421         1.0    g
        2   4.089045  31.342418         1.0    g
        2  11.173797   3.905478         1.0    g
        3  17.562349  69.232262         1.0    r
        3   2.807739  16.983042         1.0    r
        4   0.547752  87.638915         1.0    g
        4    3.96203   87.81425         1.0    r

        """
        columns = columns if columns is not None else list(self._series.array.field_names)

        if isinstance(columns, str):
            columns = [columns]

        if len(columns) == 0:
            raise ValueError("Cannot flatten a struct with no columns")

        index = self.flat_index

        flat_chunks: dict[str, list[pa.Array]] = {column: [] for column in columns}
        for chunk in self._series.array.struct_array.iterchunks():
            struct_array = cast(pa.StructArray, chunk)
            for column in columns:
                list_array = cast(pa.LargeListArray, struct_array.field(column))
                flat_array = list_array.flatten()
                flat_chunks[column].append(flat_array)

        flat_series = {}
        for column, chunks in flat_chunks.items():
            dtype = self._series.dtype.column_dtype(column)
            chunked_array = pa.chunked_array(chunks, type=self._series.dtype.column_dtypes[column])
            flat_series[column] = pd.Series(
                chunked_array,
                index=index,
                name=column,
                copy=False,
                dtype=dtype,
            )

        # TODO: Consider returning a NestedSeries if only one column is present
        return pd.DataFrame(flat_series)


    @property
    @deprecated(
        version="0.7.0",
        reason="`list_lengths` is deprecated and will be removed in version 0.8.0, use `len()` instead.",
    )
    def list_lengths(self) -> list[int]:
        """Lengths of the list arrays

        Deprecated property, it returns the result of `.nest.len()`.
        """
        return self.len()


[docs]
    def len(self) -> list[int]:
        """Lengths of the list arrays"""
        return self._series.array.list_lengths


    @property
    def flat_length(self) -> int:
        """Length of the flat arrays"""
        return self._series.array.flat_length

    @property
    @deprecated(
        version="0.6.0",
        reason="`fields` will be removed in version 0.7.0, use `columns` instead.",
    )
    def fields(self) -> list[str]:
        """Names of the nested columns

        Deprecated property, it returns the value of `.nest.columns`.
        """
        return self.columns

    @property
    def columns(self) -> list[str]:
        """Names of the nested columns"""
        return self._series.array.field_names

    @property
    def flat_index(self) -> pd.Index:
        """Index of the flattened arrays"""
        flat_index = np.repeat(self._series.index, np.diff(self._series.array.list_offsets))
        # pd.Index supports np.repeat, so flat_index is the same type as self._series.index
        flat_index = cast(pd.Index, flat_index)
        return flat_index

    @deprecated(
        version="0.6.0",
        reason="`with_field` will be removed in version 0.7.0, use `set_column` instead.",
    )
    def with_field(self, field: str, value: ArrayLike) -> NestedSeries:
        """Set the field from flat-array of values and return a new series

        It is a deprecated alias for `.nest.set_column`, the arguments are
        passed to it unchanged.

        Parameters
        ----------
        field : str
            Name of the field to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a scalar or have the same length
            as the flat arrays, e.g. `self.flat_length`.

        Returns
        -------
        NestedSeries
            The new series with the field set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nested_with_avg = nf["nested"].nest.with_field("avg_flux", 50.0)
        >>> # Look at one row of the series
        >>> nested_with_avg[0]
                  t       flux  flux_error band  avg_flux
        0   8.38389  80.074457         1.0    r      50.0
        1  13.40935  89.460666         1.0    g      50.0
        """
        return self.set_column(field, value)


[docs]
    def set_column(self, column: str, value: ArrayLike) -> NestedSeries:
        """Set the column from a flat-array of values and return a new series

        It is an alias for `.nest.set_flat_column`, the arguments are passed
        to it unchanged.

        Parameters
        ----------
        column : str
            Name of the column to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a scalar or have the same length
            as the flat arrays, e.g. `self.flat_length`.

        Returns
        -------
        NestedSeries
            The new series with the column set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nested_with_avg = nf["nested"].nest.set_column("avg_flux", 50.0)
        >>> # Look at one row of the series
        >>> nested_with_avg[0]
                  t       flux  flux_error band  avg_flux
        0   8.38389  80.074457         1.0    r      50.0
        1  13.40935  89.460666         1.0    g      50.0
        """
        return self.set_flat_column(column, value)


    @deprecated(
        version="0.6.0",
        reason="`with_flat_field` will be removed in version 0.7.0, use `set_flat_column` instead.",
    )
    def with_flat_field(self, field: str, value: ArrayLike) -> NestedSeries:
        """Set the field from flat-array of values and return a new series

        It is a deprecated alias for `.nest.set_flat_column`, the arguments
        are passed to it unchanged.

        Parameters
        ----------
        field : str
            Name of the field to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a scalar or have the same length
            as the flat arrays, e.g. `self.flat_length`.

        Returns
        -------
        NestedSeries
            The new series with the field set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nested_with_avg = nf["nested"].nest.with_flat_field("avg_flux",
        ...                                                     50.0)
        >>> # Look at one row of the series
        >>> nested_with_avg[0]
                  t       flux  flux_error band  avg_flux
        0   8.38389  80.074457         1.0    r      50.0
        1  13.40935  89.460666         1.0    g      50.0
        """
        return self.set_flat_column(field, value)


[docs]
    def set_flat_column(self, column: str, value: ArrayLike) -> NestedSeries:
        """Return a new series with the column set from a flat-array of values

        The values are set on a copy of the underlying array, which is then
        wrapped into a new series with the same index and name.

        Parameters
        ----------
        column : str
            Name of the column to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a scalar or have the same length
            as the flat arrays, e.g. `self.flat_length`.

        Returns
        -------
        NestedSeries
            The new series with the column set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nested_with_avg = nf["nested"].nest.set_flat_column("avg_flux",
        ...                                                     50.0)
        >>> # Look at one row of the series
        >>> nested_with_avg[0]
                  t       flux  flux_error band  avg_flux
        0   8.38389  80.074457         1.0    r      50.0
        1  13.40935  89.460666         1.0    g      50.0
        """
        series = self._series
        updated = series.array.copy()
        updated.set_flat_field(column, value)
        return NestedSeries(updated, index=series.index, name=series.name, copy=False)


    @deprecated(
        version="0.6.0",
        reason="`with_list_field` will be removed in version 0.7.0, use `set_list_column` instead.",
    )
    def with_list_field(self, field: str, value: ArrayLike) -> NestedSeries:
        """Set the field from list-array of values and return a new series

        It is a deprecated alias for `.nest.set_list_column`, the arguments
        are passed to it unchanged.

        Parameters
        ----------
        field : str
            Name of the field to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a list-array of the same length
            as the series.

        Returns
        -------
        NestedSeries
            The new series with the field set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(2, 2, seed=1)

        >>> nf_new_band = nf["nested"].nest.with_list_field("new_band",
        ...                                                 [["g","g"],
        ...                                                  ["r","r"]])
        >>> # Look at one row of the series
        >>> nf_new_band[0]
                  t       flux  flux_error band new_band
        0  2.935118  39.676747         1.0    g        g
        1  3.725204  41.919451         1.0    r        g

        """
        return self.set_list_column(field, value)


[docs]
    def set_list_column(self, column: str, value: ArrayLike) -> NestedSeries:
        """Return a new series with the column set from a list-array of values

        The values are set on a copy of the underlying array, which is then
        wrapped into a new series with the same index and name.

        Parameters
        ----------
        column : str
            Name of the column to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must be a list-array of the same length
            as the series.

        Returns
        -------
        NestedSeries
            The new series with the column set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(2, 2, seed=1)

        >>> nf_new_band = nf["nested"].nest.set_list_column("new_band",
        ...                                                 [["g","g"],
        ...                                                  ["r","r"]])
        >>> # Look at one row of the series
        >>> nf_new_band[0]
                  t       flux  flux_error band new_band
        0  2.935118  39.676747         1.0    g        g
        1  3.725204  41.919451         1.0    r        g

        """
        series = self._series
        updated = series.array.copy()
        updated.set_list_field(column, value)
        return NestedSeries(updated, index=series.index, name=series.name, copy=False)


    @deprecated(
        version="0.6.0",
        reason="`with_filled_field` will be removed in version 0.7.0, use `set_filled_column` instead.",
    )
    def with_filled_field(self, field: str, value: ArrayLike) -> NestedSeries:
        """Set the field by repeating values and return a new series

        It is a deprecated alias for `.nest.set_filled_column`, the arguments
        are passed to it unchanged.

        The input value array must have as many elements as the Series,
        each of them will be repeated in the corresponding list.

        .nest.with_filled_field("a", [1, 2, 3]) will create a nested field
        "a" with values [[1, 1, ...], [2, 2, ...], [3, 3, ...]].

        Parameters
        ----------
        field : str
            Name of the field to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must have the same length as the series.

        Returns
        -------
        NestedSeries
            The new series with the field set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(3, 2, seed=1)

        >>> nf_filled = nf["nested"].nest.with_filled_field("a", [1,2,3])

        >>> # Look at one row of the series
        >>> nf_filled[0]
                   t       flux  flux_error band  a
        0   3.725204  20.445225         1.0    g  1
        1  10.776335  67.046751         1.0    r  1
        """
        return self.set_filled_column(field, value)


[docs]
    def set_filled_column(self, column: str, value: ArrayLike) -> NestedSeries:
        """Return a new series with the column filled by repeated values

        The input value array must have as many elements as the Series,
        each of them will be repeated in the corresponding list. The values
        are set on a copy of the underlying array, which is then wrapped into
        a new series with the same index and name.

        .nest.set_filled_column("a", [1, 2, 3]) will create a nested column
        "a" with values [[1, 1, ...], [2, 2, ...], [3, 3, ...]].

        Parameters
        ----------
        column : str
            Name of the column to set. If not present, it will be added.
        value : ArrayLike
            Array of values to set. It must have the same length as the series.

        Returns
        -------
        NestedSeries
            The new series with the column set.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(3, 2, seed=1)

        >>> nf_filled = nf["nested"].nest.set_filled_column("a", [1,2,3])

        >>> # Look at one row of the series
        >>> nf_filled[0]
                   t       flux  flux_error band  a
        0   3.725204  20.445225         1.0    g  1
        1  10.776335  67.046751         1.0    r  1
        """
        series = self._series
        updated = series.array.copy()
        updated.fill_field_lists(column, value)
        return NestedSeries(updated, index=series.index, name=series.name, copy=False)


    @deprecated(
        version="0.6.0",
        reason="`without_field` will be removed in version 0.7.0, use `drop` instead.",
    )
    def without_field(self, field: str | list[str]) -> NestedSeries:
        """Remove the field(s) from the series and return a new series

        It is a deprecated alias for `.nest.drop`, the argument is passed
        to it unchanged.

        Note, that at least one field must be left in the series.

        Parameters
        ----------
        field : str or list[str]
            Name of the field(s) to remove.

        Returns
        -------
        NestedSeries
            The new series without the field(s).

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.without_field("flux")
        0    [{t: 8.38389, flux_error: 1.0, band: 'r'}; …] ...
        1    [{t: 13.70439, flux_error: 1.0, band: 'g'}; …]...
        2    [{t: 4.089045, flux_error: 1.0, band: 'g'}; …]...
        3    [{t: 17.562349, flux_error: 1.0, band: 'r'}; …...
        4    [{t: 0.547752, flux_error: 1.0, band: 'g'}; …]...
        Name: nested, dtype: nested<t: [double], flux_error: [double], band: [string]>
        """
        return self.drop(field)


[docs]
    def drop(self, column: str | list[str]) -> NestedSeries:
        """Return a new series with the given nested column(s) removed

        The columns are removed from a copy of the underlying array, which is
        then wrapped into a new series with the same index and name.

        Note, that at least one nested column must be left in the series.

        Parameters
        ----------
        column : str or list[str]
            Name of the column(s) to remove.

        Returns
        -------
        NestedSeries
            The new series without the column(s).

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.drop("flux")
        0    [{t: 8.38389, flux_error: 1.0, band: 'r'}; …] ...
        1    [{t: 13.70439, flux_error: 1.0, band: 'g'}; …]...
        2    [{t: 4.089045, flux_error: 1.0, band: 'g'}; …]...
        3    [{t: 17.562349, flux_error: 1.0, band: 'r'}; …...
        4    [{t: 0.547752, flux_error: 1.0, band: 'g'}; …]...
        Name: nested, dtype: nested<t: [double], flux_error: [double], band: [string]>
        """
        # A single name is treated as a one-element list
        names = [column] if isinstance(column, str) else column

        series = self._series
        trimmed = series.array.copy()
        trimmed.pop_fields(names)
        return NestedSeries(trimmed, index=series.index, name=series.name, copy=False)


    @deprecated(
        version="0.6.0",
        reason="`query_flat` will be removed in version 0.7.0, use `query` instead.",
    )
    def query_flat(self, query: str) -> NestedSeries:
        """Query the flat arrays with a boolean expression

        It is a deprecated alias for `.nest.query`, the argument is passed
        to it unchanged.

        Currently, it will remove empty rows from the output series.
        # TODO: preserve the index keeping empty rows

        Parameters
        ----------
        query : str
            Boolean expression to filter the rows.

        Returns
        -------
        NestedSeries
            The filtered series.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 5, seed=1)

        >>> nf["nested"].nest.query_flat("flux > 50")
        0    [{t: 13.40935, flux: 98.886109, flux_error: 1....
        1    [{t: 13.70439, flux: 68.650093, flux_error: 1....
        2    [{t: 4.089045, flux: 83.462567, flux_error: 1....
        3    [{t: 2.807739, flux: 78.927933, flux_error: 1....
        4    [{t: 0.547752, flux: 75.014431, flux_error: 1....
        dtype: nested<t: [double], flux: [double], flux_error: [double], band: [string]>
        """
        return self.query(query)


[docs]
    def query(self, query: str) -> NestedSeries:
        """Filter the flat arrays with a boolean expression

        The series is flattened with `to_flat`, the expression is applied with
        `pd.DataFrame.query`, and the remaining rows are packed back into a
        nested series.

        Currently, it will remove empty rows from the output series.
        # TODO: preserve the index keeping empty rows

        Parameters
        ----------
        query : str
            Boolean expression to filter the rows.

        Returns
        -------
        NestedSeries
            The filtered series. If nothing matches, an empty series of the
            same dtype is returned.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 5, seed=1)

        >>> nf["nested"].nest.query("flux > 50")
        0    [{t: 13.40935, flux: 98.886109, flux_error: 1....
        1    [{t: 13.70439, flux: 68.650093, flux_error: 1....
        2    [{t: 4.089045, flux: 83.462567, flux_error: 1....
        3    [{t: 2.807739, flux: 78.927933, flux_error: 1....
        4    [{t: 0.547752, flux: 75.014431, flux_error: 1....
        dtype: nested<t: [double], flux: [double], flux_error: [double], band: [string]>
        """
        filtered = self.to_flat().query(query)

        if len(filtered) > 0:
            return pack_sorted_df_into_struct(filtered)

        # Nothing is left: build an empty series keeping the dtype and the index dtype/name
        empty_index = pd.Index([], dtype=filtered.index.dtype, name=filtered.index.name)
        return NestedSeries([], dtype=self._series.dtype, index=empty_index)


    @deprecated(
        version="0.6.0",
        reason="`get_flat_index` will be removed in version 0.7.0, use the `flat_index` property instead.",
    )
    def get_flat_index(self) -> pd.Index:
        """Index of the flat arrays

        It is a deprecated method, it returns the value of the `flat_index` property.

        Returns
        -------
        pd.Index
            The index of the flat arrays. It is a repeated index of the
            original index, with the number of repetitions equal to the
            number of elements in the list-array field.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.get_flat_index()
        Index([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='int64')
        """
        return self.flat_index

    @deprecated(
        version="0.6.0",
        reason="`get_flat_series` will be removed in version 0.7.0, use `to_flat()[column]` instead.",
    )
    def get_flat_series(self, field: str) -> pd.Series:
        """Get the flat-array field as a pd.Series

        The list-array of the field is flattened chunk by chunk and indexed
        by `flat_index`.

        Parameters
        ----------
        field : str
            Name of the field to get.

        Returns
        -------
        pd.Series
            The flat-array field. It is wrapped into a NestedSeries when the
            dtype of the field is a NestedDtype.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.get_flat_series("flux")
        0    80.074457
        0    89.460666
        1    96.826158
        1     8.504421
        2    31.342418
        2     3.905478
        3    69.232262
        3    16.983042
        4    87.638915
        4     87.81425
        Name: flux, dtype: double[pyarrow]
        """
        flattened = [
            cast(pa.LargeListArray, cast(pa.StructArray, chunk).field(field)).flatten()
            for chunk in self._series.array.struct_array.iterchunks()
        ]

        nested_dtype = self._series.dtype
        values = pa.chunked_array(flattened, type=nested_dtype.column_dtypes[field])
        pandas_dtype = nested_dtype.column_dtype(field)

        result = pd.Series(
            values,
            dtype=pandas_dtype,
            index=self.flat_index,
            name=field,
            copy=False,
        )
        # A nested field is returned as a NestedSeries
        if isinstance(pandas_dtype, NestedDtype):
            return NestedSeries(result, copy=False)
        return result

    @deprecated(
        version="0.6.0",
        reason="`get_list_series` will be removed in version 0.7.0, use `to_lists()[column]` instead.",
    )
    def get_list_series(self, field: str) -> pd.Series:
        """Get the list-array field as a Series

        The field is taken from the arrow table of the underlying array and
        returned with an arrow-backed dtype and the index of the original series.

        Parameters
        ----------
        field : str
            Name of the field to get.

        Returns
        -------
        pd.Series
            The list-array field.

        Examples
        --------

        >>> from nested_pandas.datasets.generation import generate_data
        >>> nf = generate_data(5, 2, seed=1)

        >>> nf["nested"].nest.get_list_series("flux")
        0    [80.07445687 89.46066635]
        1    [96.82615757  8.50442114]
        2    [31.34241782  3.90547832]
        3    [69.23226157 16.98304196]
        4    [87.63891523 87.81425034]
        Name: flux, dtype: large_list<item: double>[pyarrow]
        """
        lists = self._series.array.pa_table[field]
        arrow_dtype = pd.ArrowDtype(lists.type)
        return pd.Series(lists, dtype=arrow_dtype, index=self._series.index, name=field, copy=False)

    def __getitem__(self, key: str | list[str]) -> NestedSeries:
        """Get nested column(s) or apply a boolean mask to the flat rows

        Parameters
        ----------
        key : str or list[str] or pd.Series of bool
            - A boolean Series, indexed like the flattened dataframe, selects
              the flat rows to keep.
            - A list of names selects a subset of the nested columns.
            - A single name selects one column as a flat series.

        Returns
        -------
        NestedSeries or pd.Series
            A NestedSeries for a boolean mask or a list of names. For a single
            name, the flat series of that column, which is a NestedSeries only
            when the dtype of the column is a NestedDtype.

        Raises
        ------
        ValueError
            If the index of the boolean mask differs from the index of the
            flattened dataframe.
        """
        series = self._series

        # Allow boolean masking given a Series of booleans
        if isinstance(key, pd.Series) and pd.api.types.is_bool_dtype(key.dtype):
            flat_df = self.to_flat()  # Use the flat representation
            if key.index.equals(flat_df.index):
                # Apply the mask and pack the remaining rows back
                return NestedSeries(pack_flat(flat_df[key]), index=series.index, name=series.name)
            raise ValueError("Boolean mask must have the same index as the flattened nested dataframe.")

        # A list of names gives a nested series with these columns only
        if isinstance(key, list):
            return NestedSeries(series.array.view_fields(key), index=series.index, name=series.name)

        # A single name gives the flat series of that column
        flattened = [
            cast(pa.LargeListArray, cast(pa.StructArray, chunk).field(key)).flatten()
            for chunk in series.array.struct_array.iterchunks()
        ]

        nested_dtype = series.dtype
        values = pa.chunked_array(flattened, type=nested_dtype.column_dtypes[key])
        pandas_dtype = nested_dtype.column_dtype(key)

        result = pd.Series(
            values,
            dtype=pandas_dtype,
            index=self.flat_index,
            name=key,
            copy=False,
        )
        if isinstance(pandas_dtype, NestedDtype):
            return NestedSeries(result, copy=False)
        return result

    def __setitem__(self, key: str, value: ArrayLike) -> None:
        """Replace the field values from flat-array of values

        Currently, only replacement of the whole field is supported, the length
        and dtype of the input value must match the field.
        https://github.com/lincc-frameworks/nested-pandas/issues/87

        Parameters
        ----------
        key : str
            Name of the field to set.
        value : ArrayLike
            A scalar, which is set for all rows, or a flat array of length
            `self.flat_length`. A `pd.Series` must be indexed by `self.flat_index`.

        Raises
        ------
        ValueError
            If `value` is a `pd.Series` with an index different from
            `self.flat_index`, or if the length of `value` differs from
            `self.flat_length`.
        """
        # TODO: we can be much-much smarter about the performance here
        # TODO: think better about underlying pa.ChunkArray in both self._series.array and value

        ndim = np.ndim(value)

        # Everything is empty, do nothing
        if len(self._series) == 0 and ndim != 0:
            array = pa.array(value, from_pandas=True)
            if len(array) == 0:
                return

        # Set single value for all rows
        if ndim == 0:
            self._series.array.set_flat_field(key, value, keep_dtype=True)
            return

        if isinstance(value, pd.Series) and not self.flat_index.equals(value.index):
            raise ValueError("Cannot set field with a Series of different index")

        pa_array = pa.array(value, from_pandas=True)

        # Input is a flat array of values, reject any other length.
        # The exception used to be constructed without being raised, which made this check a no-op.
        # NOTE(review): the message mentions 'list' arrays, but only the flat-array path is
        # implemented in this method - confirm whether list-array input should be supported.
        if len(pa_array) != self.flat_length:
            raise ValueError(
                f"Cannot set field {key} with value of length {len(pa_array)}, the value is expected to be "
                f"either a scalar, a 'flat' array of length {self.flat_length}, or a 'list' array of length "
                f"{len(self._series)}."
            )

        self._series.array.set_flat_field(key, pa_array, keep_dtype=True)

    def __iter__(self) -> Generator[str, None, None]:
        return iter(self._series.array.field_names)

    def __len__(self) -> int:
        return len(self._series.array.field_names)

    def __eq__(self, other) -> bool:
        if not isinstance(other, type(self)):
            return False
        return self._series.equals(other._series)

    def clear(self) -> None:
        """Mandatory MutableMapping method, always fails with NotImplementedError

        The reason is that we cannot delete all nested fields from the nested series.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("Cannot delete fields from nested series")


[docs]
    def to_flatten_inner(self, field: str) -> NestedSeries:
        """Merge a doubly-nested field into its parent nesting level

        Applicable to multiple nesting only: `field` must itself hold
        a nested series.

        Every row of this Series is transformed as follows:

        1. Each nested item stored under `field` is turned into a "flat"
           frame. If a nested item contains fields that are also nested, those
           are brought up as their own nested structures in the resulting
           "flat" frame.
        2. The items of all the other fields are repeated so that they match
           the length of that frame.

        The outcome is the same as evaluating
        `nested_df.drop(field, axis=1).join(nested_df[field].nest.to_flat())`
        for each nested element of the Series.

        Parameters
        ----------
        field : str
            Inner field, must have NestedDtype.

        Returns
        -------
        NestedSeries
            This series object, but with the inner field exploded.

        Raises
        ------
        ValueError
            If the dtype of `field` is not NestedDtype.

        Examples
        --------

        >>> from nested_pandas import NestedFrame
        >>> from nested_pandas.datasets import generate_data
        >>> nf = generate_data(5, 2, seed=1).rename(columns={"nested": "inner"})
        >>> nf["b"] = "b"  # Shorten width of example output

        Assign a repeated ID to double-nest on

        >>> nf["id"] = [0, 0, 0, 1, 1]
        >>> nf
                  a  b                                              inner  id
        0  0.417022  b  [{t: 8.38389, flux: 80.074457, flux_error: 1.0...   0
        1  0.720324  b  [{t: 13.70439, flux: 96.826158, flux_error: 1....   0
        2  0.000114  b  [{t: 4.089045, flux: 31.342418, flux_error: 1....   0
        3  0.302333  b  [{t: 17.562349, flux: 69.232262, flux_error: 1...   1
        4  0.146756  b  [{t: 0.547752, flux: 87.638915, flux_error: 1....   1

        >>> nf.inner.nest.to_flat()
                   t       flux  flux_error band
        0    8.38389  80.074457         1.0    r
        0   13.40935  89.460666         1.0    g
        1   13.70439  96.826158         1.0    g
        1   8.346096   8.504421         1.0    g
        2   4.089045  31.342418         1.0    g
        2  11.173797   3.905478         1.0    g
        3  17.562349  69.232262         1.0    r
        3   2.807739  16.983042         1.0    r
        4   0.547752  87.638915         1.0    g
        4    3.96203   87.81425         1.0    r

        Create a dataframe with double-nested column "outer"

        >>> dnf = NestedFrame.from_flat(nf, base_columns=[], on="id", name="outer")

        Flatten the "inner" nested column.
        This is like a "concatenation" of the initial nf frame on duplicated `id` rows

        >>> concated_nf_series = dnf["outer"].nest.to_flatten_inner("inner")
        >>> concated_nf_series  # doctest: +NORMALIZE_WHITESPACE
        id
        0    [{a: 0.417022, b: 'b', t: 8.38389, flux: 80.07...
        1    [{a: 0.302333, b: 'b', t: 17.562349, flux: 69....
        Name: outer, dtype: nested<a: [double], b: [string], t: [double],
        flux: [double], flux_error: [double], band: [string]>

        >>> concated_nf_series.nest.to_flat()  # doctest: +NORMALIZE_WHITESPACE
                   a  b          t       flux  flux_error band
        id
        0   0.417022  b    8.38389  80.074457         1.0    r
        0   0.417022  b   13.40935  89.460666         1.0    g
        0   0.720324  b   13.70439  96.826158         1.0    g
        0   0.720324  b   8.346096   8.504421         1.0    g
        0   0.000114  b   4.089045  31.342418         1.0    g
        0   0.000114  b  11.173797   3.905478         1.0    g
        1   0.302333  b  17.562349  69.232262         1.0    r
        1   0.302333  b   2.807739  16.983042         1.0    r
        1   0.146756  b   0.547752  87.638915         1.0    g
        1   0.146756  b    3.96203   87.81425         1.0    r
        """
        field_dtype = self._series.dtype.column_dtype(field)
        if not isinstance(field_dtype, NestedDtype):
            raise ValueError(f"Field '{field}' dtype must be NestedDtype, got '{field_dtype}'")

        # Work on a copy of the series that carries a positional ("ordinal") index
        ordinal = self._series.reset_index(drop=True)

        # Flatten the first nesting level and label its rows with two levels:
        #   "outer" - ordinal position of the row in the original top-level series,
        #   "inner" - ordinal position of the row within the flattened frame.
        # "inner" has more unique values than "outer", and the total number of
        # double-nested rows is larger still.
        flat = ordinal.nest.to_flat()
        outer_level = pd.Index(flat.index, name="outer")
        inner_level = pd.RangeIndex(len(flat), name="inner")
        flat = flat.set_index([outer_level, inner_level])

        # Flatten the inner field, keeping only "inner" as its index so that it
        # can serve as the join key against the remaining columns
        exploded_field = flat[field].nest.to_flat()
        exploded_field = exploded_field.reset_index("outer", drop=True)
        remaining = flat.drop(field, axis=1)
        joined = remaining.join(exploded_field, on="inner")

        # "inner" has done its job; only the "outer" ordinal level stays as index
        joined = joined.reset_index("inner", drop=True)

        # Pack the joined frame back into a nested series on the "outer" index
        packed = pack_flat(joined, name=self._series.name)

        # Some index values may be missing if the original series had NULLs:
        # start from an all-NULL series and fill in the rows we do have
        if len(packed) < len(ordinal):
            padded = NestedSeries(None, index=ordinal.index, dtype=packed.dtype)
            padded[packed.index] = packed
            packed = padded

        # Finally restore the caller's original index
        packed.index = self._series.index
        return packed