/pandas/core/series.py
Python | 5558 lines | 5296 code | 76 blank | 186 comment | 92 complexity | ea8dc81a6f75149e2835496ceafedefc MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
Large files are truncated, but you can click here to view the full file
- """
- Data structure for 1-dimensional cross-sectional and time series data
- """
- from __future__ import annotations
- from textwrap import dedent
- from typing import (
- IO,
- TYPE_CHECKING,
- Any,
- Callable,
- Hashable,
- Iterable,
- Literal,
- Sequence,
- Union,
- cast,
- overload,
- )
- import warnings
- import weakref
- import numpy as np
- from pandas._config import get_option
- from pandas._libs import (
- lib,
- properties,
- reshape,
- tslibs,
- )
- from pandas._libs.lib import no_default
- from pandas._typing import (
- AggFuncType,
- ArrayLike,
- Axis,
- Dtype,
- DtypeObj,
- FillnaOptions,
- IndexKeyFunc,
- SingleManager,
- StorageOptions,
- TimedeltaConvertibleTypes,
- TimestampConvertibleTypes,
- ValueKeyFunc,
- npt,
- )
- from pandas.compat.numpy import function as nv
- from pandas.errors import InvalidIndexError
- from pandas.util._decorators import (
- Appender,
- Substitution,
- deprecate_nonkeyword_arguments,
- doc,
- )
- from pandas.util._validators import (
- validate_ascending,
- validate_bool_kwarg,
- validate_percentile,
- )
- from pandas.core.dtypes.cast import (
- convert_dtypes,
- maybe_box_native,
- maybe_cast_pointwise_result,
- validate_numeric_casting,
- )
- from pandas.core.dtypes.common import (
- ensure_platform_int,
- is_dict_like,
- is_integer,
- is_iterator,
- is_list_like,
- is_object_dtype,
- is_scalar,
- pandas_dtype,
- validate_all_hashable,
- )
- from pandas.core.dtypes.generic import ABCDataFrame
- from pandas.core.dtypes.inference import is_hashable
- from pandas.core.dtypes.missing import (
- isna,
- na_value_for_dtype,
- notna,
- remove_na_arraylike,
- )
- from pandas.core import (
- algorithms,
- base,
- generic,
- missing,
- nanops,
- ops,
- )
- from pandas.core.accessor import CachedAccessor
- from pandas.core.apply import SeriesApply
- from pandas.core.arrays import ExtensionArray
- from pandas.core.arrays.categorical import CategoricalAccessor
- from pandas.core.arrays.sparse import SparseAccessor
- import pandas.core.common as com
- from pandas.core.construction import (
- create_series_with_explicit_dtype,
- extract_array,
- is_empty_data,
- sanitize_array,
- )
- from pandas.core.generic import NDFrame
- from pandas.core.indexers import (
- deprecate_ndim_indexing,
- unpack_1tuple,
- )
- from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
- from pandas.core.indexes.api import (
- CategoricalIndex,
- DatetimeIndex,
- Float64Index,
- Index,
- MultiIndex,
- PeriodIndex,
- TimedeltaIndex,
- default_index,
- ensure_index,
- )
- import pandas.core.indexes.base as ibase
- from pandas.core.indexing import check_bool_indexer
- from pandas.core.internals import (
- SingleArrayManager,
- SingleBlockManager,
- )
- from pandas.core.shared_docs import _shared_docs
- from pandas.core.sorting import (
- ensure_key_mapped,
- nargsort,
- )
- from pandas.core.strings import StringMethods
- from pandas.core.tools.datetimes import to_datetime
- import pandas.io.formats.format as fmt
- import pandas.plotting
- if TYPE_CHECKING:
- from pandas._typing import (
- NumpySorter,
- NumpyValueArrayLike,
- )
- from pandas.core.frame import DataFrame
- from pandas.core.groupby.generic import SeriesGroupBy
- from pandas.core.resample import Resampler
# Public API of this module.
__all__ = ["Series"]

# Substitution values interpolated into shared docstrings (via the
# @Appender/@doc decorators) to specialize generic NDFrame docs for Series.
_shared_doc_kwargs = {
    "axes": "index",
    "klass": "Series",
    "axes_single_arg": "{0 or 'index'}",
    "axis": """axis : {0 or 'index'}
        Parameter needed for compatibility with DataFrame.""",
    "inplace": """inplace : bool, default False
        If True, performs operation inplace and returns None.""",
    "unique": "np.ndarray",
    "duplicated": "Series",
    "optional_by": "",
    "optional_mapper": "",
    "optional_labels": "",
    "optional_axis": "",
    "replace_iloc": """
    This differs from updating with ``.loc`` or ``.iloc``, which require
    you to specify a location to update with some value.""",
}
- def _coerce_method(converter):
- """
- Install the scalar coercion methods.
- """
- def wrapper(self):
- if len(self) == 1:
- return converter(self.iloc[0])
- raise TypeError(f"cannot convert the series to {converter}")
- wrapper.__name__ = f"__{converter.__name__}__"
- return wrapper
- # ----------------------------------------------------------------------
- # Series class
class Series(base.IndexOpsMixin, generic.NDFrame):
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, \\*, \\*\\*) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series. If data is a dict, argument order is
        maintained.
    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
        and index is None, then the keys in the data are used as the index. If the
        index is not None, the resulting Series is reindexed with the index values.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    name : str, optional
        The name to give to the Series.
    copy : bool, default False
        Copy input data. Only affects Series or 1d ndarray input. See examples.

    Examples
    --------
    Constructing Series from a dictionary with an Index specified

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
    >>> ser
    a   1
    b   2
    c   3
    dtype: int64

    The keys of the dictionary match with the Index values, hence the Index
    values have no effect.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
    >>> ser
    x   NaN
    y   NaN
    z   NaN
    dtype: float64

    Note that the Index is first built with the keys from the dictionary.
    After this the Series is reindexed with the given Index values, hence we
    get all NaN as a result.

    Constructing Series from a list with `copy=False`.

    >>> r = [1, 2]
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    [1, 2]
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to input data type the Series has a `copy` of
    the original data even though `copy=False`, so
    the data is unchanged.

    Constructing Series from a 1d ndarray with `copy=False`.

    >>> r = np.array([1, 2])
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    array([999,   2])
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to input data type the Series has a `view` on
    the original data, so
    the data is changed as well.
    """

    # Internal type tag used by ABC* isinstance checks (pandas.core.dtypes.generic).
    _typ = "series"
    # NOTE(review): also re-assigned with the same tuple in the "NDArray Compat"
    # section further down this class — one of the two is redundant; confirm
    # before removing either.
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    # Backing storage for the ``name`` property.
    _name: Hashable
    # Attributes propagated by ``__finalize__`` to results of operations.
    _metadata: list[str] = ["name"]
    # ``index`` is set via ``_set_axis``, not stored as a regular attribute.
    _internal_names_set = {"index"} | generic.NDFrame._internal_names_set
    # Namespaces exposed through CachedAccessor (``.dt``, ``.cat``, ...).
    _accessors = {"dt", "cat", "str", "sparse"}
    # Deprecated/removed attributes hidden from dir() / tab completion.
    _hidden_attrs = (
        base.IndexOpsMixin._hidden_attrs
        | generic.NDFrame._hidden_attrs
        | frozenset(["compress", "ptp"])
    )

    # Override cache_readonly bc Series is mutable
    # error: Incompatible types in assignment (expression has type "property",
    # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
    hasnans = property(  # type: ignore[assignment]
        # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
        base.IndexOpsMixin.hasnans.fget,  # type: ignore[attr-defined]
        doc=base.IndexOpsMixin.hasnans.__doc__,
    )
    # Single-column manager holding the data (block- or array-backed).
    _mgr: SingleManager
    # Arithmetic methods installed elsewhere (ops machinery); declared for typing.
    div: Callable[[Series, Any], Series]
    rdiv: Callable[[Series, Any], Series]
- # ----------------------------------------------------------------------
- # Constructors
    def __init__(
        self,
        data=None,
        index=None,
        dtype: Dtype | None = None,
        name=None,
        copy: bool = False,
        fastpath: bool = False,
    ):
        # Fastest path: handed a pre-built single-column manager with no other
        # arguments to reconcile.
        if (
            isinstance(data, (SingleBlockManager, SingleArrayManager))
            and index is None
            and dtype is None
            and copy is False
        ):
            # GH#33357 called with just the SingleBlockManager
            NDFrame.__init__(self, data)
            self.name = name
            return

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
                manager = get_option("mode.data_manager")
                if manager == "block":
                    data = SingleBlockManager.from_array(data, index)
                elif manager == "array":
                    data = SingleArrayManager.from_array(data, index)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:
            # Full (public) construction path: normalize each possible kind of
            # ``data`` into something sanitize_array / a manager can consume.

            name = ibase.maybe_extract_name(name, data, type(self))

            if is_empty_data(data) and dtype is None:
                # gh-17261
                warnings.warn(
                    "The default dtype for empty Series will be 'object' instead "
                    "of 'float64' in a future version. Specify a dtype explicitly "
                    "to silence this warning.",
                    FutureWarning,
                    stacklevel=2,
                )
                # uncomment the line below when removing the FutureWarning
                # dtype = np.dtype(object)

            if index is not None:
                index = ensure_index(index)

            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            if isinstance(data, MultiIndex):
                raise NotImplementedError(
                    "initializing a Series from a MultiIndex is not supported"
                )
            elif isinstance(data, Index):

                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # GH#24096 we need to ensure the index remains immutable
                    data = data._values.copy()
                copy = False

            elif isinstance(data, np.ndarray):
                if len(data.dtype):
                    # GH#13296 we are dealing with a compound dtype, which
                    #  should be treated as 2D
                    raise ValueError(
                        "Cannot construct a Series from an ndarray with "
                        "compound dtype.  Use DataFrame instead."
                    )
            elif isinstance(data, Series):
                if index is None:
                    index = data.index
                else:
                    # Align the incoming Series to the requested index.
                    data = data.reindex(index, copy=copy)
                    copy = False
                data = data._mgr
            elif is_dict_like(data):
                data, index = self._init_dict(data, index, dtype)
                dtype = None
                copy = False
            elif isinstance(data, (SingleBlockManager, SingleArrayManager)):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError(
                        "Cannot pass both SingleBlockManager "
                        "`data` argument and a different "
                        "`index` argument. `copy` must be False."
                    )

            elif isinstance(data, ExtensionArray):
                pass
            else:
                data = com.maybe_iterable_to_list(data)

            if index is None:
                if not is_list_like(data):
                    # Scalar data with no index: broadcast below via length-1 list.
                    data = [data]
                index = default_index(len(data))
            elif is_list_like(data):
                com.require_length_match(data, index)

        # create/copy the manager
        if isinstance(data, (SingleBlockManager, SingleArrayManager)):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy)

            manager = get_option("mode.data_manager")
            if manager == "block":
                data = SingleBlockManager.from_array(data, index)
            elif manager == "array":
                data = SingleArrayManager.from_array(data, index)

        generic.NDFrame.__init__(self, data)
        self.name = name
        self._set_axis(0, index, fastpath=True)
    def _init_dict(
        self, data, index: Index | None = None, dtype: DtypeObj | None = None
    ):
        """
        Derive the "_mgr" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series.
        index : Index or None, default None
            Index for the new Series: if None, use dict keys.
        dtype : np.dtype, ExtensionDtype, or None, default None
            The dtype for the new Series: if None, infer from data.

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        keys: Index | tuple

        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            # GH#34717: extracting keys/values eagerly (tuple/list) rather than
            # via zip of generators measurably improves construction speed.
            keys = tuple(data.keys())
            values = list(data.values())  # eager list build — faster than a generator
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
            keys = index
        else:
            keys, values = (), []

        # Input is now list-like, so rely on "standard" construction:
        # TODO: passing np.float64 to not break anything yet. See GH-17261
        s = create_series_with_explicit_dtype(
            # error: Argument "index" to "create_series_with_explicit_dtype" has
            # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
            # ndarray, Index, None]"
            values,
            index=keys,  # type: ignore[arg-type]
            dtype=dtype,
            dtype_if_empty=np.float64,
        )

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        return s._mgr, s.index
    # ----------------------------------------------------------------------

    @property
    def _constructor(self) -> type[Series]:
        # Class used to build results of same-dimension operations.
        return Series

    @property
    def _constructor_expanddim(self) -> type[DataFrame]:
        """
        Used when a manipulation result has one higher dimension as the
        original, such as Series.to_frame()
        """
        # Local import to avoid a circular dependency with pandas.core.frame.
        from pandas.core.frame import DataFrame

        return DataFrame

    # types
    @property
    def _can_hold_na(self) -> bool:
        # Whether the backing dtype can represent missing values (delegated
        # to the manager).
        return self._mgr._can_hold_na
    def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
        """
        Override generic, we want to set the _typ here.

        This is called from the cython code when we set the `index` attribute
        directly, e.g. `series.index = [1, 2, 3]`.

        Parameters
        ----------
        axis : int
            Always 0 for Series (single axis).
        labels : sequence or Index
            New axis labels; coerced to an Index unless ``fastpath``.
        fastpath : bool, default False
            If True, ``labels`` is assumed to already be a valid Index and is
            only written to the manager when datetime inference changes it.
        """
        if not fastpath:
            labels = ensure_index(labels)

        if labels._is_all_dates:
            # Labels look entirely date-like: try upgrading to a DatetimeIndex
            # (for a CategoricalIndex, inspect its categories instead).
            deep_labels = labels
            if isinstance(labels, CategoricalIndex):
                deep_labels = labels.categories

            if not isinstance(
                deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
            ):
                try:
                    labels = DatetimeIndex(labels)
                    # need to set here because we changed the index
                    if fastpath:
                        self._mgr.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceeds datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        if not fastpath:
            # The ensure_index call above ensures we have an Index object
            self._mgr.set_axis(axis, labels)
    # ndarray compatibility
    @property
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        return self._mgr.dtype

    @property
    def dtypes(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # DataFrame compatibility: alias of ``dtype`` so code written against
        # DataFrame.dtypes also works on a Series.
        return self.dtype

    @property
    def name(self) -> Hashable:
        """
        Return the name of the Series.

        The name of a Series becomes its index or column name if it is used
        to form a DataFrame. It is also used whenever displaying the Series
        using the interpreter.

        Returns
        -------
        label (hashable object)
            The name of the Series, also the column name if part of a DataFrame.

        See Also
        --------
        Series.rename : Sets the Series name when given a scalar input.
        Index.name : Corresponding Index property.

        Examples
        --------
        The Series name can be set initially when calling the constructor.

        >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
        >>> s
        0    1
        1    2
        2    3
        Name: Numbers, dtype: int64
        >>> s.name = "Integers"
        >>> s
        0    1
        1    2
        2    3
        Name: Integers, dtype: int64

        The name of a Series within a DataFrame is its column name.

        >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
        ...                   columns=["Odd Numbers", "Even Numbers"])
        >>> df
           Odd Numbers  Even Numbers
        0            1             2
        1            3             4
        2            5             6
        >>> df["Even Numbers"].name
        'Even Numbers'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        # Reject unhashable names up front; stores via object.__setattr__ to
        # bypass NDFrame's attribute interception.
        validate_all_hashable(value, error_name=f"{type(self).__name__}.name")
        object.__setattr__(self, "_name", value)
    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        ['a', 'a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._mgr.external_values()

    @property
    def _values(self):
        """
        Return the internal repr of this data (defined by Block.interval_values).
        This are the values as stored in the Block (ndarray or ExtensionArray
        depending on the Block class), with datetime64[ns] and timedelta64[ns]
        wrapped in ExtensionArrays to match Index._values behavior.

        Differs from the public ``.values`` for certain data types, because of
        historical backwards compatibility of the public attribute (e.g. period
        returns object ndarray and datetimetz a datetime64[ns] ndarray for
        ``.values`` while it returns an ExtensionArray for ``._values`` in those
        cases).

        Differs from ``.array`` in that this still returns the numpy array if
        the Block is backed by a numpy array (except for datetime64 and
        timedelta64 dtypes), while ``.array`` ensures to always return an
        ExtensionArray.

        Overview:

        dtype       | values        | _values       | array         |
        ----------- | ------------- | ------------- | ------------- |
        Numeric     | ndarray       | ndarray       | PandasArray   |
        Category    | Categorical   | Categorical   | Categorical   |
        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
        Nullable    | EA            | EA            | EA            |

        """
        return self._mgr.internal_values()

    # error: Decorated property not supported
    @Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore[misc]
    @property
    def array(self) -> ExtensionArray:
        # Always an ExtensionArray view of the data (see table in _values).
        return self._mgr.array_values()
    # ops
    def ravel(self, order="C"):
        """
        Return the flattened underlying data as an ndarray.

        Parameters
        ----------
        order : {'C', 'F', 'A', 'K'}, default 'C'
            Passed through to ``numpy.ravel``.

        Returns
        -------
        numpy.ndarray or ndarray-like
            Flattened data of the Series.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        return self._values.ravel(order=order)

    def __len__(self) -> int:
        """
        Return the length of the Series.
        """
        # Delegates to the manager rather than the index or values.
        return len(self._mgr)

    def view(self, dtype: Dtype | None = None) -> Series:
        """
        Create a new view of the Series.

        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes as to not
        cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series are instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        # The new Series shares memory with self (ndarray.view semantics);
        # the index is reused as-is.
        return self._constructor(
            self._values.view(dtype), index=self.index
        ).__finalize__(self, method="view")
    # ----------------------------------------------------------------------
    # NDArray Compat
    # NOTE(review): this re-binds the same tuple already assigned at the top of
    # the class — likely one of the two assignments is redundant; confirm
    # before removing either.
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarray`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discarded with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        return np.asarray(self._values, dtype)

    # ----------------------------------------------------------------------
    # Unary Methods

    # coercion: install __float__/__int__ (and legacy __long__) that convert a
    # length-1 Series to a scalar and raise TypeError otherwise.
    __float__ = _coerce_method(float)
    __long__ = _coerce_method(int)
    __int__ = _coerce_method(int)
    # ----------------------------------------------------------------------

    # indexers
    @property
    def axes(self) -> list[Index]:
        """
        Return a list of the row axis labels.
        """
        # Series has a single axis, so this is always a one-element list.
        return [self.index]

    # ----------------------------------------------------------------------
    # Indexing Methods

    @Appender(generic.NDFrame.take.__doc__)
    def take(self, indices, axis=0, is_copy=None, **kwargs) -> Series:
        if is_copy is not None:
            warnings.warn(
                "is_copy is deprecated and will be removed in a future version. "
                "'take' always returns a copy, so there is no need to specify this.",
                FutureWarning,
                stacklevel=2,
            )
        # Rejects unexpected numpy-compat kwargs (e.g. ``out``, ``mode``).
        nv.validate_take((), kwargs)

        indices = ensure_platform_int(indices)
        new_index = self.index.take(indices)
        new_values = self._values.take(indices)

        # fastpath=True: new_values/new_index are already validated.
        result = self._constructor(new_values, index=new_index, fastpath=True)
        return result.__finalize__(self, method="take")

    def _take_with_is_copy(self, indices, axis=0) -> Series:
        """
        Internal version of the `take` method that sets the `_is_copy`
        attribute to keep track of the parent dataframe (using in indexing
        for the SettingWithCopyWarning). For Series this does the same
        as the public take (it never sets `_is_copy`).

        See the docstring of `take` for full explanation of the parameters.
        """
        return self.take(indices=indices, axis=axis)

    def _ixs(self, i: int, axis: int = 0):
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """
        return self._values[i]

    def _slice(self, slobj: slice, axis: int = 0) -> Series:
        # axis kwarg is retained for compat with NDFrame method
        # _slice is *always* positional
        return self._get_values(slobj)
    def __getitem__(self, key):
        # Resolve callable keys against self first (``s[lambda x: ...]``).
        key = com.apply_if_callable(key, self)

        if key is Ellipsis:
            # ``s[...]`` returns the Series itself.
            return self

        key_is_scalar = is_scalar(key)
        if isinstance(key, (list, tuple)):
            key = unpack_1tuple(key)

        if is_integer(key) and self.index._should_fallback_to_positional:
            # Integer key on a non-integer index: positional lookup.
            return self._values[key]

        elif key_is_scalar:
            return self._get_value(key)

        if is_hashable(key):
            # Otherwise index.get_value will raise InvalidIndexError
            try:
                # For labels that don't resolve as scalars like tuples and frozensets
                result = self._get_value(key)

                return result

            except (KeyError, TypeError):
                if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
                    # We still have the corner case where a tuple is a key
                    # in the first level of our MultiIndex
                    return self._get_values_tuple(key)

        if is_iterator(key):
            # Materialize so the key can be inspected more than once below.
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            key = np.asarray(key, dtype=bool)
            return self._get_values(key)

        return self._get_with(key)

    def _get_with(self, key):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            # _convert_slice_indexer to determine if this slice is positional
            #  or label based, and if the latter, convert to positional
            slobj = self.index._convert_slice_indexer(key, kind="getitem")
            return self._slice(slobj)
        elif isinstance(key, ABCDataFrame):
            raise TypeError(
                "Indexing a Series with DataFrame is not "
                "supported, use the appropriate DataFrame column"
            )
        elif isinstance(key, tuple):
            return self._get_values_tuple(key)

        elif not is_list_like(key):
            # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
            return self.loc[key]

        if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        # Note: The key_type == "boolean" case should be caught by the
        #  com.is_bool_indexer check in __getitem__
        if key_type == "integer":
            # We need to decide whether to treat this as a positional indexer
            #  (i.e. self.iloc) or label-based (i.e. self.loc)
            if not self.index._should_fallback_to_positional:
                return self.loc[key]
            else:
                return self.iloc[key]

        # handle the dup indexing case GH#4246
        return self.loc[key]
    def _get_values_tuple(self, key):
        # mpl hackaround: tuples containing None (e.g. ``ser[:, np.newaxis]``)
        # are routed through positional selection for matplotlib compat.
        if com.any_none(*key):
            result = self._get_values(key)
            deprecate_ndim_indexing(result, stacklevel=5)
            return result

        if not isinstance(self.index, MultiIndex):
            raise KeyError("key of type tuple not found and not a MultiIndex")

        # If key is contained, would have returned by now
        indexer, new_index = self.index.get_loc_level(key)
        return self._constructor(self._values[indexer], index=new_index).__finalize__(
            self
        )

    def _get_values(self, indexer):
        try:
            new_mgr = self._mgr.getitem_mgr(indexer)
            return self._constructor(new_mgr).__finalize__(self)
        except ValueError:
            # mpl compat if we look up e.g. ser[:, np.newaxis];
            #  see tests.series.timeseries.test_mpl_compat_hack
            # the asarray is needed to avoid returning a 2D DatetimeArray
            return np.asarray(self._values[indexer])

    def _get_value(self, label, takeable: bool = False):
        """
        Quickly retrieve single value at passed index label.

        Parameters
        ----------
        label : object
        takeable : interpret the index as indexers, default False

        Returns
        -------
        scalar value
        """
        if takeable:
            return self._values[label]

        # Similar to Index.get_value, but we do not fall back to positional
        loc = self.index.get_loc(label)
        return self.index._get_values_for_loc(self, loc, label)
    def __setitem__(self, key, value) -> None:
        key = com.apply_if_callable(key, self)
        # Must be checked BEFORE mutation so a chained-assignment warning can
        # fire against the (still-live) parent.
        cacher_needs_updating = self._check_is_chained_assignment_possible()

        if key is Ellipsis:
            key = slice(None)

        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind="getitem")
            return self._set_values(indexer, value)

        try:
            # Fast path: scalar label resolvable by the index engine.
            self._set_with_engine(key, value)
        except (KeyError, ValueError):
            if is_integer(key) and self.index.inferred_type != "integer":
                # positional setter
                if not self.index._should_fallback_to_positional:
                    # GH#33469
                    warnings.warn(
                        "Treating integers as positional in Series.__setitem__ "
                        "with a Float64Index is deprecated. In a future version, "
                        "`series[an_int] = val` will insert a new key into the "
                        "Series. Use `series.iloc[an_int] = val` to treat the "
                        "key as positional.",
                        FutureWarning,
                        stacklevel=2,
                    )
                # this is equivalent to self._values[key] = value
                self._mgr.setitem_inplace(key, value)
            else:
                # GH#12862 adding a new key to the Series
                self.loc[key] = value

        except (InvalidIndexError, TypeError) as err:
            if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
                # cases with MultiIndex don't get here bc they raise KeyError
                raise KeyError(
                    "key of type tuple not found and not a MultiIndex"
                ) from err

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                key = np.asarray(key, dtype=bool)

                if (
                    is_list_like(value)
                    and len(value) != len(self)
                    and not isinstance(value, Series)
                    and not is_object_dtype(self.dtype)
                ):
                    # Series will be reindexed to have matching length inside
                    #  _where call below
                    # GH#44265
                    indexer = key.nonzero()[0]
                    self._set_values(indexer, value)
                    return

                # otherwise with listlike other we interpret series[mask] = other
                # as series[mask] = other[mask]
                try:
                    self._where(~key, value, inplace=True)
                except InvalidIndexError:
                    # test_where_dups
                    self.iloc[key] = value
                return

            else:
                self._set_with(key, value)

        if cacher_needs_updating:
            self._maybe_update_cacher()
    def _set_with_engine(self, key, value) -> None:
        # Raises KeyError if ``key`` is not a label in the index.
        loc = self.index.get_loc(key)
        # error: Argument 1 to "validate_numeric_casting" has incompatible type
        # "Union[dtype, ExtensionDtype]"; expected "dtype"
        validate_numeric_casting(self.dtype, value)  # type: ignore[arg-type]

        # this is equivalent to self._values[key] = value
        self._mgr.setitem_inplace(loc, value)

    def _set_with(self, key, value):
        # other: fancy integer or otherwise
        assert not isinstance(key, tuple)

        if is_scalar(key):
            key = [key]
        elif is_iterator(key):
            # Without this, the call to infer_dtype will consume the generator
            key = list(key)

        key_type = lib.infer_dtype(key, skipna=False)

        # Note: key_type == "boolean" should not occur because that
        #  should be caught by the is_bool_indexer check in __setitem__
        if key_type == "integer":
            if not self.index._should_fallback_to_positional:
                self._set_labels(key, value)
            else:
                self._set_values(key, value)
        else:
            self.loc[key] = value

    def _set_labels(self, key, value) -> None:
        # All labels must exist; unlike .loc, missing labels raise rather
        # than enlarge.
        key = com.asarray_tuplesafe(key)
        indexer: np.ndarray = self.index.get_indexer(key)
        mask = indexer == -1
        if mask.any():
            raise KeyError(f"{key[mask]} not in index")
        self._set_values(indexer, value)

    def _set_values(self, key, value) -> None:
        if isinstance(key, (Index, Series)):
            key = key._values

        # Manager.setitem may return a new manager (e.g. after a dtype change),
        # so rebind and refresh any cacher.
        self._mgr = self._mgr.setitem(indexer=key, value=value)
        self._maybe_update_cacher()

    def _set_value(self, label, value, takeable: bool = False):
        """
        Quickly set single value at passed label.

        If label is not contained, a new object is created with the label
        placed at the end of the result index.

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed.
        value : object
            Scalar value.
        takeable : interpret the index as indexers, default False
        """
        if not takeable:
            try:
                loc = self.index.get_loc(label)
            except KeyError:
                # set using a non-recursive method
                self.loc[label] = value
                return
        else:
            loc = label

        self._set_values(loc, value)
- # ----------------------------------------------------------------------
- # Lookup Caching
- @property
- def _is_cached(self) -> bool:
- """Return boolean indicating if self is cached or not."""
- return getattr(self, "_cacher", None) is not None
- def _get_cacher(self):
- """return my cacher or None"""
- cacher = getattr(self, "_cacher", None)
- if cacher is not None:
- cacher = cacher[1]()
- return cacher
- def _reset_cacher(self) -> None:
- """
- Reset the cacher.
- """
- if hasattr(self, "_cacher"):
- # should only get here with self.ndim == 1
- del self._cacher
- def _set_as_cached(self, item, cacher) -> None:
- """
- Set the _cacher attribute on the calling object with a weakref to
- cacher.
- """
- self._cacher = (item, weakref.ref(cacher))
- def _clear_item_cache(self) -> None:
- # no-op for Series
- pass
def _check_is_chained_assignment_possible(self) -> bool:
    """
    See NDFrame._check_is_chained_assignment_possible.__doc__
    """
    # NOTE(review): when we are a view onto a cached parent whose data is
    # mixed-type, a write here presumably may not propagate back to the
    # parent, so the SettingWithCopy machinery is invoked — confirm against
    # NDFrame._check_setitem_copy.
    if self._is_view and self._is_cached:
        ref = self._get_cacher()
        if ref is not None and ref._is_mixed_type:
            self._check_setitem_copy(t="referent", force=True)
        return True
    # Not a cached view: defer to the generic NDFrame check.
    return super()._check_is_chained_assignment_possible()
def _maybe_update_cacher(
    self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False
) -> None:
    """
    See NDFrame._maybe_update_cacher.__doc__
    """
    cacher = getattr(self, "_cacher", None)
    if cacher is not None:
        assert self.ndim == 1
        # _cacher is (item, weakref-to-parent); dereference the weakref.
        ref: DataFrame = cacher[1]()

        # we are trying to reference a dead referent, hence
        # a copy
        if ref is None:
            del self._cacher
        elif len(self) == len(ref) and self.name in ref.columns:
            # GH#42530 self.name must be in ref.columns
            # to ensure column still in dataframe
            # otherwise, either self or ref has swapped in new arrays
            ref._maybe_cache_changed(cacher[0], self, inplace=inplace)
        else:
            # GH#33675 we have swapped in a new array, so parent
            # reference to self is now invalid
            ref._item_cache.pop(cacher[0], None)

    # Always run the generic NDFrame bookkeeping as well.
    super()._maybe_update_cacher(
        clear=clear, verify_is_copy=verify_is_copy, inplace=inplace
    )
- # ----------------------------------------------------------------------
- # Unsorted
- @property
- def _is_mixed_type(self):
- return False
def repeat(self, repeats, axis=None) -> Series:
    """
    Repeat elements of a Series.

    Returns a new Series where each element of the current Series
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        Series.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    Series
        Newly created Series with repeated elements.

    See Also
    --------
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> s = pd.Series(['a', 'b', 'c'])
    >>> s.repeat(2)
    0    a
    0    a
    1    b
    1    b
    2    c
    2    c
    dtype: object
    >>> s.repeat([1, 2, 3])
    0    a
    1    b
    1    b
    2    c
    2    c
    2    c
    dtype: object
    """
    # Only axis=None is accepted; this call raises otherwise (numpy compat).
    nv.validate_repeat((), {"axis": axis})
    repeated_index = self.index.repeat(repeats)
    repeated_values = self._values.repeat(repeats)
    result = self._constructor(repeated_values, index=repeated_index)
    return result.__finalize__(self, method="repeat")
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"])
def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False):
    """
    Generate a new DataFrame or Series with the index reset.

    This is useful when the index needs to be treated as a column, or
    when the index is meaningless and needs to be reset to the default
    before another operation.

    Parameters
    ----------
    level : int, str, tuple, or list, default optional
        For a Series with a MultiIndex, only remove the specified levels
        from the index. Removes all levels by default.
    drop : bool, default False
        Just reset the index, without inserting it as a column in
        the new DataFrame.
    name : object, optional
        The name to use for the column containing the original Series
        values. Uses ``self.name`` by default. This argument is ignored
        when `drop` is True and ``inplace=False``.
    inplace : bool, default False
        Modify the Series in place (do not create a new object).

    Returns
    -------
    Series or DataFrame or None
        When `drop` is False (the default), a DataFrame is returned.
        The newly created columns will come first in the DataFrame,
        followed by the original Series values.
        When `drop` is True, a `Series` is returned.
        In either case, if ``inplace=True``, no value is returned.

    See Also
    --------
    DataFrame.reset_index: Analogous function for DataFrame.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4], name='foo',
    ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

    Generate a DataFrame with default index.

    >>> s.reset_index()
      idx  foo
    0   a    1
    1   b    2
    2   c    3
    3   d    4

    To specify the name of the new column use `name`.

    >>> s.reset_index(name='values')
      idx  values
    0   a       1
    1   b       2
    2   c       3
    3   d       4

    To generate a new Series with the default set `drop` to True.

    >>> s.reset_index(drop=True)
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    To update the Series in place, without generating a new one
    set `inplace` to True. Note that it also requires ``drop=True``.

    >>> s.reset_index(inplace=True, drop=True)
    >>> s
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    The `level` parameter is interesting for Series with a multi-level
    index.

    >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
    ...           np.array(['one', 'two', 'one', 'two'])]
    >>> s2 = pd.Series(
    ...     range(4), name='foo',
    ...     index=pd.MultiIndex.from_arrays(arrays,
    ...                                     names=['a', 'b']))

    To remove a specific level from the Index, use `level`.

    >>> s2.reset_index(level='a')
           a  foo
    b
    one  bar    0
    two  bar    1
    one  baz    2
    two  baz    3

    If `level` is not set, all levels are removed from the Index.

    >>> s2.reset_index()
         a    b  foo
    0  bar  one    0
    1  bar  two    1
    2  baz  one    2
    3  baz  two    3
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    if drop:
        # Resolve the sentinel up front so the inplace branch below can
        # assign the name unconditionally.
        if name is lib.no_default:
            name = self.name

        new_index = default_index(len(self))
        if level is not None:
            # Drop only the requested levels; any remaining levels stay
            # as the (possibly still multi-level) index.
            if not isinstance(level, (tuple, list)):
                level = [level]
            level = [self.index._get_level_number(lev) for lev in level]
            if len(level) < self.index.nlevels:
                new_index = self.index.droplevel(level)

        if inplace:
            self.index = new_index
            # BUG FIX: this was `self.name = name or self.name`, which
            # silently discarded falsy names (0, "", explicit None) passed
            # by the caller. `name` already defaults to self.name above,
            # so it is safe to assign directly.
            self.name = name
        else:
            return self._constructor(
                self._values.copy(), index=new_index
            ).__finalize__(self, method="reset_index")
    elif inplace:
        raise TypeError(
            "Cannot reset_index inplace on a Series to create a DataFrame"
        )
    else:
        if name is lib.no_default:
            # For backwards compatibility, keep columns as [0] instead of
            # [None] when self.name is None
            name = 0 if self.name is None else self.name

        df = self.to_frame(name)
        return df.reset_index(level=level, drop=drop)
- # ----------------------------------------------------------------------
- # Rendering Methods
- def __repr__(self) -> str:
- """
- Return a string representation for a particular Series.
- """
- repr_params = fmt.get_series_repr_params()
- return self.to_string(**repr_params)
- def to_string(
- self,
- buf=None,
- na_rep="NaN",
- float_format=None,
- header=True,
- index=True,
- length=False,
- dtype=False,
- name=False,
- max_rows=None,
- min_rows=None,
- ):
- """
- Render a string representation of the Series.
- Parameters
- ----------
- buf : StringIO-like, optional
- Buffer to write to.
- na_rep : str, optional
- String representation of NaN to use, default 'NaN'.
- float_format : one-parameter function, optional
- Formatter function to apply to columns' elements if they are
- floats, default None.
- header : bool, default True
- Add the Series header (index name).
- index : bool, optional
- Add index (row) labels, default True.
- length : bool, default False
- Add the Series length.
- dtype : bool, default False
- Add the Series dtype.
- name : b…
Large files are truncated, but you can click here to view the full file