/pandas/core/series.py
- """
- Data structure for 1-dimensional cross-sectional and time series data
- """
- from __future__ import annotations
- from textwrap import dedent
- from typing import (
- IO,
- TYPE_CHECKING,
- Any,
- Callable,
- Hashable,
- Iterable,
- Literal,
- Sequence,
- Union,
- cast,
- overload,
- )
- import warnings
- import weakref
- import numpy as np
- from pandas._config import get_option
- from pandas._libs import (
- lib,
- properties,
- reshape,
- tslibs,
- )
- from pandas._libs.lib import no_default
- from pandas._typing import (
- AggFuncType,
- ArrayLike,
- Axis,
- Dtype,
- DtypeObj,
- FillnaOptions,
- IndexKeyFunc,
- SingleManager,
- StorageOptions,
- TimedeltaConvertibleTypes,
- TimestampConvertibleTypes,
- ValueKeyFunc,
- npt,
- )
- from pandas.compat.numpy import function as nv
- from pandas.errors import InvalidIndexError
- from pandas.util._decorators import (
- Appender,
- Substitution,
- deprecate_nonkeyword_arguments,
- doc,
- )
- from pandas.util._validators import (
- validate_ascending,
- validate_bool_kwarg,
- validate_percentile,
- )
- from pandas.core.dtypes.cast import (
- convert_dtypes,
- maybe_box_native,
- maybe_cast_pointwise_result,
- validate_numeric_casting,
- )
- from pandas.core.dtypes.common import (
- ensure_platform_int,
- is_dict_like,
- is_integer,
- is_iterator,
- is_list_like,
- is_object_dtype,
- is_scalar,
- pandas_dtype,
- validate_all_hashable,
- )
- from pandas.core.dtypes.generic import ABCDataFrame
- from pandas.core.dtypes.inference import is_hashable
- from pandas.core.dtypes.missing import (
- isna,
- na_value_for_dtype,
- notna,
- remove_na_arraylike,
- )
- from pandas.core import (
- algorithms,
- base,
- generic,
- missing,
- nanops,
- ops,
- )
- from pandas.core.accessor import CachedAccessor
- from pandas.core.apply import SeriesApply
- from pandas.core.arrays import ExtensionArray
- from pandas.core.arrays.categorical import CategoricalAccessor
- from pandas.core.arrays.sparse import SparseAccessor
- import pandas.core.common as com
- from pandas.core.construction import (
- create_series_with_explicit_dtype,
- extract_array,
- is_empty_data,
- sanitize_array,
- )
- from pandas.core.generic import NDFrame
- from pandas.core.indexers import (
- deprecate_ndim_indexing,
- unpack_1tuple,
- )
- from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
- from pandas.core.indexes.api import (
- CategoricalIndex,
- DatetimeIndex,
- Float64Index,
- Index,
- MultiIndex,
- PeriodIndex,
- TimedeltaIndex,
- default_index,
- ensure_index,
- )
- import pandas.core.indexes.base as ibase
- from pandas.core.indexing import check_bool_indexer
- from pandas.core.internals import (
- SingleArrayManager,
- SingleBlockManager,
- )
- from pandas.core.shared_docs import _shared_docs
- from pandas.core.sorting import (
- ensure_key_mapped,
- nargsort,
- )
- from pandas.core.strings import StringMethods
- from pandas.core.tools.datetimes import to_datetime
- import pandas.io.formats.format as fmt
- import pandas.plotting
- if TYPE_CHECKING:
- from pandas._typing import (
- NumpySorter,
- NumpyValueArrayLike,
- )
- from pandas.core.frame import DataFrame
- from pandas.core.groupby.generic import SeriesGroupBy
- from pandas.core.resample import Resampler
- __all__ = ["Series"]
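- # Values interpolated into the shared docstrings below, e.g. via
- # ``@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)``.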
- _shared_doc_kwargs = {
- "axes": "index",
- "klass": "Series",
- "axes_single_arg": "{0 or 'index'}",
- "axis": """axis : {0 or 'index'}
- Parameter needed for compatibility with DataFrame.""",
- "inplace": """inplace : bool, default False
- If True, performs operation inplace and returns None.""",
- "unique": "np.ndarray",
- "duplicated": "Series",
- "optional_by": "",
- "optional_mapper": "",
- "optional_labels": "",
- "optional_axis": "",
- "replace_iloc": """
- This differs from updating with ``.loc`` or ``.iloc``, which require
- you to specify a location to update with some value.""",
- }
- def _coerce_method(converter):
- """
- Install the scalar coercion methods.
- """
- def wrapper(self):
- if len(self) == 1:
- return converter(self.iloc[0])
- raise TypeError(f"cannot convert the series to {converter}")
- wrapper.__name__ = f"__{converter.__name__}__"
- return wrapper
- # ----------------------------------------------------------------------
- # Series class
- class Series(base.IndexOpsMixin, generic.NDFrame):
- """
- One-dimensional ndarray with axis labels (including time series).
- Labels need not be unique but must be a hashable type. The object
- supports both integer- and label-based indexing and provides a host of
- methods for performing operations involving the index. Statistical
- methods from ndarray have been overridden to automatically exclude
- missing data (currently represented as NaN).
- Operations between Series (+, -, /, \\*, \\*\\*) align values based on their
- associated index values -- they need not be the same length. The result
- index will be the sorted union of the two indexes.
- Parameters
- ----------
- data : array-like, Iterable, dict, or scalar value
- Contains data stored in Series. If data is a dict, argument order is
- maintained.
- index : array-like or Index (1d)
- Values must be hashable and have the same length as `data`.
- Non-unique index values are allowed. Will default to
- RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
- and index is None, then the keys in the data are used as the index. If the
- index is not None, the resulting Series is reindexed with the index values.
- dtype : str, numpy.dtype, or ExtensionDtype, optional
- Data type for the output Series. If not specified, this will be
- inferred from `data`.
- See the :ref:`user guide <basics.dtypes>` for more usages.
- name : str, optional
- The name to give to the Series.
- copy : bool, default False
- Copy input data. Only affects Series or 1d ndarray input. See examples.
- Examples
- --------
- Constructing Series from a dictionary with an Index specified
- >>> d = {'a': 1, 'b': 2, 'c': 3}
- >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
- >>> ser
- a 1
- b 2
- c 3
- dtype: int64
- The keys of the dictionary match the Index values, hence the Index
- values have no effect.
- >>> d = {'a': 1, 'b': 2, 'c': 3}
- >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
- >>> ser
- x NaN
- y NaN
- z NaN
- dtype: float64
- Note that the Index is first built with the keys from the dictionary.
- After this the Series is reindexed with the given Index values, hence we
- get all NaN as a result.
- Constructing Series from a list with `copy=False`.
- >>> r = [1, 2]
- >>> ser = pd.Series(r, copy=False)
- >>> ser.iloc[0] = 999
- >>> r
- [1, 2]
- >>> ser
- 0 999
- 1 2
- dtype: int64
- Due to the input data type, the Series has a `copy` of
- the original data even though `copy=False`, so
- the data is unchanged.
- Constructing Series from a 1d ndarray with `copy=False`.
- >>> r = np.array([1, 2])
- >>> ser = pd.Series(r, copy=False)
- >>> ser.iloc[0] = 999
- >>> r
- array([999, 2])
- >>> ser
- 0 999
- 1 2
- dtype: int64
- Due to the input data type, the Series has a `view` on
- the original data, so
- the data is changed as well.
- """
- _typ = "series"
- _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
- _name: Hashable
- _metadata: list[str] = ["name"]
- _internal_names_set = {"index"} | generic.NDFrame._internal_names_set
- _accessors = {"dt", "cat", "str", "sparse"}
- _hidden_attrs = (
- base.IndexOpsMixin._hidden_attrs
- | generic.NDFrame._hidden_attrs
- | frozenset(["compress", "ptp"])
- )
- # Override cache_readonly bc Series is mutable
- # error: Incompatible types in assignment (expression has type "property",
- # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
- hasnans = property( # type: ignore[assignment]
- # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
- base.IndexOpsMixin.hasnans.fget, # type: ignore[attr-defined]
- doc=base.IndexOpsMixin.hasnans.__doc__,
- )
- _mgr: SingleManager
- div: Callable[[Series, Any], Series]
- rdiv: Callable[[Series, Any], Series]
- # ----------------------------------------------------------------------
- # Constructors
- def __init__(
- self,
- data=None,
- index=None,
- dtype: Dtype | None = None,
- name=None,
- copy: bool = False,
- fastpath: bool = False,
- ):
- if (
- isinstance(data, (SingleBlockManager, SingleArrayManager))
- and index is None
- and dtype is None
- and copy is False
- ):
- # GH#33357 called with just the SingleBlockManager
- NDFrame.__init__(self, data)
- self.name = name
- return
- # we are called internally, so short-circuit
- if fastpath:
- # data is an ndarray, index is defined
- if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
- manager = get_option("mode.data_manager")
- if manager == "block":
- data = SingleBlockManager.from_array(data, index)
- elif manager == "array":
- data = SingleArrayManager.from_array(data, index)
- if copy:
- data = data.copy()
- if index is None:
- index = data.index
- else:
- name = ibase.maybe_extract_name(name, data, type(self))
- if is_empty_data(data) and dtype is None:
- # gh-17261
- warnings.warn(
- "The default dtype for empty Series will be 'object' instead "
- "of 'float64' in a future version. Specify a dtype explicitly "
- "to silence this warning.",
- FutureWarning,
- stacklevel=2,
- )
- # uncomment the line below when removing the FutureWarning
- # dtype = np.dtype(object)
- if index is not None:
- index = ensure_index(index)
- if data is None:
- data = {}
- if dtype is not None:
- dtype = self._validate_dtype(dtype)
- if isinstance(data, MultiIndex):
- raise NotImplementedError(
- "initializing a Series from a MultiIndex is not supported"
- )
- elif isinstance(data, Index):
- if dtype is not None:
- # astype copies
- data = data.astype(dtype)
- else:
- # GH#24096 we need to ensure the index remains immutable
- data = data._values.copy()
- copy = False
- elif isinstance(data, np.ndarray):
- if len(data.dtype):
- # GH#13296 we are dealing with a compound dtype, which
- # should be treated as 2D
- raise ValueError(
- "Cannot construct a Series from an ndarray with "
- "compound dtype. Use DataFrame instead."
- )
- elif isinstance(data, Series):
- if index is None:
- index = data.index
- else:
- data = data.reindex(index, copy=copy)
- copy = False
- data = data._mgr
- elif is_dict_like(data):
- data, index = self._init_dict(data, index, dtype)
- dtype = None
- copy = False
- elif isinstance(data, (SingleBlockManager, SingleArrayManager)):
- if index is None:
- index = data.index
- elif not data.index.equals(index) or copy:
- # GH#19275 SingleBlockManager input should only be called
- # internally
- raise AssertionError(
- "Cannot pass both SingleBlockManager "
- "`data` argument and a different "
- "`index` argument. `copy` must be False."
- )
- elif isinstance(data, ExtensionArray):
- pass
- else:
- data = com.maybe_iterable_to_list(data)
- if index is None:
- if not is_list_like(data):
- data = [data]
- index = default_index(len(data))
- elif is_list_like(data):
- com.require_length_match(data, index)
- # create/copy the manager
- if isinstance(data, (SingleBlockManager, SingleArrayManager)):
- if dtype is not None:
- data = data.astype(dtype=dtype, errors="ignore", copy=copy)
- elif copy:
- data = data.copy()
- else:
- data = sanitize_array(data, index, dtype, copy)
- manager = get_option("mode.data_manager")
- if manager == "block":
- data = SingleBlockManager.from_array(data, index)
- elif manager == "array":
- data = SingleArrayManager.from_array(data, index)
- generic.NDFrame.__init__(self, data)
- self.name = name
- self._set_axis(0, index, fastpath=True)
- def _init_dict(
- self, data, index: Index | None = None, dtype: DtypeObj | None = None
- ):
- """
- Derive the "_mgr" and "index" attributes of a new Series from a
- dictionary input.
- Parameters
- ----------
- data : dict or dict-like
- Data used to populate the new Series.
- index : Index or None, default None
- Index for the new Series: if None, use dict keys.
- dtype : np.dtype, ExtensionDtype, or None, default None
- The dtype for the new Series: if None, infer from data.
- Returns
- -------
- _data : BlockManager for the new Series
- index : index for the new Series
- """
- keys: Index | tuple
- # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
- # raises KeyError), so we iterate the entire dict, and align
- if data:
- # GH#34717: extracting keys and values with zip(*data.items()) relied
- # on generators and hurt performance, so build them directly instead.
- keys = tuple(data.keys())
- values = list(data.values())
- elif index is not None:
- # fastpath for Series(data=None): broadcast a scalar
- # instead of reindexing.
- values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
- keys = index
- else:
- keys, values = (), []
- # Input is now list-like, so rely on "standard" construction:
- # TODO: passing np.float64 to not break anything yet. See GH-17261
- s = create_series_with_explicit_dtype(
- # error: Argument "index" to "create_series_with_explicit_dtype" has
- # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
- # ndarray, Index, None]"
- values,
- index=keys, # type: ignore[arg-type]
- dtype=dtype,
- dtype_if_empty=np.float64,
- )
- # Now we just make sure the order is respected, if any
- if data and index is not None:
- s = s.reindex(index, copy=False)
- return s._mgr, s.index
- # ----------------------------------------------------------------------
- @property
- def _constructor(self) -> type[Series]:
- return Series
- @property
- def _constructor_expanddim(self) -> type[DataFrame]:
- """
- Used when a manipulation result has one dimension higher than the
- original, such as Series.to_frame().
- """
- from pandas.core.frame import DataFrame
- return DataFrame
- # types
- @property
- def _can_hold_na(self) -> bool:
- return self._mgr._can_hold_na
- def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
- """
- Override generic, we want to set the _typ here.
- This is called from the cython code when we set the `index` attribute
- directly, e.g. `series.index = [1, 2, 3]`.
- """
- if not fastpath:
- labels = ensure_index(labels)
- if labels._is_all_dates:
- deep_labels = labels
- if isinstance(labels, CategoricalIndex):
- deep_labels = labels.categories
- if not isinstance(
- deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
- ):
- try:
- labels = DatetimeIndex(labels)
- # need to set here because we changed the index
- if fastpath:
- self._mgr.set_axis(axis, labels)
- except (tslibs.OutOfBoundsDatetime, ValueError):
- # labels may exceed datetime bounds,
- # or may not be convertible to a DatetimeIndex
- pass
- if not fastpath:
- # The ensure_index call above ensures we have an Index object
- self._mgr.set_axis(axis, labels)
- # ndarray compatibility
- @property
- def dtype(self) -> DtypeObj:
- """
- Return the dtype object of the underlying data.
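- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.dtype
- dtype('int64')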
- """
- return self._mgr.dtype
- @property
- def dtypes(self) -> DtypeObj:
- """
- Return the dtype object of the underlying data.
- """
- # DataFrame compatibility
- return self.dtype
- @property
- def name(self) -> Hashable:
- """
- Return the name of the Series.
- The name of a Series becomes its index or column name if it is used
- to form a DataFrame. It is also used whenever displaying the Series
- using the interpreter.
- Returns
- -------
- label (hashable object)
- The name of the Series, also the column name if part of a DataFrame.
- See Also
- --------
- Series.rename : Sets the Series name when given a scalar input.
- Index.name : Corresponding Index property.
- Examples
- --------
- The Series name can be set initially when calling the constructor.
- >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
- >>> s
- 0 1
- 1 2
- 2 3
- Name: Numbers, dtype: int64
- >>> s.name = "Integers"
- >>> s
- 0 1
- 1 2
- 2 3
- Name: Integers, dtype: int64
- The name of a Series within a DataFrame is its column name.
- >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
- ... columns=["Odd Numbers", "Even Numbers"])
- >>> df
- Odd Numbers Even Numbers
- 0 1 2
- 1 3 4
- 2 5 6
- >>> df["Even Numbers"].name
- 'Even Numbers'
- """
- return self._name
- @name.setter
- def name(self, value: Hashable) -> None:
- validate_all_hashable(value, error_name=f"{type(self).__name__}.name")
- object.__setattr__(self, "_name", value)
- @property
- def values(self):
- """
- Return Series as ndarray or ndarray-like depending on the dtype.
- .. warning::
- We recommend using :attr:`Series.array` or
- :meth:`Series.to_numpy`, depending on whether you need
- a reference to the underlying data or a NumPy array.
- Returns
- -------
- numpy.ndarray or ndarray-like
- See Also
- --------
- Series.array : Reference to the underlying data.
- Series.to_numpy : A NumPy array representing the underlying data.
- Examples
- --------
- >>> pd.Series([1, 2, 3]).values
- array([1, 2, 3])
- >>> pd.Series(list('aabc')).values
- array(['a', 'a', 'b', 'c'], dtype=object)
- >>> pd.Series(list('aabc')).astype('category').values
- ['a', 'a', 'b', 'c']
- Categories (3, object): ['a', 'b', 'c']
- Timezone aware datetime data is converted to UTC:
- >>> pd.Series(pd.date_range('20130101', periods=3,
- ... tz='US/Eastern')).values
- array(['2013-01-01T05:00:00.000000000',
- '2013-01-02T05:00:00.000000000',
- '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
- """
- return self._mgr.external_values()
- @property
- def _values(self):
- """
- Return the internal repr of this data (defined by Block.internal_values).
- These are the values as stored in the Block (ndarray or ExtensionArray
- depending on the Block class), with datetime64[ns] and timedelta64[ns]
- wrapped in ExtensionArrays to match Index._values behavior.
- Differs from the public ``.values`` for certain data types, because of
- historical backwards compatibility of the public attribute (e.g. period
- returns object ndarray and datetimetz a datetime64[ns] ndarray for
- ``.values`` while it returns an ExtensionArray for ``._values`` in those
- cases).
- Differs from ``.array`` in that this still returns the numpy array if
- the Block is backed by a numpy array (except for datetime64 and
- timedelta64 dtypes), while ``.array`` always returns an
- ExtensionArray.
- Overview:
- dtype | values | _values | array |
- ----------- | ------------- | ------------- | ------------- |
- Numeric | ndarray | ndarray | PandasArray |
- Category | Categorical | Categorical | Categorical |
- dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
- dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
- td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
- Period | ndarray[obj] | PeriodArray | PeriodArray |
- Nullable | EA | EA | EA |
- """
- return self._mgr.internal_values()
- # error: Decorated property not supported
- @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[misc]
- @property
- def array(self) -> ExtensionArray:
- return self._mgr.array_values()
- # ops
- def ravel(self, order="C"):
- """
- Return the flattened underlying data as an ndarray.
- Returns
- -------
- numpy.ndarray or ndarray-like
- Flattened data of the Series.
- See Also
- --------
- numpy.ndarray.ravel : Return a flattened array.
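- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.ravel()
- array([1, 2, 3])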
- """
- return self._values.ravel(order=order)
- def __len__(self) -> int:
- """
- Return the length of the Series.
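- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> len(s)
- 3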
- """
- return len(self._mgr)
- def view(self, dtype: Dtype | None = None) -> Series:
- """
- Create a new view of the Series.
- This function will return a new Series with a view of the same
- underlying values in memory, optionally reinterpreted with a new data
- type. The new data type must preserve the same size in bytes so as not
- to cause index misalignment.
- Parameters
- ----------
- dtype : data type
- Data type object or one of their string representations.
- Returns
- -------
- Series
- A new Series object as a view of the same data in memory.
- See Also
- --------
- numpy.ndarray.view : Equivalent numpy function to create a new view of
- the same data in memory.
- Notes
- -----
- Series are instantiated with ``dtype=float64`` by default. While
- ``numpy.ndarray.view()`` will return a view with the same data type as
- the original array, ``Series.view()`` (without specified dtype)
- will try using ``float64`` and may fail if the original data type size
- in bytes is not the same.
- Examples
- --------
- >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
- >>> s
- 0 -2
- 1 -1
- 2 0
- 3 1
- 4 2
- dtype: int8
- The 8 bit signed integer representation of `-1` is `0b11111111`, but
- the same bytes represent 255 if read as an 8 bit unsigned integer:
- >>> us = s.view('uint8')
- >>> us
- 0 254
- 1 255
- 2 0
- 3 1
- 4 2
- dtype: uint8
- The views share the same underlying values:
- >>> us[0] = 128
- >>> s
- 0 -128
- 1 -1
- 2 0
- 3 1
- 4 2
- dtype: int8
- """
- return self._constructor(
- self._values.view(dtype), index=self.index
- ).__finalize__(self, method="view")
- # ----------------------------------------------------------------------
- # NDArray Compat
- _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
- def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
- """
- Return the values as a NumPy array.
- Users should not call this directly. Rather, it is invoked by
- :func:`numpy.array` and :func:`numpy.asarray`.
- Parameters
- ----------
- dtype : str or numpy.dtype, optional
- The dtype to use for the resulting NumPy array. By default,
- the dtype is inferred from the data.
- Returns
- -------
- numpy.ndarray
- The values in the series converted to a :class:`numpy.ndarray`
- with the specified `dtype`.
- See Also
- --------
- array : Create a new array from data.
- Series.array : Zero-copy view to the array backing the Series.
- Series.to_numpy : Series method for similar behavior.
- Examples
- --------
- >>> ser = pd.Series([1, 2, 3])
- >>> np.asarray(ser)
- array([1, 2, 3])
- For timezone-aware data, the timezones may be retained with
- ``dtype='object'``
- >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
- >>> np.asarray(tzser, dtype="object")
- array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
- Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
- dtype=object)
- Or the values may be localized to UTC and the tzinfo discarded with
- ``dtype='datetime64[ns]'``
- >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS
- array(['1999-12-31T23:00:00.000000000', ...],
- dtype='datetime64[ns]')
- """
- return np.asarray(self._values, dtype)
- # ----------------------------------------------------------------------
- # Unary Methods
- # coercion
- __float__ = _coerce_method(float)
- __long__ = _coerce_method(int)
- __int__ = _coerce_method(int)
- # ----------------------------------------------------------------------
- # indexers
- @property
- def axes(self) -> list[Index]:
- """
- Return a list of the row axis labels.
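- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.axes
- [RangeIndex(start=0, stop=3, step=1)]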
- """
- return [self.index]
- # ----------------------------------------------------------------------
- # Indexing Methods
- @Appender(generic.NDFrame.take.__doc__)
- def take(self, indices, axis=0, is_copy=None, **kwargs) -> Series:
- if is_copy is not None:
- warnings.warn(
- "is_copy is deprecated and will be removed in a future version. "
- "'take' always returns a copy, so there is no need to specify this.",
- FutureWarning,
- stacklevel=2,
- )
- nv.validate_take((), kwargs)
- indices = ensure_platform_int(indices)
- new_index = self.index.take(indices)
- new_values = self._values.take(indices)
- result = self._constructor(new_values, index=new_index, fastpath=True)
- return result.__finalize__(self, method="take")
- def _take_with_is_copy(self, indices, axis=0) -> Series:
- """
- Internal version of the `take` method that sets the `_is_copy`
- attribute to keep track of the parent dataframe (used in indexing
- for the SettingWithCopyWarning). For Series this does the same
- as the public take (it never sets `_is_copy`).
- See the docstring of `take` for full explanation of the parameters.
- """
- return self.take(indices=indices, axis=axis)
- def _ixs(self, i: int, axis: int = 0):
- """
- Return the i-th value or values in the Series by location.
- Parameters
- ----------
- i : int
- Returns
- -------
- scalar (int) or Series (slice, sequence)
- """
- return self._values[i]
- def _slice(self, slobj: slice, axis: int = 0) -> Series:
- # axis kwarg is retained for compat with NDFrame method
- # _slice is *always* positional
- return self._get_values(slobj)
- def __getitem__(self, key):
- key = com.apply_if_callable(key, self)
- if key is Ellipsis:
- return self
- key_is_scalar = is_scalar(key)
- if isinstance(key, (list, tuple)):
- key = unpack_1tuple(key)
- if is_integer(key) and self.index._should_fallback_to_positional:
- return self._values[key]
- elif key_is_scalar:
- return self._get_value(key)
- if is_hashable(key):
- # Otherwise index.get_value will raise InvalidIndexError
- try:
- # For labels that don't resolve as scalars like tuples and frozensets
- result = self._get_value(key)
- return result
- except (KeyError, TypeError):
- if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
- # We still have the corner case where a tuple is a key
- # in the first level of our MultiIndex
- return self._get_values_tuple(key)
- if is_iterator(key):
- key = list(key)
- if com.is_bool_indexer(key):
- key = check_bool_indexer(self.index, key)
- key = np.asarray(key, dtype=bool)
- return self._get_values(key)
- return self._get_with(key)
- def _get_with(self, key):
- # other: fancy integer or otherwise
- if isinstance(key, slice):
- # _convert_slice_indexer to determine if this slice is positional
- # or label based, and if the latter, convert to positional
- slobj = self.index._convert_slice_indexer(key, kind="getitem")
- return self._slice(slobj)
- elif isinstance(key, ABCDataFrame):
- raise TypeError(
- "Indexing a Series with DataFrame is not "
- "supported, use the appropriate DataFrame column"
- )
- elif isinstance(key, tuple):
- return self._get_values_tuple(key)
- elif not is_list_like(key):
- # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
- return self.loc[key]
- if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
- key = list(key)
- if isinstance(key, Index):
- key_type = key.inferred_type
- else:
- key_type = lib.infer_dtype(key, skipna=False)
- # Note: The key_type == "boolean" case should be caught by the
- # com.is_bool_indexer check in __getitem__
- if key_type == "integer":
- # We need to decide whether to treat this as a positional indexer
- # (i.e. self.iloc) or label-based (i.e. self.loc)
- if not self.index._should_fallback_to_positional:
- return self.loc[key]
- else:
- return self.iloc[key]
- # handle the dup indexing case GH#4246
- return self.loc[key]
- def _get_values_tuple(self, key):
- # mpl hackaround
- if com.any_none(*key):
- result = self._get_values(key)
- deprecate_ndim_indexing(result, stacklevel=5)
- return result
- if not isinstance(self.index, MultiIndex):
- raise KeyError("key of type tuple not found and not a MultiIndex")
- # If key is contained, would have returned by now
- indexer, new_index = self.index.get_loc_level(key)
- return self._constructor(self._values[indexer], index=new_index).__finalize__(
- self
- )
- def _get_values(self, indexer):
- try:
- new_mgr = self._mgr.getitem_mgr(indexer)
- return self._constructor(new_mgr).__finalize__(self)
- except ValueError:
- # mpl compat if we look up e.g. ser[:, np.newaxis];
- # see tests.series.timeseries.test_mpl_compat_hack
- # the asarray is needed to avoid returning a 2D DatetimeArray
- return np.asarray(self._values[indexer])
- def _get_value(self, label, takeable: bool = False):
- """
- Quickly retrieve single value at passed index label.
- Parameters
- ----------
- label : object
- takeable : interpret the index as indexers, default False
- Returns
- -------
- scalar value
- """
- if takeable:
- return self._values[label]
- # Similar to Index.get_value, but we do not fall back to positional
- loc = self.index.get_loc(label)
- return self.index._get_values_for_loc(self, loc, label)
- def __setitem__(self, key, value) -> None:
- key = com.apply_if_callable(key, self)
- cacher_needs_updating = self._check_is_chained_assignment_possible()
- if key is Ellipsis:
- key = slice(None)
- if isinstance(key, slice):
- indexer = self.index._convert_slice_indexer(key, kind="getitem")
- return self._set_values(indexer, value)
- try:
- self._set_with_engine(key, value)
- except (KeyError, ValueError):
- if is_integer(key) and self.index.inferred_type != "integer":
- # positional setter
- if not self.index._should_fallback_to_positional:
- # GH#33469
- warnings.warn(
- "Treating integers as positional in Series.__setitem__ "
- "with a Float64Index is deprecated. In a future version, "
- "`series[an_int] = val` will insert a new key into the "
- "Series. Use `series.iloc[an_int] = val` to treat the "
- "key as positional.",
- FutureWarning,
- stacklevel=2,
- )
- # this is equivalent to self._values[key] = value
- self._mgr.setitem_inplace(key, value)
- else:
- # GH#12862 adding a new key to the Series
- self.loc[key] = value
- except (InvalidIndexError, TypeError) as err:
- if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
- # cases with MultiIndex don't get here bc they raise KeyError
- raise KeyError(
- "key of type tuple not found and not a MultiIndex"
- ) from err
- if com.is_bool_indexer(key):
- key = check_bool_indexer(self.index, key)
- key = np.asarray(key, dtype=bool)
- if (
- is_list_like(value)
- and len(value) != len(self)
- and not isinstance(value, Series)
- and not is_object_dtype(self.dtype)
- ):
- # Series will be reindexed to have matching length inside
- # _where call below
- # GH#44265
- indexer = key.nonzero()[0]
- self._set_values(indexer, value)
- return
- # otherwise with listlike other we interpret series[mask] = other
- # as series[mask] = other[mask]
- try:
- self._where(~key, value, inplace=True)
- except InvalidIndexError:
- # test_where_dups
- self.iloc[key] = value
- return
- else:
- self._set_with(key, value)
- if cacher_needs_updating:
- self._maybe_update_cacher()
- def _set_with_engine(self, key, value) -> None:
- loc = self.index.get_loc(key)
- # error: Argument 1 to "validate_numeric_casting" has incompatible type
- # "Union[dtype, ExtensionDtype]"; expected "dtype"
- validate_numeric_casting(self.dtype, value) # type: ignore[arg-type]
- # this is equivalent to self._values[key] = value
- self._mgr.setitem_inplace(loc, value)
- def _set_with(self, key, value):
- # other: fancy integer or otherwise
- assert not isinstance(key, tuple)
- if is_scalar(key):
- key = [key]
- elif is_iterator(key):
- # Without this, the call to infer_dtype will consume the generator
- key = list(key)
- key_type = lib.infer_dtype(key, skipna=False)
- # Note: key_type == "boolean" should not occur because that
- # should be caught by the is_bool_indexer check in __setitem__
- if key_type == "integer":
- if not self.index._should_fallback_to_positional:
- self._set_labels(key, value)
- else:
- self._set_values(key, value)
- else:
- self.loc[key] = value
- def _set_labels(self, key, value) -> None:
- key = com.asarray_tuplesafe(key)
- indexer: np.ndarray = self.index.get_indexer(key)
- mask = indexer == -1
- if mask.any():
- raise KeyError(f"{key[mask]} not in index")
- self._set_values(indexer, value)
- def _set_values(self, key, value) -> None:
- if isinstance(key, (Index, Series)):
- key = key._values
- self._mgr = self._mgr.setitem(indexer=key, value=value)
- self._maybe_update_cacher()
- def _set_value(self, label, value, takeable: bool = False):
- """
- Quickly set single value at passed label.
- If label is not contained, a new object is created with the label
- placed at the end of the result index.
- Parameters
- ----------
- label : object
- Partial indexing with MultiIndex not allowed.
- value : object
- Scalar value.
- takeable : interpret the index as indexers, default False
- """
- if not takeable:
- try:
- loc = self.index.get_loc(label)
- except KeyError:
- # set using a non-recursive method
- self.loc[label] = value
- return
- else:
- loc = label
- self._set_values(loc, value)
- # ----------------------------------------------------------------------
- # Lookup Caching
- @property
- def _is_cached(self) -> bool:
- """Return boolean indicating if self is cached or not."""
- return getattr(self, "_cacher", None) is not None
- def _get_cacher(self):
- """return my cacher or None"""
- cacher = getattr(self, "_cacher", None)
- if cacher is not None:
- cacher = cacher[1]()
- return cacher
- def _reset_cacher(self) -> None:
- """
- Reset the cacher.
- """
- if hasattr(self, "_cacher"):
- # should only get here with self.ndim == 1
- del self._cacher
- def _set_as_cached(self, item, cacher) -> None:
- """
- Set the _cacher attribute on the calling object with a weakref to
- cacher.
- """
- self._cacher = (item, weakref.ref(cacher))
- def _clear_item_cache(self) -> None:
- # no-op for Series
- pass
- def _check_is_chained_assignment_possible(self) -> bool:
- """
- See NDFrame._check_is_chained_assignment_possible.__doc__
- """
- if self._is_view and self._is_cached:
- ref = self._get_cacher()
- if ref is not None and ref._is_mixed_type:
- self._check_setitem_copy(t="referent", force=True)
- return True
- return super()._check_is_chained_assignment_possible()
- def _maybe_update_cacher(
- self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False
- ) -> None:
- """
- See NDFrame._maybe_update_cacher.__doc__
- """
- cacher = getattr(self, "_cacher", None)
- if cacher is not None:
- assert self.ndim == 1
- ref: DataFrame = cacher[1]()
- # we are trying to reference a dead referent, hence
- # a copy
- if ref is None:
- del self._cacher
- elif len(self) == len(ref) and self.name in ref.columns:
- # GH#42530 self.name must be in ref.columns
- # to ensure column still in dataframe
- # otherwise, either self or ref has swapped in new arrays
- ref._maybe_cache_changed(cacher[0], self, inplace=inplace)
- else:
- # GH#33675 we have swapped in a new array, so parent
- # reference to self is now invalid
- ref._item_cache.pop(cacher[0], None)
- super()._maybe_update_cacher(
- clear=clear, verify_is_copy=verify_is_copy, inplace=inplace
- )
- # ----------------------------------------------------------------------
- # Unsorted
- @property
- def _is_mixed_type(self):
- return False
- def repeat(self, repeats, axis=None) -> Series:
- """
- Repeat elements of a Series.
- Returns a new Series where each element of the current Series
- is repeated consecutively a given number of times.
- Parameters
- ----------
- repeats : int or array of ints
- The number of repetitions for each element. This should be a
- non-negative integer. Repeating 0 times will return an empty
- Series.
- axis : None
- Must be ``None``. Has no effect but is accepted for compatibility
- with numpy.
- Returns
- -------
- Series
- Newly created Series with repeated elements.
- See Also
- --------
- Index.repeat : Equivalent function for Index.
- numpy.repeat : Similar method for :class:`numpy.ndarray`.
- Examples
- --------
- >>> s = pd.Series(['a', 'b', 'c'])
- >>> s
- 0 a
- 1 b
- 2 c
- dtype: object
- >>> s.repeat(2)
- 0 a
- 0 a
- 1 b
- 1 b
- 2 c
- 2 c
- dtype: object
- >>> s.repeat([1, 2, 3])
- 0 a
- 1 b
- 1 b
- 2 c
- 2 c
- 2 c
- dtype: object
- """
- nv.validate_repeat((), {"axis": axis})
- new_index = self.index.repeat(repeats)
- new_values = self._values.repeat(repeats)
- return self._constructor(new_values, index=new_index).__finalize__(
- self, method="repeat"
- )
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"])
- def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False):
- """
- Generate a new DataFrame or Series with the index reset.
- This is useful when the index needs to be treated as a column, or
- when the index is meaningless and needs to be reset to the default
- before another operation.
- Parameters
- ----------
- level : int, str, tuple, or list, optional
- For a Series with a MultiIndex, only remove the specified levels
- from the index. Removes all levels by default.
- drop : bool, default False
- Just reset the index, without inserting it as a column in
- the new DataFrame.
- name : object, optional
- The name to use for the column containing the original Series
- values. Uses ``self.name`` by default. This argument is ignored
- when `drop` is True.
- inplace : bool, default False
- Modify the Series in place (do not create a new object).
- Returns
- -------
- Series or DataFrame or None
- When `drop` is False (the default), a DataFrame is returned.
- The newly created columns will come first in the DataFrame,
- followed by the original Series values.
- When `drop` is True, a `Series` is returned.
- In either case, if ``inplace=True``, no value is returned.
- See Also
- --------
- DataFrame.reset_index: Analogous function for DataFrame.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4], name='foo',
- ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
- Generate a DataFrame with default index.
- >>> s.reset_index()
- idx foo
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To specify the name of the new column use `name`.
- >>> s.reset_index(name='values')
- idx values
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To generate a new Series with the default index, set `drop` to True.
- >>> s.reset_index(drop=True)
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- To update the Series in place, without generating a new one,
- set `inplace` to True. Note that it also requires ``drop=True``.
- >>> s.reset_index(inplace=True, drop=True)
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- The `level` parameter is useful for Series with a multi-level
- index.
- >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
- ... np.array(['one', 'two', 'one', 'two'])]
- >>> s2 = pd.Series(
- ... range(4), name='foo',
- ... index=pd.MultiIndex.from_arrays(arrays,
- ... names=['a', 'b']))
- To remove a specific level from the Index, use `level`.
- >>> s2.reset_index(level='a')
- a foo
- b
- one bar 0
- two bar 1
- one baz 2
- two baz 3
- If `level` is not set, all levels are removed from the Index.
- >>> s2.reset_index()
- a b foo
- 0 bar one 0
- 1 bar two 1
- 2 baz one 2
- 3 baz two 3
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- if drop:
- if name is lib.no_default:
- name = self.name
- new_index = default_index(len(self))
- if level is not None:
- if not isinstance(level, (tuple, list)):
- level = [level]
- level = [self.index._get_level_number(lev) for lev in level]
- if len(level) < self.index.nlevels:
- new_index = self.index.droplevel(level)
- if inplace:
- self.index = new_index
- # set name if it was passed, otherwise, keep the previous name
- self.name = name or self.name
- else:
- return self._constructor(
- self._values.copy(), index=new_index
- ).__finalize__(self, method="reset_index")
- elif inplace:
- raise TypeError(
- "Cannot reset_index inplace on a Series to create a DataFrame"
- )
- else:
- if name is lib.no_default:
- # For backwards compatibility, keep columns as [0] instead of
- # [None] when self.name is None
- if self.name is None:
- name = 0
- else:
- name = self.name
- df = self.to_frame(name)
- return df.reset_index(level=level, drop=drop)
- # ----------------------------------------------------------------------
- # Rendering Methods
- def __repr__(self) -> str:
- """
- Return a string representation for a particular Series.
- """
- repr_params = fmt.get_series_repr_params()
- return self.to_string(**repr_params)
- def to_string(
- self,
- buf=None,
- na_rep="NaN",
- float_format=None,
- header=True,
- index=True,
- length=False,
- dtype=False,
- name=False,
- max_rows=None,
- min_rows=None,
- ):
- """
- Render a string representation of the Series.
- Parameters
- ----------
- buf : StringIO-like, optional
- Buffer to write to.
- na_rep : str, optional
- String representation of NaN to use, default 'NaN'.
- float_format : one-parameter function, optional
- Formatter function to apply to columns' elements if they are
- floats, default None.
- header : bool, default True
- Add the Series header (index name).
- index : bool, optional
- Add index (row) labels, default True.
- length : bool, default False
- Add the Series length.
- dtype : bool, default False
- Add the Series dtype.
- name : bool, default False
- Add the Series name if not None.
- max_rows : int, optional
- Maximum number of rows to show before truncating. If None, show
- all.
- min_rows : int, optional
- The number of rows to display in a truncated repr (when number
- of rows is above `max_rows`).
- Returns
- -------
- str or None
- String representation of Series if ``buf=None``, otherwise None.
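- Examples
- --------
- >>> ser = pd.Series([1, 2, 3]).to_string()
- >>> ser
- '0    1\n1    2\n2    3'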
- """
- formatter = fmt.SeriesFormatter(
- self,
- name=name,
- length=length,
- header=header,
- index=index,
- dtype=dtype,
- na_rep=na_rep,
- float_format=float_format,
- min_rows=min_rows,
- max_rows=max_rows,
- )
- result = formatter.to_string()
- # catch contract violations
- if not isinstance(result, str):
- raise AssertionError(
- "result must be of type str, type "
- f"of result is {repr(type(result).__name__)}"
- )
- if buf is None:
- return result
- else:
- try:
- buf.write(result)
- except AttributeError:
- with open(buf, "w") as f:
- f.write(result)
- @doc(
- klass=_shared_doc_kwargs["klass"],
- storage_options=generic._shared_docs["storage_options"],
- examples=dedent(
- """Examples
- --------
- >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
- >>> print(s.to_markdown())
- | | animal |
- |---:|:---------|
- | 0 | elk |
- | 1 | pig |
- | 2 | dog |
- | 3 | quetzal |
- Output markdown with a tabulate option.
- >>> print(s.to_markdown(tablefmt="grid"))
- +----+----------+
- | | animal |
- +====+==========+
- | 0 | elk |
- +----+----------+
- | 1 | pig |
- +----+----------+
- | 2 | dog |
- +----+----------+
- | 3 | quetzal |
- +----+----------+"""
- ),
- )
- def to_markdown(
- self,
- buf: IO[str] | None = None,
- mode: str = "wt",
- index: bool = True,
- storage_options: StorageOptions = None,
- **kwargs,
- ) -> str | None:
- """
- Print {klass} in Markdown-friendly format.
- .. versionadded:: 1.0.0
- Parameters
- ----------
- buf : str, Path or StringIO-like, optional, default None
- Buffer to write to. If None, the output is returned as a string.
- mode : str, optional
- Mode in which file is opened, "wt" by default.
- index : bool, optional, default True
- Add index (row) labels.
- .. versionadded:: 1.1.0
- {storage_options}
- .. versionadded:: 1.2.0
- **kwargs
- These parameters will be passed to `tabulate \
- <https://pypi.org/project/tabulate>`_.
- Returns
- -------
- str
- {klass} in Markdown-friendly format.
- Notes
- -----
- Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.
- {examples}
- """
- return self.to_frame().to_markdown(
- buf, mode, index, storage_options=storage_options, **kwargs
- )
- # ----------------------------------------------------------------------
- def items(self) -> Iterable[tuple[Hashable, Any]]:
- """
- Lazily iterate over (index, value) tuples.
- This method returns an iterable of tuples (index, value). This is
- convenient if you want to create a lazy iterator.
- Returns
- -------
- iterable
- Iterable of tuples containing the (index, value) pairs from a
- Series.
- See Also
- --------
- DataFrame.items : Iterate over (column name, Series) pairs.
- DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
- Examples
- --------
- >>> s = pd.Series(['A', 'B', 'C'])
- >>> for index, value in s.items():
- ... print(f"Index : {index}, Value : {value}")
- Index : 0, Value : A
- Index : 1, Value : B
- Index : 2, Value : C
- """
- return zip(iter(self.index), iter(self))
- @Appender(items.__doc__)
- def iteritems(self) -> Iterable[tuple[Hashable, Any]]:
- return self.items()
- # ----------------------------------------------------------------------
- # Misc public methods
- def keys(self) -> Index:
- """
- Return alias for index.
- Returns
- -------
- Index
- Index of the Series.
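- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.keys()
- RangeIndex(start=0, stop=3, step=1)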
- """
- return self.index
- def to_dict(self, into=dict):
- """
- Convert Series to {label -> value} dict or dict-like object.
- Parameters
- ----------
- into : class, default dict
- The collections.abc.Mapping subclass to use as the return
- object. Can be the actual class or an empty
- instance of the mapping type you want. If you want a
- collections.defaultdict, you must pass it initialized.
- Returns
- -------
- collections.abc.Mapping
- Key-value representation of Series.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.to_dict()
- {0: 1, 1: 2, 2: 3, 3: 4}
- >>> from collections import OrderedDict, defaultdict
- >>> s.to_dict(OrderedDict)
- OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
- >>> dd = defaultdict(list)
- >>> s.to_dict(dd)
- defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
- """
- # GH16122
- into_c = com.standardize_mapping(into)
- return into_c((k, maybe_box_native(v)) for k, v in self.items())
- def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
- """
- Convert Series to DataFrame.
- Parameters
- ----------
- name : object, default None
- The passed name should substitute for the series name (if it has
- one).
- Returns
- -------
- DataFrame
- DataFrame representation of Series.
- Examples
- --------
- >>> s = pd.Series(["a", "b", "c"],
- ... name="vals")
- >>> s.to_frame()
- vals
- 0 a
- 1 b
- 2 c
- """
- columns: Index
- if name is lib.no_default:
- name = self.name
- if name is None:
- # default to [0], same as we would get with DataFrame(self)
- columns = default_index(1)
- else:
- columns = Index([name])
- else:
- columns = Index([name])
- mgr = self._mgr.to_2d_mgr(columns)
- return self._constructor_expanddim(mgr)
- def _set_name(self, name, inplace=False) -> Series:
- """
- Set the Series name.
- Parameters
- ----------
- name : str
- inplace : bool
- Whether to modify `self` directly or return a copy.
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- ser = self if inplace else self.copy()
- ser.name = name
- return ser
- @Appender(
- """
- Examples
- --------
- >>> ser = pd.Series([390., 350., 30., 20.],
- ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
- >>> ser
- Falcon 390.0
- Falcon 350.0
- Parrot 30.0
- Parrot 20.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(["a", "b", "a", "b"]).mean()
- a 210.0
- b 185.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level=0).mean()
- Falcon 370.0
- Parrot 25.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(ser > 100).mean()
- Max Speed
- False 25.0
- True 370.0
- Name: Max Speed, dtype: float64
- **Grouping by Indexes**
- We can groupby different levels of a hierarchical index
- using the `level` parameter:
- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- ... ['Captive', 'Wild', 'Captive', 'Wild']]
- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
- >>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
- >>> ser
- Animal Type
- Falcon Captive 390.0
- Wild 350.0
- Parrot Captive 30.0
- Wild 20.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level=0).mean()
- Animal
- Falcon 370.0
- Parrot 25.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level="Type").mean()
- Type
- Captive 210.0
- Wild 185.0
- Name: Max Speed, dtype: float64
- We can also choose whether to include `NA` in the group keys via the
- `dropna` parameter; the default setting is `True`:
- >>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
- >>> ser.groupby(level=0).sum()
- a 3
- b 3
- dtype: int64
- >>> ser.groupby(level=0, dropna=False).sum()
- a 3
- b 3
- NaN 3
- dtype: int64
- >>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
- >>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
- >>> ser.groupby(["a", "b", "a", np.nan]).mean()
- a 210.0
- b 350.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()
- a 210.0
- b 350.0
- NaN 20.0
- Name: Max Speed, dtype: float64
- """
- )
- @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
- def groupby(
- self,
- by=None,
- axis=0,
- level=None,
- as_index: bool = True,
- sort: bool = True,
- group_keys: bool = True,
- squeeze: bool | lib.NoDefault = no_default,
- observed: bool = False,
- dropna: bool = True,
- ) -> SeriesGroupBy:
- from pandas.core.groupby.generic import SeriesGroupBy
- if squeeze is not no_default:
- warnings.warn(
- (
- "The `squeeze` parameter is deprecated and "
- "will be removed in a future version."
- ),
- FutureWarning,
- stacklevel=2,
- )
- else:
- squeeze = False
- if level is None and by is None:
- raise TypeError("You have to supply one of 'by' and 'level'")
- axis = self._get_axis_number(axis)
- # error: Argument "squeeze" to "SeriesGroupBy" has incompatible type
- # "Union[bool, NoDefault]"; expected "bool"
- return SeriesGroupBy(
- obj=self,
- keys=by,
- axis=axis,
- level=level,
- as_index=as_index,
- sort=sort,
- group_keys=group_keys,
- squeeze=squeeze, # type: ignore[arg-type]
- observed=observed,
- dropna=dropna,
- )
- # ----------------------------------------------------------------------
- # Statistics, overridden ndarray methods
- # TODO: integrate bottleneck
- def count(self, level=None):
- """
- Return number of non-NA/null observations in the Series.
- Parameters
- ----------
- level : int or level name, default None
- If the axis is a MultiIndex (hierarchical), count along a
- particular level, collapsing into a smaller Series.
- Returns
- -------
- int or Series (if level specified)
- Number of non-null values in the Series.
- See Also
- --------
- DataFrame.count : Count non-NA cells for each column or row.
- Examples
- --------
- >>> s = pd.Series([0.0, 1.0, np.nan])
- >>> s.count()
- 2
- """
- if level is None:
- return notna(self._values).sum().astype("int64")
- else:
- warnings.warn(
- "Using the level keyword in DataFrame and Series aggregations is "
- "deprecated and will be removed in a future version. Use groupby "
- "instead. ser.count(level=1) should use ser.groupby(level=1).count().",
- FutureWarning,
- stacklevel=2,
- )
- if not isinstance(self.index, MultiIndex):
- raise ValueError("Series.count level is only valid with a MultiIndex")
- index = self.index
- assert isinstance(index, MultiIndex) # for mypy
- if isinstance(level, str):
- level = index._get_level_number(level)
- lev = index.levels[level]
- level_codes = np.array(index.codes[level], subok=False, copy=True)
- mask = level_codes == -1
- if mask.any():
- level_codes[mask] = cnt = len(lev)
- lev = lev.insert(cnt, lev._na_value)
- obs = level_codes[notna(self._values)]
- out = np.bincount(obs, minlength=len(lev) or None)
- return self._constructor(out, index=lev, dtype="int64").__finalize__(
- self, method="count"
- )
- def mode(self, dropna=True) -> Series:
- """
- Return the mode(s) of the Series.
- The mode is the value that appears most often. There can be multiple modes.
- Always returns Series even if only one value is returned.
- Parameters
- ----------
- dropna : bool, default True
- Don't consider counts of NaN/NaT.
- Returns
- -------
- Series
- Modes of the Series in sorted order.
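- Examples
- --------
- >>> s = pd.Series([2, 4, 2, 2, 4, None])
- >>> s.mode()
- 0    2.0
- dtype: float64
- Multiple modes are returned in sorted order:
- >>> pd.Series([1, 2, 2, 3, 3]).mode()
- 0    2
- 1    3
- dtype: int64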
- """
- # TODO: Add option for bins like value_counts()
- return algorithms.mode(self, dropna=dropna)
- def unique(self) -> ArrayLike:
- """
- Return unique values of Series object.
- Uniques are returned in order of appearance. Hash table-based unique,
- therefore does NOT sort.
- Returns
- -------
- ndarray or ExtensionArray
- The unique values returned as a NumPy array. See Notes.
- See Also
- --------
- unique : Top-level unique method for any 1-d array-like object.
- Index.unique : Return Index with unique values from an Index object.
- Notes
- -----
- Returns the unique values as a NumPy array. In case of an
- extension-array backed Series, a new
- :class:`~api.extensions.ExtensionArray` of that type with just
- the unique values is returned. This includes
- * Categorical
- * Period
- * Datetime with Timezone
- * Interval
- * Sparse
- * IntegerNA
- See Examples section.
- Examples
- --------
- >>> pd.Series([2, 1, 3, 3], name='A').unique()
- array([2, 1, 3])
- >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
- array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
- >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
- ... for _ in range(3)]).unique()
- <DatetimeArray>
- ['2016-01-01 00:00:00-05:00']
- Length: 1, dtype: datetime64[ns, US/Eastern]
- A Categorical will return categories in the order of
- appearance and with the same dtype.
- >>> pd.Series(pd.Categorical(list('baabc'))).unique()
- ['b', 'a', 'c']
- Categories (3, object): ['a', 'b', 'c']
- >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
- ... ordered=True)).unique()
- ['b', 'a', 'c']
- Categories (3, object): ['a' < 'b' < 'c']
- """
- return super().unique()
- @overload
- def drop_duplicates(self, keep=..., inplace: Literal[False] = ...) -> Series:
- ...
- @overload
- def drop_duplicates(self, keep, inplace: Literal[True]) -> None:
- ...
- @overload
- def drop_duplicates(self, *, inplace: Literal[True]) -> None:
- ...
- @overload
- def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None:
- ...
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def drop_duplicates(self, keep="first", inplace=False) -> Series | None:
- """
- Return Series with duplicate values removed.
- Parameters
- ----------
- keep : {'first', 'last', ``False``}, default 'first'
- Method to handle dropping duplicates:
- - 'first' : Drop duplicates except for the first occurrence.
- - 'last' : Drop duplicates except for the last occurrence.
- - ``False`` : Drop all duplicates.
- inplace : bool, default ``False``
- If ``True``, performs operation inplace and returns None.
- Returns
- -------
- Series or None
- Series with duplicates dropped or None if ``inplace=True``.
- See Also
- --------
- Index.drop_duplicates : Equivalent method on Index.
- DataFrame.drop_duplicates : Equivalent method on DataFrame.
- Series.duplicated : Related method on Series, indicating duplicate
- Series values.
- Examples
- --------
- Generate a Series with duplicated entries.
- >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
- ... name='animal')
- >>> s
- 0 lama
- 1 cow
- 2 lama
- 3 beetle
- 4 lama
- 5 hippo
- Name: animal, dtype: object
- With the 'keep' parameter, the selection behaviour of duplicated values
- can be changed. The value 'first' keeps the first occurrence for each
- set of duplicated entries. The default value of keep is 'first'.
- >>> s.drop_duplicates()
- 0 lama
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
- The value 'last' for parameter 'keep' keeps the last occurrence for
- each set of duplicated entries.
- >>> s.drop_duplicates(keep='last')
- 1 cow
- 3 beetle
- 4 lama
- 5 hippo
- Name: animal, dtype: object
- The value ``False`` for parameter 'keep' discards all sets of
- duplicated entries. Setting the value of 'inplace' to ``True`` performs
- the operation inplace and returns ``None``.
- >>> s.drop_duplicates(keep=False, inplace=True)
- >>> s
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- result = super().drop_duplicates(keep=keep)
- if inplace:
- self._update_inplace(result)
- return None
- else:
- return result
- def duplicated(self, keep="first") -> Series:
- """
- Indicate duplicate Series values.
- Duplicated values are indicated as ``True`` values in the resulting
- Series. Either all duplicates, all except the first or all except the
- last occurrence of duplicates can be indicated.
- Parameters
- ----------
- keep : {'first', 'last', False}, default 'first'
- Method to handle dropping duplicates:
- - 'first' : Mark duplicates as ``True`` except for the first
- occurrence.
- - 'last' : Mark duplicates as ``True`` except for the last
- occurrence.
- - ``False`` : Mark all duplicates as ``True``.
- Returns
- -------
- Series[bool]
- Series indicating whether each value has occurred in the
- preceding values.
- See Also
- --------
- Index.duplicated : Equivalent method on pandas.Index.
- DataFrame.duplicated : Equivalent method on pandas.DataFrame.
- Series.drop_duplicates : Remove duplicate values from Series.
- Examples
- --------
- By default, for each set of duplicated values, the first occurrence is
- set to False and all others to True:
- >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
- >>> animals.duplicated()
- 0 False
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- which is equivalent to
- >>> animals.duplicated(keep='first')
- 0 False
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- By using 'last', the last occurrence of each set of duplicated values
- is set to False and all others to True:
- >>> animals.duplicated(keep='last')
- 0 True
- 1 False
- 2 True
- 3 False
- 4 False
- dtype: bool
- By setting keep to ``False``, all duplicates are True:
- >>> animals.duplicated(keep=False)
- 0 True
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- """
- res = self._duplicated(keep=keep)
- result = self._constructor(res, index=self.index)
- return result.__finalize__(self, method="duplicated")
- def idxmin(self, axis=0, skipna=True, *args, **kwargs):
- """
- Return the row label of the minimum value.
- If multiple values equal the minimum, the first row label with that
- value is returned.
- Parameters
- ----------
- axis : int, default 0
- For compatibility with DataFrame.idxmin. Redundant for application
- on Series.
- skipna : bool, default True
- Exclude NA/null values. If the entire Series is NA, the result
- will be NA.
- *args, **kwargs
- Additional arguments and keywords have no effect but might be
- accepted for compatibility with NumPy.
- Returns
- -------
- Index
- Label of the minimum value.
- Raises
- ------
- ValueError
- If the Series is empty.
- See Also
- --------
- numpy.argmin : Return indices of the minimum values
- along the given axis.
- DataFrame.idxmin : Return index of first occurrence of minimum
- over requested axis.
- Series.idxmax : Return index *label* of the first occurrence
- of maximum of values.
- Notes
- -----
- This method is the Series version of ``ndarray.argmin``. This method
- returns the label of the minimum, while ``ndarray.argmin`` returns
- the position. To get the position, use ``series.values.argmin()``.
- Examples
- --------
- >>> s = pd.Series(data=[1, None, 4, 1],
- ... index=['A', 'B', 'C', 'D'])
- >>> s
- A 1.0
- B NaN
- C 4.0
- D 1.0
- dtype: float64
- >>> s.idxmin()
- 'A'
- If `skipna` is False and there is an NA value in the data,
- the function returns ``nan``.
- >>> s.idxmin(skipna=False)
- nan
- """
- i = self.argmin(axis, skipna, *args, **kwargs)
- if i == -1:
- return np.nan
- return self.index[i]
- def idxmax(self, axis=0, skipna=True, *args, **kwargs):
- """
- Return the row label of the maximum value.
- If multiple values equal the maximum, the first row label with that
- value is returned.
- Parameters
- ----------
- axis : int, default 0
- For compatibility with DataFrame.idxmax. Redundant for application
- on Series.
- skipna : bool, default True
- Exclude NA/null values. If the entire Series is NA, the result
- will be NA.
- *args, **kwargs
- Additional arguments and keywords have no effect but might be
- accepted for compatibility with NumPy.
- Returns
- -------
- Index
- Label of the maximum value.
- Raises
- ------
- ValueError
- If the Series is empty.
- See Also
- --------
- numpy.argmax : Return indices of the maximum values
- along the given axis.
- DataFrame.idxmax : Return index of first occurrence of maximum
- over requested axis.
- Series.idxmin : Return index *label* of the first occurrence
- of minimum of values.
- Notes
- -----
- This method is the Series version of ``ndarray.argmax``. This method
- returns the label of the maximum, while ``ndarray.argmax`` returns
- the position. To get the position, use ``series.values.argmax()``.
- Examples
- --------
- >>> s = pd.Series(data=[1, None, 4, 3, 4],
- ... index=['A', 'B', 'C', 'D', 'E'])
- >>> s
- A 1.0
- B NaN
- C 4.0
- D 3.0
- E 4.0
- dtype: float64
- >>> s.idxmax()
- 'C'
- If `skipna` is False and there is an NA value in the data,
- the function returns ``nan``.
- >>> s.idxmax(skipna=False)
- nan
- """
- i = self.argmax(axis, skipna, *args, **kwargs)
- if i == -1:
- return np.nan
- return self.index[i]
- def round(self, decimals=0, *args, **kwargs) -> Series:
- """
- Round each value in a Series to the given number of decimals.
- Parameters
- ----------
- decimals : int, default 0
- Number of decimal places to round to. If decimals is negative,
- it specifies the number of positions to the left of the decimal point.
- *args, **kwargs
- Additional arguments and keywords have no effect but might be
- accepted for compatibility with NumPy.
- Returns
- -------
- Series
- Rounded values of the Series.
- See Also
- --------
- numpy.around : Round values of an np.array.
- DataFrame.round : Round values of a DataFrame.
- Examples
- --------
- >>> s = pd.Series([0.1, 1.3, 2.7])
- >>> s.round()
- 0 0.0
- 1 1.0
- 2 3.0
- dtype: float64
- """
- nv.validate_round(args, kwargs)
- result = self._values.round(decimals)
- result = self._constructor(result, index=self.index).__finalize__(
- self, method="round"
- )
- return result
- def quantile(self, q=0.5, interpolation="linear"):
- """
- Return value at the given quantile.
- Parameters
- ----------
- q : float or array-like, default 0.5 (50% quantile)
- The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to use,
- when the desired quantile lies between two data points `i` and `j`:
- * linear: `i + (j - i) * fraction`, where `fraction` is the
- fractional part of the index surrounded by `i` and `j`.
- * lower: `i`.
- * higher: `j`.
- * nearest: `i` or `j` whichever is nearest.
- * midpoint: (`i` + `j`) / 2.
- Returns
- -------
- float or Series
- If ``q`` is an array, a Series will be returned where the
- index is ``q`` and the values are the quantiles, otherwise
- a float will be returned.
- See Also
- --------
- core.window.Rolling.quantile : Calculate the rolling quantile.
- numpy.percentile : Returns the q-th percentile(s) of the array elements.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.quantile(.5)
- 2.5
- >>> s.quantile([.25, .5, .75])
- 0.25 1.75
- 0.50 2.50
- 0.75 3.25
- dtype: float64
- """
- validate_percentile(q)
- # We dispatch to DataFrame so that core.internals only has to worry
- # about 2D cases.
- df = self.to_frame()
- result = df.quantile(q=q, interpolation=interpolation, numeric_only=False)
- if result.ndim == 2:
- result = result.iloc[:, 0]
- if is_list_like(q):
- result.name = self.name
- return self._constructor(result, index=Float64Index(q), name=self.name)
- else:
- # scalar
- return result.iloc[0]
- def corr(self, other, method="pearson", min_periods=None) -> float:
- """
- Compute correlation with `other` Series, excluding missing values.
- Parameters
- ----------
- other : Series
- Series with which to compute the correlation.
- method : {'pearson', 'kendall', 'spearman'} or callable
- Method used to compute correlation:
- - pearson : Standard correlation coefficient
- - kendall : Kendall Tau correlation coefficient
- - spearman : Spearman rank correlation
- - callable: Callable with input two 1d ndarrays and returning a float.
- .. warning::
- Note that the returned matrix from corr will have 1 along the
- diagonals and will be symmetric regardless of the callable's
- behavior.
- min_periods : int, optional
- Minimum number of observations needed to have a valid result.
- Returns
- -------
- float
- Correlation with other.
- See Also
- --------
- DataFrame.corr : Compute pairwise correlation between columns.
- DataFrame.corrwith : Compute pairwise correlation with another
- DataFrame or Series.
- Examples
- --------
- >>> def histogram_intersection(a, b):
- ... v = np.minimum(a, b).sum().round(decimals=1)
- ... return v
- >>> s1 = pd.Series([.2, .0, .6, .2])
- >>> s2 = pd.Series([.3, .6, .0, .1])
- >>> s1.corr(s2, method=histogram_intersection)
- 0.3
- """
- this, other = self.align(other, join="inner", copy=False)
- if len(this) == 0:
- return np.nan
- if method in ["pearson", "spearman", "kendall"] or callable(method):
- return nanops.nancorr(
- this.values, other.values, method=method, min_periods=min_periods
- )
- raise ValueError(
- "method must be either 'pearson', "
- "'spearman', 'kendall', or a callable, "
- f"'{method}' was supplied"
- )
- def cov(
- self,
- other: Series,
- min_periods: int | None = None,
- ddof: int | None = 1,
- ) -> float:
- """
- Compute covariance with Series, excluding missing values.
- Parameters
- ----------
- other : Series
- Series with which to compute the covariance.
- min_periods : int, optional
- Minimum number of observations needed to have a valid result.
- ddof : int, default 1
- Delta degrees of freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- .. versionadded:: 1.1.0
- Returns
- -------
- float
- Covariance between Series and other normalized by N-1
- (unbiased estimator).
- See Also
- --------
- DataFrame.cov : Compute pairwise covariance of columns.
- Examples
- --------
- >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
- >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
- >>> s1.cov(s2)
- -0.01685762652715874
- """
- this, other = self.align(other, join="inner", copy=False)
- if len(this) == 0:
- return np.nan
- return nanops.nancov(
- this.values, other.values, min_periods=min_periods, ddof=ddof
- )
- @doc(
- klass="Series",
- extra_params="",
- other_klass="DataFrame",
- examples=dedent(
- """
- Difference with previous row
- >>> s = pd.Series([1, 1, 2, 3, 5, 8])
- >>> s.diff()
- 0 NaN
- 1 0.0
- 2 1.0
- 3 1.0
- 4 2.0
- 5 3.0
- dtype: float64
- Difference with 3rd previous row
- >>> s.diff(periods=3)
- 0 NaN
- 1 NaN
- 2 NaN
- 3 2.0
- 4 4.0
- 5 6.0
- dtype: float64
- Difference with following row
- >>> s.diff(periods=-1)
- 0 0.0
- 1 -1.0
- 2 -1.0
- 3 -2.0
- 4 -3.0
- 5 NaN
- dtype: float64
- Overflow in input dtype
- >>> s = pd.Series([1, 0], dtype=np.uint8)
- >>> s.diff()
- 0 NaN
- 1 255.0
- dtype: float64"""
- ),
- )
- def diff(self, periods: int = 1) -> Series:
- """
- First discrete difference of element.
- Calculates the difference of a {klass} element compared with another
- element in the {klass} (default is element in previous row).
- Parameters
- ----------
- periods : int, default 1
- Periods to shift for calculating difference, accepts negative
- values.
- {extra_params}
- Returns
- -------
- {klass}
- First differences of the Series.
- See Also
- --------
- {klass}.pct_change: Percent change over given number of periods.
- {klass}.shift: Shift index by desired number of periods with an
- optional time freq.
- {other_klass}.diff: First discrete difference of object.
- Notes
- -----
- For boolean dtypes, this uses :meth:`operator.xor` rather than
- :meth:`operator.sub`.
- The result is calculated according to the current dtype in {klass},
- however the dtype of the result is always float64.
- Examples
- --------
- {examples}
- """
- result = algorithms.diff(self._values, periods)
- return self._constructor(result, index=self.index).__finalize__(
- self, method="diff"
- )
- def autocorr(self, lag=1) -> float:
- """
- Compute the lag-N autocorrelation.
- This method computes the Pearson correlation between
- the Series and its shifted self.
- Parameters
- ----------
- lag : int, default 1
- Number of lags to apply before performing autocorrelation.
- Returns
- -------
- float
- The Pearson correlation between self and self.shift(lag).
- See Also
- --------
- Series.corr : Compute the correlation between two Series.
- Series.shift : Shift index by desired number of periods.
- DataFrame.corr : Compute pairwise correlation of columns.
- DataFrame.corrwith : Compute pairwise correlation between rows or
- columns of two DataFrame objects.
- Notes
- -----
- If the Pearson correlation is not well defined, 'NaN' is returned.
- Examples
- --------
- >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
- >>> s.autocorr() # doctest: +ELLIPSIS
- 0.10355...
- >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
- -0.99999...
- If the Pearson correlation is not well defined, then 'NaN' is returned.
- >>> s = pd.Series([1, 0, 0, 0])
- >>> s.autocorr()
- nan
- """
- return self.corr(self.shift(lag))
- def dot(self, other):
- """
- Compute the dot product between the Series and the columns of other.
- This method computes the dot product between the Series and another
- one, or the Series and each column of a DataFrame, or the Series and
- each column of an array.
- It can also be called using `self @ other` in Python >= 3.5.
- Parameters
- ----------
- other : Series, DataFrame or array-like
- The other object with which to compute the dot product.
- Returns
- -------
- scalar, Series or numpy.ndarray
- Return the dot product of the Series and other if other is a
- Series; a Series of the dot products between the Series and each
- column of other if other is a DataFrame; or a numpy.ndarray of the
- dot products with each column if other is a numpy array.
- See Also
- --------
- DataFrame.dot: Compute the matrix product with the DataFrame.
- Series.mul: Multiplication of series and other, element-wise.
- Notes
- -----
- The Series and other have to share the same index if other is a Series
- or a DataFrame.
- Examples
- --------
- >>> s = pd.Series([0, 1, 2, 3])
- >>> other = pd.Series([-1, 2, -3, 4])
- >>> s.dot(other)
- 8
- >>> s @ other
- 8
- >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
- >>> s.dot(df)
- 0 24
- 1 14
- dtype: int64
- >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
- >>> s.dot(arr)
- array([24, 14])
- """
- if isinstance(other, (Series, ABCDataFrame)):
- common = self.index.union(other.index)
- if len(common) > len(self.index) or len(common) > len(other.index):
- raise ValueError("matrices are not aligned")
- left = self.reindex(index=common, copy=False)
- right = other.reindex(index=common, copy=False)
- lvals = left.values
- rvals = right.values
- else:
- lvals = self.values
- rvals = np.asarray(other)
- if lvals.shape[0] != rvals.shape[0]:
- raise Exception(
- f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
- )
- if isinstance(other, ABCDataFrame):
- return self._constructor(
- np.dot(lvals, rvals), index=other.columns
- ).__finalize__(self, method="dot")
- elif isinstance(other, Series):
- return np.dot(lvals, rvals)
- elif isinstance(rvals, np.ndarray):
- return np.dot(lvals, rvals)
- else: # pragma: no cover
- raise TypeError(f"unsupported type: {type(other)}")
- def __matmul__(self, other):
- """
- Matrix multiplication using binary `@` operator in Python >= 3.5.
- """
- return self.dot(other)
- def __rmatmul__(self, other):
- """
- Matrix multiplication using binary `@` operator in Python >= 3.5.
- """
- return self.dot(np.transpose(other))
- @doc(base.IndexOpsMixin.searchsorted, klass="Series")
- # Signature of "searchsorted" incompatible with supertype "IndexOpsMixin"
- def searchsorted( # type: ignore[override]
- self,
- value: NumpyValueArrayLike | ExtensionArray,
- side: Literal["left", "right"] = "left",
- sorter: NumpySorter = None,
- ) -> npt.NDArray[np.intp] | np.intp:
- return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter)
- # -------------------------------------------------------------------
- # Combination
- def append(
- self, to_append, ignore_index: bool = False, verify_integrity: bool = False
- ):
- """
- Concatenate two or more Series.
- Parameters
- ----------
- to_append : Series or list/tuple of Series
- Series to append with self.
- ignore_index : bool, default False
- If True, the resulting axis will be labeled 0, 1, …, n - 1.
- verify_integrity : bool, default False
- If True, raise Exception on creating index with duplicates.
- Returns
- -------
- Series
- Concatenated Series.
- See Also
- --------
- concat : General function to concatenate DataFrame or Series objects.
- Notes
- -----
- Iteratively appending to a Series can be more computationally intensive
- than a single concatenate. A better solution is to append values to a
- list and then concatenate the list with the original Series all at
- once.
- Examples
- --------
- >>> s1 = pd.Series([1, 2, 3])
- >>> s2 = pd.Series([4, 5, 6])
- >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
- >>> s1.append(s2)
- 0 1
- 1 2
- 2 3
- 0 4
- 1 5
- 2 6
- dtype: int64
- >>> s1.append(s3)
- 0 1
- 1 2
- 2 3
- 3 4
- 4 5
- 5 6
- dtype: int64
- With `ignore_index` set to True:
- >>> s1.append(s2, ignore_index=True)
- 0 1
- 1 2
- 2 3
- 3 4
- 4 5
- 5 6
- dtype: int64
- With `verify_integrity` set to True:
- >>> s1.append(s2, verify_integrity=True)
- Traceback (most recent call last):
- ...
- ValueError: Indexes have overlapping values: [0, 1, 2]
- """
- from pandas.core.reshape.concat import concat
- if isinstance(to_append, (list, tuple)):
- to_concat = [self]
- to_concat.extend(to_append)
- else:
- to_concat = [self, to_append]
- if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]):
- msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
- raise TypeError(msg)
- return concat(
- to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
- )
- def _binop(self, other: Series, func, level=None, fill_value=None):
- """
- Perform generic binary operation with optional fill value.
- Parameters
- ----------
- other : Series
- func : binary operator
- fill_value : float or object
- Value to substitute for NA/null values. If both Series are NA in a
- location, the result will be NA regardless of the passed fill value.
- level : int or level name, default None
- Broadcast across a level, matching Index values on the
- passed MultiIndex level.
- Returns
- -------
- Series
- """
- if not isinstance(other, Series):
- raise AssertionError("Other operand must be Series")
- this = self
- if not self.index.equals(other.index):
- this, other = self.align(other, level=level, join="outer", copy=False)
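- # A sketch of the fill semantics documented above: with fill_value=0 and
- # func=operator.add, [1, NaN, NaN] + [NaN, 2, NaN] fills one-sided NAs
- # with 0 and yields [1, 2, NaN]; positions where both sides are NA stay NA.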
- this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value)
- with np.errstate(all="ignore"):
- result = func(this_vals, other_vals)
- name = ops.get_op_result_name(self, other)
- return this._construct_result(result, name)
- def _construct_result(
- self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable
- ) -> Series | tuple[Series, Series]:
- """
- Construct an appropriately-labelled Series from the result of an op.
- Parameters
- ----------
- result : ndarray or ExtensionArray
- name : Label
- Returns
- -------
- Series
- In the case of __divmod__ or __rdivmod__, a 2-tuple of Series.
- """
- if isinstance(result, tuple):
- # produced by divmod or rdivmod
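- # e.g. divmod(ser, 3) yields (ser // 3, ser % 3): the tuple of two
- # arrays arrives here and leaves as a tuple of two labelled Series.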
- res1 = self._construct_result(result[0], name=name)
- res2 = self._construct_result(result[1], name=name)
- # GH#33427 assertions to keep mypy happy
- assert isinstance(res1, Series)
- assert isinstance(res2, Series)
- return (res1, res2)
- # We do not pass dtype to ensure that the Series constructor
- # does inference in the case where `result` has object-dtype.
- out = self._constructor(result, index=self.index)
- out = out.__finalize__(self)
- # Set the result's name after __finalize__ is called because __finalize__
- # would set it back to self.name
- out.name = name
- return out
- @doc(
- generic._shared_docs["compare"],
- """
- Returns
- -------
- Series or DataFrame
- If axis is 0 or 'index' the result will be a Series.
- The resulting index will be a MultiIndex with 'self' and 'other'
- stacked alternately at the inner level.
- If axis is 1 or 'columns' the result will be a DataFrame.
- It will have two columns namely 'self' and 'other'.
- See Also
- --------
- DataFrame.compare : Compare with another DataFrame and show differences.
- Notes
- -----
- Matching NaNs will not appear as a difference.
- Examples
- --------
- >>> s1 = pd.Series(["a", "b", "c", "d", "e"])
- >>> s2 = pd.Series(["a", "a", "c", "b", "e"])
- Align the differences on columns
- >>> s1.compare(s2)
- self other
- 1 b a
- 3 d b
- Stack the differences on indices
- >>> s1.compare(s2, align_axis=0)
- 1 self b
- other a
- 3 self d
- other b
- dtype: object
- Keep all original rows
- >>> s1.compare(s2, keep_shape=True)
- self other
- 0 NaN NaN
- 1 b a
- 2 NaN NaN
- 3 d b
- 4 NaN NaN
- Keep all original rows and also all original values
- >>> s1.compare(s2, keep_shape=True, keep_equal=True)
- self other
- 0 a a
- 1 b a
- 2 c c
- 3 d b
- 4 e e
- """,
- klass=_shared_doc_kwargs["klass"],
- )
- def compare(
- self,
- other: Series,
- align_axis: Axis = 1,
- keep_shape: bool = False,
- keep_equal: bool = False,
- ) -> DataFrame | Series:
- return super().compare(
- other=other,
- align_axis=align_axis,
- keep_shape=keep_shape,
- keep_equal=keep_equal,
- )
- def combine(self, other, func, fill_value=None) -> Series:
- """
- Combine the Series with a Series or scalar according to `func`.
- Combine the Series and `other` using `func` to perform elementwise
- selection for combined Series.
- `fill_value` is assumed when value is missing at some index
- from one of the two objects being combined.
- Parameters
- ----------
- other : Series or scalar
- The value(s) to be combined with the `Series`.
- func : function
- Function that takes two scalars as inputs and returns an element.
- fill_value : scalar, optional
- The value to assume when an index is missing from
- one Series or the other. The default specifies to use the
- appropriate NaN value for the underlying dtype of the Series.
- Returns
- -------
- Series
- The result of combining the Series with the other object.
- See Also
- --------
- Series.combine_first : Combine Series values, choosing the calling
- Series' values first.
- Examples
- --------
- Consider two datasets ``s1`` and ``s2`` containing
- highest clocked speeds of different birds.
- >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
- >>> s1
- falcon 330.0
- eagle 160.0
- dtype: float64
- >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
- >>> s2
- falcon 345.0
- eagle 200.0
- duck 30.0
- dtype: float64
- Now, to combine the two datasets and view the highest speeds
- of the birds across the two datasets
- >>> s1.combine(s2, max)
- duck NaN
- eagle 200.0
- falcon 345.0
- dtype: float64
- In the previous example, the resulting value for duck is missing,
- because the maximum of a NaN and a float is a NaN.
- So, in this example, we set ``fill_value=0``,
- so that the maximum value returned comes from one of the two datasets.
- >>> s1.combine(s2, max, fill_value=0)
- duck 30.0
- eagle 200.0
- falcon 345.0
- dtype: float64
- """
- if fill_value is None:
- fill_value = na_value_for_dtype(self.dtype, compat=False)
- if isinstance(other, Series):
- # If other is a Series, result is based on union of Series,
- # so do this element by element
- new_index = self.index.union(other.index)
- new_name = ops.get_op_result_name(self, other)
- new_values = np.empty(len(new_index), dtype=object)
- for i, idx in enumerate(new_index):
- lv = self.get(idx, fill_value)
- rv = other.get(idx, fill_value)
- with np.errstate(all="ignore"):
- new_values[i] = func(lv, rv)
- else:
- # Assume that other is a scalar, so apply the function for
- # each element in the Series
- new_index = self.index
- new_values = np.empty(len(new_index), dtype=object)
- with np.errstate(all="ignore"):
- new_values[:] = [func(lv, other) for lv in self._values]
- new_name = self.name
- # try_float=False is to match agg_series
- npvalues = lib.maybe_convert_objects(new_values, try_float=False)
- res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
- return self._constructor(res_values, index=new_index, name=new_name)
- def combine_first(self, other) -> Series:
- """
- Update null elements with value in the same location in 'other'.
- Combine two Series objects by filling null values in one Series with
- non-null values from the other Series. Result index will be the union
- of the two indexes.
- Parameters
- ----------
- other : Series
- The value(s) to be used for filling null values.
- Returns
- -------
- Series
- The result of combining the provided Series with the other object.
- See Also
- --------
- Series.combine : Perform element-wise operation on two Series
- using a given function.
- Examples
- --------
- >>> s1 = pd.Series([1, np.nan])
- >>> s2 = pd.Series([3, 4, 5])
- >>> s1.combine_first(s2)
- 0 1.0
- 1 4.0
- 2 5.0
- dtype: float64
- Null values still persist if the location of that null value
- does not exist in `other`
- >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0})
- >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0})
- >>> s1.combine_first(s2)
- duck 30.0
- eagle 160.0
- falcon NaN
- dtype: float64
- """
- new_index = self.index.union(other.index)
- this = self.reindex(new_index, copy=False)
- other = other.reindex(new_index, copy=False)
- if this.dtype.kind == "M" and other.dtype.kind != "M":
- other = to_datetime(other)
- return this.where(notna(this), other)
- def update(self, other) -> None:
- """
- Modify Series in place using values from passed Series.
- Uses non-NA values from passed Series to make updates. Aligns
- on index.
- Parameters
- ----------
- other : Series, or object coercible into Series
- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, 5, 6]))
- >>> s
- 0 4
- 1 5
- 2 6
- dtype: int64
- >>> s = pd.Series(['a', 'b', 'c'])
- >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
- >>> s
- 0 d
- 1 b
- 2 e
- dtype: object
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, 5, 6, 7, 8]))
- >>> s
- 0 4
- 1 5
- 2 6
- dtype: int64
- If ``other`` contains NaNs the corresponding values are not updated
- in the original Series.
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, np.nan, 6]))
- >>> s
- 0 4
- 1 2
- 2 6
- dtype: int64
- ``other`` can also be a non-Series object type
- that is coercible into a Series
- >>> s = pd.Series([1, 2, 3])
- >>> s.update([4, np.nan, 6])
- >>> s
- 0 4
- 1 2
- 2 6
- dtype: int64
- >>> s = pd.Series([1, 2, 3])
- >>> s.update({1: 9})
- >>> s
- 0 1
- 1 9
- 2 3
- dtype: int64
- """
- if not isinstance(other, Series):
- other = Series(other)
- other = other.reindex_like(self)
- mask = notna(other)
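- # Positions where the reindexed ``other`` is NA are excluded from the
- # mask, so they keep their original values; this is why NaNs in
- # ``other`` do not overwrite (see the Examples above).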
- self._mgr = self._mgr.putmask(mask=mask, new=other)
- self._maybe_update_cacher()
- # ----------------------------------------------------------------------
- # Reindexing, sorting
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def sort_values(
- self,
- axis=0,
- ascending: bool | int | Sequence[bool | int] = True,
- inplace: bool = False,
- kind: str = "quicksort",
- na_position: str = "last",
- ignore_index: bool = False,
- key: ValueKeyFunc = None,
- ):
- """
- Sort by the values.
- Sort a Series in ascending or descending order by some
- criterion.
- Parameters
- ----------
- axis : {0 or 'index'}, default 0
- Axis to direct sorting. The value 'index' is accepted for
- compatibility with DataFrame.sort_values.
- ascending : bool or list of bools, default True
- If True, sort values in ascending order, otherwise descending.
- inplace : bool, default False
- If True, perform operation in-place.
- kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
- Choice of sorting algorithm. See also :func:`numpy.sort` for more
- information. 'mergesort' and 'stable' are the only stable algorithms.
- na_position : {'first' or 'last'}, default 'last'
- Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
- the end.
- ignore_index : bool, default False
- If True, the resulting axis will be labeled 0, 1, …, n - 1.
- .. versionadded:: 1.0.0
- key : callable, optional
- If not None, apply the key function to the series values
- before sorting. This is similar to the `key` argument in the
- builtin :meth:`sorted` function, with the notable difference that
- this `key` function should be *vectorized*. It should expect a
- ``Series`` and return an array-like.
- .. versionadded:: 1.1.0
- Returns
- -------
- Series or None
- Series ordered by values or None if ``inplace=True``.
- See Also
- --------
- Series.sort_index : Sort by the Series indices.
- DataFrame.sort_values : Sort DataFrame by the values along either axis.
- DataFrame.sort_index : Sort DataFrame by indices.
- Examples
- --------
- >>> s = pd.Series([np.nan, 1, 3, 10, 5])
- >>> s
- 0 NaN
- 1 1.0
- 2 3.0
- 3 10.0
- 4 5.0
- dtype: float64
- Sort values ascending order (default behaviour)
- >>> s.sort_values(ascending=True)
- 1 1.0
- 2 3.0
- 4 5.0
- 3 10.0
- 0 NaN
- dtype: float64
- Sort values descending order
- >>> s.sort_values(ascending=False)
- 3 10.0
- 4 5.0
- 2 3.0
- 1 1.0
- 0 NaN
- dtype: float64
- Sort values inplace
- >>> s.sort_values(ascending=False, inplace=True)
- >>> s
- 3 10.0
- 4 5.0
- 2 3.0
- 1 1.0
- 0 NaN
- dtype: float64
- Sort values putting NAs first
- >>> s.sort_values(na_position='first')
- 0 NaN
- 1 1.0
- 2 3.0
- 4 5.0
- 3 10.0
- dtype: float64
- Sort a series of strings
- >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
- >>> s
- 0 z
- 1 b
- 2 d
- 3 a
- 4 c
- dtype: object
- >>> s.sort_values()
- 3 a
- 1 b
- 4 c
- 2 d
- 0 z
- dtype: object
- Sort using a key function. Your `key` function will be
- given the ``Series`` of values and should return an array-like.
- >>> s = pd.Series(['a', 'B', 'c', 'D', 'e'])
- >>> s.sort_values()
- 1 B
- 3 D
- 0 a
- 2 c
- 4 e
- dtype: object
- >>> s.sort_values(key=lambda x: x.str.lower())
- 0 a
- 1 B
- 2 c
- 3 D
- 4 e
- dtype: object
- NumPy ufuncs work well here. For example, we can
- sort by the ``sin`` of the value
- >>> s = pd.Series([-4, -2, 0, 2, 4])
- >>> s.sort_values(key=np.sin)
- 1 -2
- 4 4
- 2 0
- 0 -4
- 3 2
- dtype: int64
- More complicated user-defined functions can be used,
- as long as they expect a Series and return an array-like
- >>> s.sort_values(key=lambda x: (np.tan(x.cumsum())))
- 0 -4
- 3 2
- 4 4
- 1 -2
- 2 0
- dtype: int64
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- # Validate the axis parameter
- self._get_axis_number(axis)
- # GH 5856/5853
- if inplace and self._is_cached:
- raise ValueError(
- "This Series is a view of some other array, to "
- "sort in-place you must create a copy"
- )
- if is_list_like(ascending):
- ascending = cast(Sequence[Union[bool, int]], ascending)
- if len(ascending) != 1:
- raise ValueError(
- f"Length of ascending ({len(ascending)}) must be 1 for Series"
- )
- ascending = ascending[0]
- ascending = validate_ascending(ascending)
- if na_position not in ["first", "last"]:
- raise ValueError(f"invalid na_position: {na_position}")
- # GH 35922. Make sorting stable by leveraging nargsort
- values_to_sort = ensure_key_mapped(self, key)._values if key else self._values
- sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position)
- result = self._constructor(
- self._values[sorted_index], index=self.index[sorted_index]
- )
- if ignore_index:
- result.index = default_index(len(sorted_index))
- if inplace:
- self._update_inplace(result)
- else:
- return result.__finalize__(self, method="sort_values")
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def sort_index(
- self,
- axis=0,
- level=None,
- ascending: bool | int | Sequence[bool | int] = True,
- inplace: bool = False,
- kind: str = "quicksort",
- na_position: str = "last",
- sort_remaining: bool = True,
- ignore_index: bool = False,
- key: IndexKeyFunc = None,
- ):
- """
- Sort Series by index labels.
- Returns a new Series sorted by label if `inplace` argument is
- ``False``, otherwise updates the original series and returns None.
- Parameters
- ----------
- axis : int, default 0
- Axis to direct sorting. This can only be 0 for Series.
- level : int, optional
- If not None, sort on values in specified index level(s).
- ascending : bool or list-like of bools, default True
- Sort ascending vs. descending. When the index is a MultiIndex the
- sort direction can be controlled for each level individually.
- inplace : bool, default False
- If True, perform operation in-place.
- kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
- Choice of sorting algorithm. See also :func:`numpy.sort` for more
- information. 'mergesort' and 'stable' are the only stable algorithms. For
- DataFrames, this option is only applied when sorting on a single
- column or label.
- na_position : {'first', 'last'}, default 'last'
- If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
- Not implemented for MultiIndex.
- sort_remaining : bool, default True
- If True and sorting by level and index is multilevel, sort by other
- levels too (in order) after sorting by specified level.
- ignore_index : bool, default False
- If True, the resulting axis will be labeled 0, 1, …, n - 1.
- .. versionadded:: 1.0.0
- key : callable, optional
- If not None, apply the key function to the index values
- before sorting. This is similar to the `key` argument in the
- builtin :meth:`sorted` function, with the notable difference that
- this `key` function should be *vectorized*. It should expect an
- ``Index`` and return an ``Index`` of the same shape.
- .. versionadded:: 1.1.0
- Returns
- -------
- Series or None
- The original Series sorted by the labels or None if ``inplace=True``.
- See Also
- --------
- DataFrame.sort_index: Sort DataFrame by the index.
- DataFrame.sort_values: Sort DataFrame by the value.
- Series.sort_values : Sort Series by the value.
- Examples
- --------
- >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
- >>> s.sort_index()
- 1 c
- 2 b
- 3 a
- 4 d
- dtype: object
- Sort Descending
- >>> s.sort_index(ascending=False)
- 4 d
- 3 a
- 2 b
- 1 c
- dtype: object
- Sort Inplace
- >>> s.sort_index(inplace=True)
- >>> s
- 1 c
- 2 b
- 3 a
- 4 d
- dtype: object
- By default NaNs are put at the end, but use `na_position` to place
- them at the beginning
- >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
- >>> s.sort_index(na_position='first')
- NaN d
- 1.0 c
- 2.0 b
- 3.0 a
- dtype: object
- Specify index level to sort
- >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
- ... 'baz', 'baz', 'bar', 'bar']),
- ... np.array(['two', 'one', 'two', 'one',
- ... 'two', 'one', 'two', 'one'])]
- >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
- >>> s.sort_index(level=1)
- bar one 8
- baz one 6
- foo one 4
- qux one 2
- bar two 7
- baz two 5
- foo two 3
- qux two 1
- dtype: int64
- Does not sort by remaining levels when sorting by levels
- >>> s.sort_index(level=1, sort_remaining=False)
- qux one 2
- foo one 4
- baz one 6
- bar one 8
- qux two 1
- foo two 3
- baz two 5
- bar two 7
- dtype: int64
- Apply a key function before sorting
- >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd'])
- >>> s.sort_index(key=lambda x : x.str.lower())
- A 1
- b 2
- C 3
- d 4
- dtype: int64
- """
- return super().sort_index(
- axis,
- level,
- ascending,
- inplace,
- kind,
- na_position,
- sort_remaining,
- ignore_index,
- key,
- )
- def argsort(self, axis=0, kind="quicksort", order=None) -> Series:
- """
- Return the integer indices that would sort the Series values.
- Override ndarray.argsort. Argsorts the value, omitting NA/null values,
- and places the result in the same locations as the non-NA values.
- Parameters
- ----------
- axis : {0 or "index"}
- Has no effect but is accepted for compatibility with numpy.
- kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
- Choice of sorting algorithm. See :func:`numpy.sort` for more
- information. 'mergesort' and 'stable' are the only stable algorithms.
- order : None
- Has no effect but is accepted for compatibility with numpy.
- Returns
- -------
- Series[np.intp]
- Positions of values within the sort order with -1 indicating
- nan values.
- See Also
- --------
- numpy.ndarray.argsort : Returns the indices that would sort this array.
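- Examples
- --------
- A minimal sketch of the NA handling described above (the NA position
- receives -1; dtype display assumes a 64-bit platform):
- >>> s = pd.Series([3, np.nan, 1])
- >>> s.argsort()
- 0 1
- 1 -1
- 2 0
- dtype: int64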
- """
- values = self._values
- mask = isna(values)
- if mask.any():
- result = np.full(len(self), -1, dtype=np.intp)
- notmask = ~mask
- result[notmask] = np.argsort(values[notmask], kind=kind)
- else:
- result = np.argsort(values, kind=kind)
- res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
- return res.__finalize__(self, method="argsort")
- def nlargest(self, n=5, keep="first") -> Series:
- """
- Return the largest `n` elements.
- Parameters
- ----------
- n : int, default 5
- Return this many descending sorted values.
- keep : {'first', 'last', 'all'}, default 'first'
- When there are duplicate values that cannot all fit in a
- Series of `n` elements:
- - ``first`` : return the first `n` occurrences in order
- of appearance.
- - ``last`` : return the last `n` occurrences in reverse
- order of appearance.
- - ``all`` : keep all occurrences. This can result in a Series of
- size larger than `n`.
- Returns
- -------
- Series
- The `n` largest values in the Series, sorted in decreasing order.
- See Also
- --------
- Series.nsmallest: Get the `n` smallest elements.
- Series.sort_values: Sort Series by values.
- Series.head: Return the first `n` rows.
- Notes
- -----
- Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
- relative to the size of the ``Series`` object.
- Examples
- --------
- >>> countries_population = {"Italy": 59000000, "France": 65000000,
- ... "Malta": 434000, "Maldives": 434000,
- ... "Brunei": 434000, "Iceland": 337000,
- ... "Nauru": 11300, "Tuvalu": 11300,
- ... "Anguilla": 11300, "Montserrat": 5200}
- >>> s = pd.Series(countries_population)
- >>> s
- Italy 59000000
- France 65000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- Iceland 337000
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Montserrat 5200
- dtype: int64
- The `n` largest elements where ``n=5`` by default.
- >>> s.nlargest()
- France 65000000
- Italy 59000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- dtype: int64
- The `n` largest elements where ``n=3``. Default `keep` value is 'first'
- so Malta will be kept.
- >>> s.nlargest(3)
- France 65000000
- Italy 59000000
- Malta 434000
- dtype: int64
- The `n` largest elements where ``n=3`` and keeping the last duplicates.
- Brunei will be kept since it is the last with value 434000 based on
- the index order.
- >>> s.nlargest(3, keep='last')
- France 65000000
- Italy 59000000
- Brunei 434000
- dtype: int64
- The `n` largest elements where ``n=3`` with all duplicates kept. Note
- that the returned Series has five elements due to the three duplicates.
- >>> s.nlargest(3, keep='all')
- France 65000000
- Italy 59000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- dtype: int64
- """
- return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
- def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
- """
- Return the smallest `n` elements.
- Parameters
- ----------
- n : int, default 5
- Return this many ascending sorted values.
- keep : {'first', 'last', 'all'}, default 'first'
- When there are duplicate values that cannot all fit in a
- Series of `n` elements:
- - ``first`` : return the first `n` occurrences in order
- of appearance.
- - ``last`` : return the last `n` occurrences in reverse
- order of appearance.
- - ``all`` : keep all occurrences. This can result in a Series of
- size larger than `n`.
- Returns
- -------
- Series
- The `n` smallest values in the Series, sorted in increasing order.
- See Also
- --------
- Series.nlargest: Get the `n` largest elements.
- Series.sort_values: Sort Series by values.
- Series.head: Return the first `n` rows.
- Notes
- -----
- Faster than ``.sort_values().head(n)`` for small `n` relative to
- the size of the ``Series`` object.
- Examples
- --------
- >>> countries_population = {"Italy": 59000000, "France": 65000000,
- ... "Brunei": 434000, "Malta": 434000,
- ... "Maldives": 434000, "Iceland": 337000,
- ... "Nauru": 11300, "Tuvalu": 11300,
- ... "Anguilla": 11300, "Montserrat": 5200}
- >>> s = pd.Series(countries_population)
- >>> s
- Italy 59000000
- France 65000000
- Brunei 434000
- Malta 434000
- Maldives 434000
- Iceland 337000
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Montserrat 5200
- dtype: int64
- The `n` smallest elements where ``n=5`` by default.
- >>> s.nsmallest()
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Iceland 337000
- dtype: int64
- The `n` smallest elements where ``n=3``. Default `keep` value is
- 'first' so Nauru and Tuvalu will be kept.
- >>> s.nsmallest(3)
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- dtype: int64
- The `n` smallest elements where ``n=3`` and keeping the last
- duplicates. Anguilla and Tuvalu will be kept since they are the last
- with value 11300 based on the index order.
- >>> s.nsmallest(3, keep='last')
- Montserrat 5200
- Anguilla 11300
- Tuvalu 11300
- dtype: int64
- The `n` smallest elements where ``n=3`` with all duplicates kept. Note
- that the returned Series has four elements due to the three duplicates.
- >>> s.nsmallest(3, keep='all')
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- dtype: int64
- """
- return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
- @doc(
- klass=_shared_doc_kwargs["klass"],
- extra_params=dedent(
- """copy : bool, default True
- Whether to copy underlying data."""
- ),
- examples=dedent(
- """Examples
- --------
- >>> s = pd.Series(
- ... ["A", "B", "A", "C"],
- ... index=[
- ... ["Final exam", "Final exam", "Coursework", "Coursework"],
- ... ["History", "Geography", "History", "Geography"],
- ... ["January", "February", "March", "April"],
- ... ],
- ... )
- >>> s
- Final exam History January A
- Geography February B
- Coursework History March A
- Geography April C
- dtype: object
- In the following example, we will swap the levels of the index.
- By not supplying any arguments for i and j, we swap the last and
- second-to-last levels.
- >>> s.swaplevel()
- Final exam January History A
- February Geography B
- Coursework March History A
- April Geography C
- dtype: object
- By supplying one argument, we can choose which level to swap the last
- level with. We can for example swap the first level with the last one as
- follows.
- >>> s.swaplevel(0)
- January History Final exam A
- February Geography Final exam B
- March History Coursework A
- April Geography Coursework C
- dtype: object
- We can also define explicitly which levels we want to swap by supplying values
- for both i and j. Here, we for example swap the first and second levels.
- >>> s.swaplevel(0, 1)
- History Final exam January A
- Geography Final exam February B
- History Coursework March A
- Geography Coursework April C
- dtype: object"""
- ),
- )
- def swaplevel(self, i=-2, j=-1, copy=True) -> Series:
- """
- Swap levels i and j in a :class:`MultiIndex`.
- Default is to swap the two innermost levels of the index.
- Parameters
- ----------
- i, j : int or str
- Levels of the indices to be swapped. Can pass level name as string.
- {extra_params}
- Returns
- -------
- {klass}
- {klass} with levels swapped in MultiIndex.
- {examples}
- """
- assert isinstance(self.index, MultiIndex)
- new_index = self.index.swaplevel(i, j)
- return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
- self, method="swaplevel"
- )
- def reorder_levels(self, order) -> Series:
- """
- Rearrange index levels using input order.
- May not drop or duplicate levels.
- Parameters
- ----------
- order : list of int representing new level order
- Reference level by number or key.
- Returns
- -------
- type of caller (new object)
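- Examples
- --------
- An illustrative sketch; passing ``[1, 0]`` moves the second level to
- the front:
- >>> s = pd.Series([1, 2], index=pd.MultiIndex.from_arrays(
- ... [['a', 'b'], ['x', 'y']]))
- >>> s.reorder_levels([1, 0])
- x a 1
- y b 2
- dtype: int64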
- """
- if not isinstance(self.index, MultiIndex): # pragma: no cover
- raise Exception("Can only reorder levels on a hierarchical axis.")
- result = self.copy()
- assert isinstance(result.index, MultiIndex)
- result.index = result.index.reorder_levels(order)
- return result
- def explode(self, ignore_index: bool = False) -> Series:
- """
- Transform each element of a list-like to a row.
- .. versionadded:: 0.25.0
- Parameters
- ----------
- ignore_index : bool, default False
- If True, the resulting index will be labeled 0, 1, …, n - 1.
- .. versionadded:: 1.1.0
- Returns
- -------
- Series
- Exploded lists to rows; index will be duplicated for these rows.
- See Also
- --------
- Series.str.split : Split string values on specified separator.
- Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
- to produce DataFrame.
- DataFrame.melt : Unpivot a DataFrame from wide format to long format.
- DataFrame.explode : Explode a DataFrame from list-like
- columns to long format.
- Notes
- -----
- This routine will explode list-likes including lists, tuples, sets,
- Series, and np.ndarray. The result dtype of the subset rows will
- be object. Scalars will be returned unchanged, and empty list-likes will
- result in a np.nan for that row. In addition, the ordering of elements in
- the output will be non-deterministic when exploding sets.
- Examples
- --------
- >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
- >>> s
- 0 [1, 2, 3]
- 1 foo
- 2 []
- 3 [3, 4]
- dtype: object
- >>> s.explode()
- 0 1
- 0 2
- 0 3
- 1 foo
- 2 NaN
- 3 3
- 3 4
- dtype: object
- """
- if not len(self) or not is_object_dtype(self):
- result = self.copy()
- return result.reset_index(drop=True) if ignore_index else result
- values, counts = reshape.explode(np.asarray(self._values))
- if ignore_index:
- index = default_index(len(values))
- else:
- index = self.index.repeat(counts)
- return self._constructor(values, index=index, name=self.name)
- def unstack(self, level=-1, fill_value=None) -> DataFrame:
- """
- Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.
- Parameters
- ----------
- level : int, str, or list of these, default last level
- Level(s) to unstack, can pass level name.
- fill_value : scalar value, default None
- Value to use when replacing NaN values.
- Returns
- -------
- DataFrame
- Unstacked Series.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4],
- ... index=pd.MultiIndex.from_product([['one', 'two'],
- ... ['a', 'b']]))
- >>> s
- one a 1
- b 2
- two a 3
- b 4
- dtype: int64
- >>> s.unstack(level=-1)
- a b
- one 1 2
- two 3 4
- >>> s.unstack(level=0)
- one two
- a 1 3
- b 2 4
- """
- from pandas.core.reshape.reshape import unstack
- return unstack(self, level, fill_value)
- # ----------------------------------------------------------------------
- # function application
- def map(self, arg, na_action=None) -> Series:
- """
- Map values of Series according to input correspondence.
- Used for substituting each value in a Series with another value,
- that may be derived from a function, a ``dict`` or
- a :class:`Series`.
- Parameters
- ----------
- arg : function, collections.abc.Mapping subclass or Series
- Mapping correspondence.
- na_action : {None, 'ignore'}, default None
- If 'ignore', propagate NaN values, without passing them to the
- mapping correspondence.
- Returns
- -------
- Series
- Same index as caller.
- See Also
- --------
- Series.apply : For applying more complex functions on a Series.
- DataFrame.apply : Apply a function row-/column-wise.
- DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
- Notes
- -----
- When ``arg`` is a dictionary, values in Series that are not in the
- dictionary (as keys) are converted to ``NaN``. However, if the
- dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
- provides a method for default values), then this default is used
- rather than ``NaN``.
- Examples
- --------
- >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
- >>> s
- 0 cat
- 1 dog
- 2 NaN
- 3 rabbit
- dtype: object
- ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
- in the ``dict`` are converted to ``NaN``, unless the dict has a default
- value (e.g. ``defaultdict``):
- >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
- 0 kitten
- 1 puppy
- 2 NaN
- 3 NaN
- dtype: object
- It also accepts a function:
- >>> s.map('I am a {}'.format)
- 0 I am a cat
- 1 I am a dog
- 2 I am a nan
- 3 I am a rabbit
- dtype: object
- To avoid applying the function to missing values (and keep them as
- ``NaN``) ``na_action='ignore'`` can be used:
- >>> s.map('I am a {}'.format, na_action='ignore')
- 0 I am a cat
- 1 I am a dog
- 2 NaN
- 3 I am a rabbit
- dtype: object
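- As a sketch of the ``__missing__`` behaviour noted above, a
- ``defaultdict`` supplies its default for keys that are absent
- (including NaN, since ``na_action`` defaults to None):
- >>> from collections import defaultdict
- >>> s.map(defaultdict(lambda: 'unknown', {'cat': 'kitten'}))
- 0 kitten
- 1 unknown
- 2 unknown
- 3 unknown
- dtype: object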
- """
- new_values = self._map_values(arg, na_action=na_action)
- return self._constructor(new_values, index=self.index).__finalize__(
- self, method="map"
- )
- def _gotitem(self, key, ndim, subset=None) -> Series:
- """
- Sub-classes to define. Return a sliced object.
- Parameters
- ----------
- key : string / list of selections
- ndim : {1, 2}
- Requested ndim of result.
- subset : object, default None
- Subset to act on.
- """
- return self
- _agg_see_also_doc = dedent(
- """
- See Also
- --------
- Series.apply : Invoke function on a Series.
- Series.transform : Transform function producing a Series with like indexes.
- """
- )
- _agg_examples_doc = dedent(
- """
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- dtype: int64
- >>> s.agg('min')
- 1
- >>> s.agg(['min', 'max'])
- min 1
- max 4
- dtype: int64
- """
- )
- @doc(
- generic._shared_docs["aggregate"],
- klass=_shared_doc_kwargs["klass"],
- axis=_shared_doc_kwargs["axis"],
- see_also=_agg_see_also_doc,
- examples=_agg_examples_doc,
- )
- def aggregate(self, func=None, axis=0, *args, **kwargs):
- # Validate the axis parameter
- self._get_axis_number(axis)
- # if func is None, will switch to user-provided "named aggregation" kwargs
- if func is None:
- func = dict(kwargs.items())
- op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs)
- result = op.agg()
- return result
- agg = aggregate
- @doc(
- _shared_docs["transform"],
- klass=_shared_doc_kwargs["klass"],
- axis=_shared_doc_kwargs["axis"],
- )
- def transform(
- self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
- ) -> DataFrame | Series:
- # Validate axis argument
- self._get_axis_number(axis)
- result = SeriesApply(
- self, func=func, convert_dtype=True, args=args, kwargs=kwargs
- ).transform()
- return result
- def apply(
- self,
- func: AggFuncType,
- convert_dtype: bool = True,
- args: tuple[Any, ...] = (),
- **kwargs,
- ) -> DataFrame | Series:
- """
- Invoke function on values of Series.
- Can be ufunc (a NumPy function that applies to the entire Series)
- or a Python function that only works on single values.
- Parameters
- ----------
- func : function
- Python function or NumPy ufunc to apply.
- convert_dtype : bool, default True
- Try to find better dtype for elementwise function results. If
- False, leave as dtype=object. Note that the dtype is always
- preserved for some extension array dtypes, such as Categorical.
- args : tuple
- Positional arguments passed to func after the series value.
- **kwargs
- Additional keyword arguments passed to func.
- Returns
- -------
- Series or DataFrame
- If func returns a Series object the result will be a DataFrame.
- See Also
- --------
- Series.map: For element-wise operations.
- Series.agg: Only perform aggregating type operations.
- Series.transform: Only perform transforming type operations.
- Notes
- -----
- Functions that mutate the passed object can produce unexpected
- behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
- for more details.
- Examples
- --------
- Create a series with typical summer temperatures for each city.
- >>> s = pd.Series([20, 21, 12],
- ... index=['London', 'New York', 'Helsinki'])
- >>> s
- London 20
- New York 21
- Helsinki 12
- dtype: int64
- Square the values by defining a function and passing it as an
- argument to ``apply()``.
- >>> def square(x):
- ... return x ** 2
- >>> s.apply(square)
- London 400
- New York 441
- Helsinki 144
- dtype: int64
- Square the values by passing an anonymous function as an
- argument to ``apply()``.
- >>> s.apply(lambda x: x ** 2)
- London 400
- New York 441
- Helsinki 144
- dtype: int64
- Define a custom function that needs additional positional
- arguments and pass these additional arguments using the
- ``args`` keyword.
- >>> def subtract_custom_value(x, custom_value):
- ... return x - custom_value
- >>> s.apply(subtract_custom_value, args=(5,))
- London 15
- New York 16
- Helsinki 7
- dtype: int64
- Define a custom function that takes keyword arguments
- and pass these arguments to ``apply``.
- >>> def add_custom_values(x, **kwargs):
- ... for month in kwargs:
- ... x += kwargs[month]
- ... return x
- >>> s.apply(add_custom_values, june=30, july=20, august=25)
- London 95
- New York 96
- Helsinki 87
- dtype: int64
- Use a function from the Numpy library.
- >>> s.apply(np.log)
- London 2.995732
- New York 3.044522
- Helsinki 2.484907
- dtype: float64
- """
- return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
- def _reduce(
- self,
- op,
- name: str,
- *,
- axis=0,
- skipna=True,
- numeric_only=None,
- filter_type=None,
- **kwds,
- ):
- """
- Perform a reduction operation.
- If we have an ndarray as a value, then simply perform the operation,
- otherwise delegate to the object.
- """
- delegate = self._values
- if axis is not None:
- self._get_axis_number(axis)
- if isinstance(delegate, ExtensionArray):
- # dispatch to ExtensionArray interface
- return delegate._reduce(name, skipna=skipna, **kwds)
- else:
- # dispatch to numpy arrays
- if numeric_only:
- kwd_name = "numeric_only"
- if name in ["any", "all"]:
- kwd_name = "bool_only"
- raise NotImplementedError(
- f"Series.{name} does not implement {kwd_name}."
- )
- with np.errstate(all="ignore"):
- return op(delegate, skipna=skipna, **kwds)
- def _reindex_indexer(
- self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool
- ) -> Series:
- # Note: new_index is None iff indexer is None
- # if not None, indexer is np.intp
- if indexer is None:
- if copy:
- return self.copy()
- return self
- new_values = algorithms.take_nd(
- self._values, indexer, allow_fill=True, fill_value=None
- )
- return self._constructor(new_values, index=new_index)
- def _needs_reindex_multi(self, axes, method, level) -> bool:
- """
- Check if we do need a multi reindex; this is for compat with
- higher dims.
- """
- return False
- # error: Cannot determine type of 'align'
- @doc(
- NDFrame.align, # type: ignore[has-type]
- klass=_shared_doc_kwargs["klass"],
- axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
- )
- def align(
- self,
- other,
- join="outer",
- axis=None,
- level=None,
- copy=True,
- fill_value=None,
- method=None,
- limit=None,
- fill_axis=0,
- broadcast_axis=None,
- ):
- return super().align(
- other,
- join=join,
- axis=axis,
- level=level,
- copy=copy,
- fill_value=fill_value,
- method=method,
- limit=limit,
- fill_axis=fill_axis,
- broadcast_axis=broadcast_axis,
- )
- def rename(
- self,
- index=None,
- *,
- axis=None,
- copy=True,
- inplace=False,
- level=None,
- errors="ignore",
- ):
- """
- Alter Series index labels or name.
- Function / dict values must be unique (1-to-1). Labels not contained in
- a dict / Series will be left as-is. Extra labels listed don't throw an
- error.
- Alternatively, change ``Series.name`` with a scalar value.
- See the :ref:`user guide <basics.rename>` for more.
- Parameters
- ----------
- axis : {0 or "index"}
- Unused. Accepted for compatibility with DataFrame method only.
- index : scalar, hashable sequence, dict-like or function, optional
- Functions or dict-like are transformations to apply to
- the index.
- Scalar or hashable sequence-like will alter the ``Series.name``
- attribute.
- copy : bool, default True
- Also copy underlying data.
- inplace : bool, default False
- Whether to return a new Series. If True the value of copy is ignored.
- level : int or level name, default None
- In case of a MultiIndex, only rename labels in the specified level.
- errors : {'ignore', 'raise'}, default 'ignore'
- If 'raise', raise `KeyError` when a dict-like `index` contains labels
- that are not present in the index being transformed. If 'ignore',
- existing keys will be renamed and extra keys will be ignored.
- Returns
- -------
- Series or None
- Series with index labels or name altered or None if ``inplace=True``.
- See Also
- --------
- DataFrame.rename : Corresponding DataFrame method.
- Series.rename_axis : Set the name of the axis.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s
- 0 1
- 1 2
- 2 3
- dtype: int64
- >>> s.rename("my_name") # scalar, changes Series.name
- 0 1
- 1 2
- 2 3
- Name: my_name, dtype: int64
- >>> s.rename(lambda x: x ** 2) # function, changes labels
- 0 1
- 1 2
- 4 3
- dtype: int64
- >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
- 0 1
- 3 2
- 5 3
- dtype: int64
- """
- if axis is not None:
- # Make sure we raise if an invalid 'axis' is passed.
- axis = self._get_axis_number(axis)
- if callable(index) or is_dict_like(index):
- return super().rename(
- index, copy=copy, inplace=inplace, level=level, errors=errors
- )
- else:
- return self._set_name(index, inplace=inplace)
- @overload
- def set_axis(
- self, labels, axis: Axis = ..., inplace: Literal[False] = ...
- ) -> Series:
- ...
- @overload
- def set_axis(self, labels, axis: Axis, inplace: Literal[True]) -> None:
- ...
- @overload
- def set_axis(self, labels, *, inplace: Literal[True]) -> None:
- ...
- @overload
- def set_axis(self, labels, axis: Axis = ..., inplace: bool = ...) -> Series | None:
- ...
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
- @Appender(
- """
- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s
- 0 1
- 1 2
- 2 3
- dtype: int64
- >>> s.set_axis(['a', 'b', 'c'], axis=0)
- a 1
- b 2
- c 3
- dtype: int64
- """
- )
- @Substitution(
- **_shared_doc_kwargs,
- extended_summary_sub="",
- axis_description_sub="",
- see_also_sub="",
- )
- @Appender(generic.NDFrame.set_axis.__doc__)
- def set_axis(self, labels, axis: Axis = 0, inplace: bool = False):
- return super().set_axis(labels, axis=axis, inplace=inplace)
- # error: Cannot determine type of 'reindex'
- @doc(
- NDFrame.reindex, # type: ignore[has-type]
- klass=_shared_doc_kwargs["klass"],
- axes=_shared_doc_kwargs["axes"],
- optional_labels=_shared_doc_kwargs["optional_labels"],
- optional_axis=_shared_doc_kwargs["optional_axis"],
- )
- def reindex(self, index=None, **kwargs):
- return super().reindex(index=index, **kwargs)
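- # A minimal usage sketch for reindex (illustrative; the data is made up):
- #     s = pd.Series([1, 2], index=["a", "b"])
- #     s.reindex(["b", "c"])  # -> b=2.0, c=NaN; missing labels are NA-filled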
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
- def drop(
- self,
- labels=None,
- axis=0,
- index=None,
- columns=None,
- level=None,
- inplace=False,
- errors="raise",
- ) -> Series:
- """
- Return Series with specified index labels removed.
- Remove elements of a Series by specifying their index labels.
- When using a multi-index, labels on different levels can be removed
- by specifying the level.
- Parameters
- ----------
- labels : single label or list-like
- Index labels to drop.
- axis : {0 or 'index'}, default 0
- Unused; accepted for compatibility with the DataFrame method.
- index : single label or list-like
- Redundant for application on Series, but 'index' can be used instead
- of 'labels'.
- columns : single label or list-like
- No change is made to the Series; use 'index' or 'labels' instead.
- level : int or level name, optional
- For MultiIndex, level for which the labels will be removed.
- inplace : bool, default False
- If True, do operation inplace and return None.
- errors : {'ignore', 'raise'}, default 'raise'
- If 'ignore', suppress error and only existing labels are dropped.
- Returns
- -------
- Series or None
- Series with specified index labels removed or None if ``inplace=True``.
- Raises
- ------
- KeyError
- If none of the labels are found in the index.
- See Also
- --------
- Series.reindex : Return only specified index labels of Series.
- Series.dropna : Return series without null values.
- Series.drop_duplicates : Return Series with duplicate values removed.
- DataFrame.drop : Drop specified labels from rows or columns.
- Examples
- --------
- >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
- >>> s
- A 0
- B 1
- C 2
- dtype: int64
- Drop labels B and C
- >>> s.drop(labels=['B', 'C'])
- A 0
- dtype: int64
- Drop 2nd level label in MultiIndex Series
- >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
- ... ['speed', 'weight', 'length']],
- ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
- ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
- >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
- ... index=midx)
- >>> s
- lama speed 45.0
- weight 200.0
- length 1.2
- cow speed 30.0
- weight 250.0
- length 1.5
- falcon speed 320.0
- weight 1.0
- length 0.3
- dtype: float64
- >>> s.drop(labels='weight', level=1)
- lama speed 45.0
- length 1.2
- cow speed 30.0
- length 1.5
- falcon speed 320.0
- length 0.3
- dtype: float64
- """
- return super().drop(
- labels=labels,
- axis=axis,
- index=index,
- columns=columns,
- level=level,
- inplace=inplace,
- errors=errors,
- )
- @overload
- def fillna(
- self,
- value=...,
- method: FillnaOptions | None = ...,
- axis: Axis | None = ...,
- inplace: Literal[False] = ...,
- limit=...,
- downcast=...,
- ) -> Series:
- ...
- @overload
- def fillna(
- self,
- value,
- method: FillnaOptions | None,
- axis: Axis | None,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- *,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- value,
- *,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- *,
- method: FillnaOptions | None,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- *,
- axis: Axis | None,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- *,
- method: FillnaOptions | None,
- axis: Axis | None,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- value,
- *,
- axis: Axis | None,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- value,
- method: FillnaOptions | None,
- *,
- inplace: Literal[True],
- limit=...,
- downcast=...,
- ) -> None:
- ...
- @overload
- def fillna(
- self,
- value=...,
- method: FillnaOptions | None = ...,
- axis: Axis | None = ...,
- inplace: bool = ...,
- limit=...,
- downcast=...,
- ) -> Series | None:
- ...
- # error: Cannot determine type of 'fillna'
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"])
- @doc(NDFrame.fillna, **_shared_doc_kwargs) # type: ignore[has-type]
- def fillna(
- self,
- value: object | ArrayLike | None = None,
- method: FillnaOptions | None = None,
- axis=None,
- inplace=False,
- limit=None,
- downcast=None,
- ) -> Series | None:
- return super().fillna(
- value=value,
- method=method,
- axis=axis,
- inplace=inplace,
- limit=limit,
- downcast=downcast,
- )
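- # A minimal usage sketch for fillna (illustrative; the data is made up):
- #     s = pd.Series([1.0, np.nan, 3.0])
- #     s.fillna(0)                # scalar fill       -> [1.0, 0.0, 3.0]
- #     s.fillna(method="ffill")   # propagate forward -> [1.0, 1.0, 3.0]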
- def pop(self, item: Hashable) -> Any:
- """
- Return item and drop it from the Series. Raise KeyError if not found.
- Parameters
- ----------
- item : label
- Index of the element that needs to be removed.
- Returns
- -------
- Value that is popped from the Series.
- Examples
- --------
- >>> ser = pd.Series([1,2,3])
- >>> ser.pop(0)
- 1
- >>> ser
- 1 2
- 2 3
- dtype: int64
- """
- return super().pop(item=item)
- # error: Cannot determine type of 'replace'
- @doc(
- NDFrame.replace, # type: ignore[has-type]
- klass=_shared_doc_kwargs["klass"],
- inplace=_shared_doc_kwargs["inplace"],
- replace_iloc=_shared_doc_kwargs["replace_iloc"],
- )
- def replace(
- self,
- to_replace=None,
- value=None,
- inplace=False,
- limit=None,
- regex=False,
- method="pad",
- ):
- return super().replace(
- to_replace=to_replace,
- value=value,
- inplace=inplace,
- limit=limit,
- regex=regex,
- method=method,
- )
- def _replace_single(self, to_replace, method: str, inplace: bool, limit):
- """
- Replace values in a Series using the fill method specified when no
- replacement value is given in the call to ``replace``.
- """
- result = self if inplace else self.copy()
- values = result._values
- mask = missing.mask_missing(values, to_replace)
- if isinstance(values, ExtensionArray):
- # dispatch to the EA's _pad_mask_inplace method
- values._fill_mask_inplace(method, limit, mask)
- else:
- fill_f = missing.get_fill_func(method)
- values, _ = fill_f(values, limit=limit, mask=mask)
- if inplace:
- return
- return result
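- # Sketch of the fill-based replace path above (illustrative):
- #     s = pd.Series([0, 1, 0, 2])
- #     s.replace(0, method="pad")  # -> [0, 1, 1, 2]
- #     # matched positions are forward-filled; the leading 0 is unchanged
- #     # because no earlier value exists to propagate.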
- # error: Cannot determine type of 'shift'
- @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
- def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series:
- return super().shift(
- periods=periods, freq=freq, axis=axis, fill_value=fill_value
- )
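- # A minimal usage sketch for shift (illustrative):
- #     s = pd.Series([1, 2, 3])
- #     s.shift(1)                 # -> [NaN, 1.0, 2.0] (upcast to float)
- #     s.shift(-1, fill_value=0)  # -> [2, 3, 0]; fill_value avoids the upcast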
- def memory_usage(self, index: bool = True, deep: bool = False) -> int:
- """
- Return the memory usage of the Series.
- The memory usage can optionally include the contribution of
- the index and of elements of `object` dtype.
- Parameters
- ----------
- index : bool, default True
- Specifies whether to include the memory usage of the Series index.
- deep : bool, default False
- If True, introspect the data deeply by interrogating
- `object` dtypes for system-level memory consumption, and include
- it in the returned value.
- Returns
- -------
- int
- Bytes of memory consumed.
- See Also
- --------
- numpy.ndarray.nbytes : Total bytes consumed by the elements of the
- array.
- DataFrame.memory_usage : Bytes consumed by a DataFrame.
- Examples
- --------
- >>> s = pd.Series(range(3))
- >>> s.memory_usage()
- 152
- Not including the index gives the size of the rest of the data, which
- is necessarily smaller:
- >>> s.memory_usage(index=False)
- 24
- The memory footprint of `object` values is ignored by default:
- >>> s = pd.Series(["a", "b"])
- >>> s.values
- array(['a', 'b'], dtype=object)
- >>> s.memory_usage()
- 144
- >>> s.memory_usage(deep=True)
- 244
- """
- v = self._memory_usage(deep=deep)
- if index:
- v += self.index.memory_usage(deep=deep)
- return v
- def isin(self, values) -> Series:
- """
- Whether elements in Series are contained in `values`.
- Return a boolean Series showing whether each element in the Series
- matches an element in the passed sequence of `values` exactly.
- Parameters
- ----------
- values : set or list-like
- The sequence of values to test. Passing in a single string will
- raise a ``TypeError``. Instead, turn a single string into a
- list of one element.
- Returns
- -------
- Series
- Series of booleans indicating if each element is in values.
- Raises
- ------
- TypeError
- * If `values` is a string
- See Also
- --------
- DataFrame.isin : Equivalent method on DataFrame.
- Examples
- --------
- >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
- ... 'hippo'], name='animal')
- >>> s.isin(['cow', 'lama'])
- 0 True
- 1 True
- 2 True
- 3 False
- 4 True
- 5 False
- Name: animal, dtype: bool
- To invert the boolean values, use the ``~`` operator:
- >>> ~s.isin(['cow', 'lama'])
- 0 False
- 1 False
- 2 False
- 3 True
- 4 False
- 5 True
- Name: animal, dtype: bool
- Passing a single string as ``s.isin('lama')`` will raise an error. Use
- a list of one element instead:
- >>> s.isin(['lama'])
- 0 True
- 1 False
- 2 True
- 3 False
- 4 True
- 5 False
- Name: animal, dtype: bool
- Strings and integers are distinct and are therefore not comparable:
- >>> pd.Series([1]).isin(['1'])
- 0 False
- dtype: bool
- >>> pd.Series([1.1]).isin(['1.1'])
- 0 False
- dtype: bool
- """
- result = algorithms.isin(self._values, values)
- return self._constructor(result, index=self.index).__finalize__(
- self, method="isin"
- )
- def between(self, left, right, inclusive="both") -> Series:
- """
- Return boolean Series equivalent to left <= series <= right.
- This function returns a boolean vector containing `True` wherever the
- corresponding Series element is between the boundary values `left` and
- `right`. NA values are treated as `False`.
- Parameters
- ----------
- left : scalar or list-like
- Left boundary.
- right : scalar or list-like
- Right boundary.
- inclusive : {"both", "neither", "left", "right"}
- Include boundaries. Whether to set each bound as closed or open.
- .. versionchanged:: 1.3.0
- Returns
- -------
- Series
- Series representing whether each element is between left and
- right (with boundaries included or excluded according to ``inclusive``).
- See Also
- --------
- Series.gt : Greater than of series and other.
- Series.lt : Less than of series and other.
- Notes
- -----
- With the default ``inclusive="both"``, this function is equivalent to
- ``(left <= ser) & (ser <= right)``.
- Examples
- --------
- >>> s = pd.Series([2, 0, 4, 8, np.nan])
- Boundary values are included by default:
- >>> s.between(1, 4)
- 0 True
- 1 False
- 2 True
- 3 False
- 4 False
- dtype: bool
- With `inclusive` set to ``"neither"`` boundary values are excluded:
- >>> s.between(1, 4, inclusive="neither")
- 0 True
- 1 False
- 2 False
- 3 False
- 4 False
- dtype: bool
- `left` and `right` can be any scalar value:
- >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
- >>> s.between('Anna', 'Daniel')
- 0 False
- 1 True
- 2 True
- 3 False
- dtype: bool
- """
- if inclusive is True or inclusive is False:
- warnings.warn(
- "Boolean inputs to the `inclusive` argument are deprecated in "
- "favour of `both` or `neither`.",
- FutureWarning,
- stacklevel=2,
- )
- if inclusive:
- inclusive = "both"
- else:
- inclusive = "neither"
- if inclusive == "both":
- lmask = self >= left
- rmask = self <= right
- elif inclusive == "left":
- lmask = self >= left
- rmask = self < right
- elif inclusive == "right":
- lmask = self > left
- rmask = self <= right
- elif inclusive == "neither":
- lmask = self > left
- rmask = self < right
- else:
- raise ValueError(
- "Inclusive has to be either string of 'both',"
- "'left', 'right', or 'neither'."
- )
- return lmask & rmask
- # ----------------------------------------------------------------------
- # Convert to types that support pd.NA
- def _convert_dtypes(
- self,
- infer_objects: bool = True,
- convert_string: bool = True,
- convert_integer: bool = True,
- convert_boolean: bool = True,
- convert_floating: bool = True,
- ) -> Series:
- input_series = self
- if infer_objects:
- input_series = input_series.infer_objects()
- if is_object_dtype(input_series):
- input_series = input_series.copy()
- if convert_string or convert_integer or convert_boolean or convert_floating:
- inferred_dtype = convert_dtypes(
- input_series._values,
- convert_string,
- convert_integer,
- convert_boolean,
- convert_floating,
- )
- result = input_series.astype(inferred_dtype)
- else:
- result = input_series.copy()
- return result
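- # Sketch of the conversion above via the public method (illustrative):
- #     s = pd.Series([1, 2, None], dtype="object")
- #     s.convert_dtypes()  # -> Int64 dtype, with pd.NA for the missing entry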
- # error: Cannot determine type of 'isna'
- @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
- def isna(self) -> Series:
- return generic.NDFrame.isna(self)
- # error: Cannot determine type of 'isna'
- @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
- def isnull(self) -> Series:
- return super().isnull()
- # error: Cannot determine type of 'notna'
- @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
- def notna(self) -> Series:
- return super().notna()
- # error: Cannot determine type of 'notna'
- @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
- def notnull(self) -> Series:
- return super().notnull()
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def dropna(self, axis=0, inplace=False, how=None):
- """
- Return a new Series with missing values removed.
- See the :ref:`User Guide <missing_data>` for more on which values are
- considered missing, and how to work with missing data.
- Parameters
- ----------
- axis : {0 or 'index'}, default 0
- There is only one axis to drop values from.
- inplace : bool, default False
- If True, do operation inplace and return None.
- how : str, optional
- Not in use. Kept for compatibility.
- Returns
- -------
- Series or None
- Series with NA entries dropped from it or None if ``inplace=True``.
- See Also
- --------
- Series.isna: Indicate missing values.
- Series.notna : Indicate existing (non-missing) values.
- Series.fillna : Replace missing values.
- DataFrame.dropna : Drop rows or columns which contain NA values.
- Index.dropna : Drop missing indices.
- Examples
- --------
- >>> ser = pd.Series([1., 2., np.nan])
- >>> ser
- 0 1.0
- 1 2.0
- 2 NaN
- dtype: float64
- Drop NA values from a Series.
- >>> ser.dropna()
- 0 1.0
- 1 2.0
- dtype: float64
- Keep the Series with valid entries in the same variable.
- >>> ser.dropna(inplace=True)
- >>> ser
- 0 1.0
- 1 2.0
- dtype: float64
- Empty strings are not considered NA values. ``None`` is considered an
- NA value.
- >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
- >>> ser
- 0 NaN
- 1 2
- 2 NaT
- 3
- 4 None
- 5 I stay
- dtype: object
- >>> ser.dropna()
- 1 2
- 3
- 5 I stay
- dtype: object
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- # Validate the axis parameter
- self._get_axis_number(axis or 0)
- if self._can_hold_na:
- result = remove_na_arraylike(self)
- if inplace:
- self._update_inplace(result)
- else:
- return result
- else:
- if inplace:
- # do nothing
- pass
- else:
- return self.copy()
- # ----------------------------------------------------------------------
- # Time series-oriented methods
- # error: Cannot determine type of 'asfreq'
- @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type]
- def asfreq(
- self,
- freq,
- method=None,
- how: str | None = None,
- normalize: bool = False,
- fill_value=None,
- ) -> Series:
- return super().asfreq(
- freq=freq,
- method=method,
- how=how,
- normalize=normalize,
- fill_value=fill_value,
- )
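- # A minimal usage sketch for asfreq (illustrative; the dates are made up):
- #     s = pd.Series([1, 2], index=pd.to_datetime(["2021-01-01", "2021-01-03"]))
- #     s.asfreq("D")  # inserts 2021-01-02 as NaN (or as fill_value if given)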
- # error: Cannot determine type of 'resample'
- @doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type]
- def resample(
- self,
- rule,
- axis=0,
- closed: str | None = None,
- label: str | None = None,
- convention: str = "start",
- kind: str | None = None,
- loffset=None,
- base: int | None = None,
- on=None,
- level=None,
- origin: str | TimestampConvertibleTypes = "start_day",
- offset: TimedeltaConvertibleTypes | None = None,
- ) -> Resampler:
- return super().resample(
- rule=rule,
- axis=axis,
- closed=closed,
- label=label,
- convention=convention,
- kind=kind,
- loffset=loffset,
- base=base,
- on=on,
- level=level,
- origin=origin,
- offset=offset,
- )
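- # A minimal usage sketch for resample (illustrative; the data is made up):
- #     idx = pd.date_range("2021-01-01", periods=3, freq="D")
- #     s = pd.Series([1, 1, 1], index=idx)
- #     s.resample("2D").sum()  # -> 2021-01-01: 2, 2021-01-03: 1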
- def to_timestamp(self, freq=None, how="start", copy=True) -> Series:
- """
- Cast to DatetimeIndex of Timestamps, at the *beginning* or *end* of each period (see ``how``).
- Parameters
- ----------
- freq : str, default frequency of PeriodIndex
- Desired frequency.
- how : {'s', 'e', 'start', 'end'}
- Convention for converting period to timestamp; start of period
- vs. end.
- copy : bool, default True
- Whether or not to return a copy.
- Returns
- -------
- Series with DatetimeIndex
- """
- # Validate the index type before (optionally) copying the data.
- if not isinstance(self.index, PeriodIndex):
- raise TypeError(f"unsupported Type {type(self.index).__name__}")
- new_values = self._values
- if copy:
- new_values = new_values.copy()
- new_index = self.index.to_timestamp(freq=freq, how=how)
- return self._constructor(new_values, index=new_index).__finalize__(
- self, method="to_timestamp"
- )
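- # A minimal usage sketch for to_timestamp (illustrative):
- #     idx = pd.period_range("2021", periods=2, freq="A")
- #     s = pd.Series([1, 2], index=idx)
- #     s.to_timestamp(how="start").index
- #     # -> DatetimeIndex(['2021-01-01', '2022-01-01'], ...)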
- def to_period(self, freq=None, copy=True) -> Series:
- """
- Convert Series from DatetimeIndex to PeriodIndex.
- Parameters
- ----------
- freq : str, default None
- Frequency associated with the PeriodIndex.
- copy : bool, default True
- Whether or not to return a copy.
- Returns
- -------
- Series
- Series with index converted to PeriodIndex.
- """
- # Validate the index type before (optionally) copying the data.
- if not isinstance(self.index, DatetimeIndex):
- raise TypeError(f"unsupported Type {type(self.index).__name__}")
- new_values = self._values
- if copy:
- new_values = new_values.copy()
- new_index = self.index.to_period(freq=freq)
- return self._constructor(new_values, index=new_index).__finalize__(
- self, method="to_period"
- )
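- # A minimal usage sketch for to_period (illustrative):
- #     idx = pd.date_range("2021-01-31", periods=2, freq="M")
- #     s = pd.Series([1, 2], index=idx)
- #     s.to_period("M").index  # -> PeriodIndex(['2021-01', '2021-02'], freq='M')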
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def ffill(
- self: Series,
- axis: None | Axis = None,
- inplace: bool = False,
- limit: None | int = None,
- downcast=None,
- ) -> Series | None:
- return super().ffill(axis, inplace, limit, downcast)
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
- def bfill(
- self: Series,
- axis: None | Axis = None,
- inplace: bool = False,
- limit: None | int = None,
- downcast=None,
- ) -> Series | None:
- return super().bfill(axis, inplace, limit, downcast)
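- # ffill/bfill are directional shortcuts for fillna (illustrative sketch):
- #     s = pd.Series([np.nan, 2.0, np.nan])
- #     s.ffill()  # -> [NaN, 2.0, 2.0]; nothing precedes the first value
- #     s.bfill()  # -> [2.0, 2.0, NaN]; nothing follows the last value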
- @deprecate_nonkeyword_arguments(
- version=None, allowed_args=["self", "lower", "upper"]
- )
- def clip(
- self: Series,
- lower=None,
- upper=None,
- axis: Axis | None = None,
- inplace: bool = False,
- *args,
- **kwargs,
- ) -> Series | None:
- return super().clip(lower, upper, axis, inplace, *args, **kwargs)
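- # A minimal usage sketch for clip (illustrative):
- #     s = pd.Series([-2, 0, 5])
- #     s.clip(lower=-1, upper=3)  # -> [-1, 0, 3]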
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"])
- def interpolate(
- self: Series,
- method: str = "linear",
- axis: Axis = 0,
- limit: int | None = None,
- inplace: bool = False,
- limit_direction: str | None = None,
- limit_area: str | None = None,
- downcast: str | None = None,
- **kwargs,
- ) -> Series | None:
- return super().interpolate(
- method,
- axis,
- limit,
- inplace,
- limit_direction,
- limit_area,
- downcast,
- **kwargs,
- )
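- # A minimal usage sketch for interpolate (illustrative):
- #     s = pd.Series([0.0, np.nan, 2.0])
- #     s.interpolate()  # linear by default -> [0.0, 1.0, 2.0]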
- @deprecate_nonkeyword_arguments(
- version=None, allowed_args=["self", "cond", "other"]
- )
- def where(
- self,
- cond,
- other=np.nan,
- inplace=False,
- axis=None,
- level=None,
- errors=lib.no_default,
- try_cast=lib.no_default,
- ):
- return super().where(cond, other, inplace, axis, level, errors, try_cast)
- @deprecate_nonkeyword_arguments(
- version=None, allowed_args=["self", "cond", "other"]
- )
- def mask(
- self,
- cond,
- other=np.nan,
- inplace=False,
- axis=None,
- level=None,
- errors=lib.no_default,
- try_cast=lib.no_default,
- ):
- return super().mask(cond, other, inplace, axis, level, errors, try_cast)
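- # where/mask are complements of each other (illustrative sketch):
- #     s = pd.Series([1, 2, 3, 4])
- #     s.where(s > 2)    # keep where cond holds    -> [NaN, NaN, 3.0, 4.0]
- #     s.mask(s > 2, 0)  # replace where cond holds -> [1, 2, 0, 0]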
- # ----------------------------------------------------------------------
- # Add index
- _AXIS_ORDERS = ["index"]
- _AXIS_LEN = len(_AXIS_ORDERS)
- _info_axis_number = 0
- _info_axis_name = "index"
- index: Index = properties.AxisProperty(
- axis=0, doc="The index (axis labels) of the Series."
- )
- # ----------------------------------------------------------------------
- # Accessor Methods
- # ----------------------------------------------------------------------
- str = CachedAccessor("str", StringMethods)
- dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
- cat = CachedAccessor("cat", CategoricalAccessor)
- plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
- sparse = CachedAccessor("sparse", SparseAccessor)
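- # Accessor usage sketch (illustrative): each accessor is built lazily on
- # first attribute access and cached on the instance.
- #     pd.Series(["a", "b"]).str.upper()                   # -> ["A", "B"]
- #     pd.Series(pd.to_datetime(["2021-06-01"])).dt.year   # -> [2021]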
- # ----------------------------------------------------------------------
- # Add plotting methods to Series
- hist = pandas.plotting.hist_series
- # ----------------------------------------------------------------------
- # Template-Based Arithmetic/Comparison Methods
- def _cmp_method(self, other, op):
- res_name = ops.get_op_result_name(self, other)
- if isinstance(other, Series) and not self._indexed_same(other):
- raise ValueError("Can only compare identically-labeled Series objects")
- lvalues = self._values
- rvalues = extract_array(other, extract_numpy=True, extract_range=True)
- with np.errstate(all="ignore"):
- res_values = ops.comparison_op(lvalues, rvalues, op)
- return self._construct_result(res_values, name=res_name)
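- # Comparison sketch (illustrative): unlike arithmetic, comparisons do not
- # auto-align, so differently-labeled Series raise.
- #     a = pd.Series([1, 2], index=["x", "y"])
- #     b = pd.Series([1, 2], index=["y", "x"])
- #     a == b  # ValueError: Can only compare identically-labeled Series objects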
- def _logical_method(self, other, op):
- res_name = ops.get_op_result_name(self, other)
- self, other = ops.align_method_SERIES(self, other, align_asobject=True)
- lvalues = self._values
- rvalues = extract_array(other, extract_numpy=True, extract_range=True)
- res_values = ops.logical_op(lvalues, rvalues, op)
- return self._construct_result(res_values, name=res_name)
- def _arith_method(self, other, op):
- self, other = ops.align_method_SERIES(self, other)
- return base.IndexOpsMixin._arith_method(self, other, op)
- Series._add_numeric_operations()
- # Add arithmetic!
- ops.add_flex_arithmetic_methods(Series)