/pandas/core/series.py
Python | 4474 lines | 4459 code | 10 blank | 5 comment | 20 complexity | be421d2813837526f7340c9768686d1b MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
- Large files are truncated, but you can click here to view the full file
- """
- Data structure for 1-dimensional cross-sectional and time series data
- """
- from __future__ import division
- from collections import OrderedDict
- from textwrap import dedent
- import warnings
- import numpy as np
- from pandas._libs import iNaT, index as libindex, lib, tslibs
- import pandas.compat as compat
- from pandas.compat import PY36, StringIO, u, zip
- from pandas.compat.numpy import function as nv
- from pandas.util._decorators import Appender, Substitution, deprecate
- from pandas.util._validators import validate_bool_kwarg
- from pandas.core.dtypes.common import (
- _is_unorderable_exception, ensure_platform_int, is_bool,
- is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
- is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
- is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
- from pandas.core.dtypes.generic import (
- ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
- ABCSparseArray, ABCSparseSeries)
- from pandas.core.dtypes.missing import (
- isna, na_value_for_dtype, notna, remove_na_arraylike)
- from pandas.core import algorithms, base, generic, nanops, ops
- from pandas.core.accessor import CachedAccessor
- from pandas.core.arrays import ExtensionArray, SparseArray
- from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
- from pandas.core.arrays.sparse import SparseAccessor
- import pandas.core.common as com
- from pandas.core.config import get_option
- from pandas.core.index import (
- Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
- from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
- import pandas.core.indexes.base as ibase
- from pandas.core.indexes.datetimes import DatetimeIndex
- from pandas.core.indexes.period import PeriodIndex
- from pandas.core.indexes.timedeltas import TimedeltaIndex
- from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
- from pandas.core.internals import SingleBlockManager
- from pandas.core.internals.construction import sanitize_array
- from pandas.core.strings import StringMethods
- from pandas.core.tools.datetimes import to_datetime
- import pandas.io.formats.format as fmt
- from pandas.io.formats.terminal import get_terminal_size
- import pandas.plotting._core as gfx
- # pylint: disable=E1101,E1103
- # pylint: disable=W0703,W0622,W0613,W0201
- __all__ = ['Series']
# Substitution values shared by the docstring templates (via Appender /
# Substitution decorators) on Series methods.
_shared_doc_kwargs = dict(
    axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
    axis="""axis : {0 or 'index'}
        Parameter needed for compatibility with DataFrame.""",
    inplace="""inplace : boolean, default False
        If True, performs operation inplace and returns None.""",
    unique='np.ndarray', duplicated='Series',
    optional_by='', optional_mapper='', optional_labels='', optional_axis='',
    versionadded_to_excel='\n    .. versionadded:: 0.20.0\n')
# see gh-16971
def remove_na(arr):
    """
    Remove null values from array like structure.

    .. deprecated:: 0.21.0
        Use s[s.notnull()] instead.
    """
    msg = ("remove_na is deprecated and is a private "
           "function. Do not use.")
    warnings.warn(msg, FutureWarning, stacklevel=2)
    return remove_na_arraylike(arr)
- def _coerce_method(converter):
- """
- Install the scalar coercion methods.
- """
- def wrapper(self):
- if len(self) == 1:
- return converter(self.iloc[0])
- raise TypeError("cannot convert the series to "
- "{0}".format(str(converter)))
- wrapper.__name__ = "__{name}__".format(name=converter.__name__)
- return wrapper
- # ----------------------------------------------------------------------
- # Series class
class Series(base.IndexOpsMixin, generic.NDFrame):
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, *, **) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

        .. versionchanged :: 0.23.0
           If data is a dict, argument order is maintained for Python 3.6
           and later.

    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
        sequence are used, the index will override the keys found in the
        dict.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    copy : bool, default False
        Copy input data.
    """
    # Attributes carried over to results by __finalize__.
    _metadata = ['name']
    # Namespaces exposed as accessors (Series.dt, .cat, .str, .sparse).
    _accessors = {'dt', 'cat', 'str', 'sparse'}
    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations = generic.NDFrame._deprecations | frozenset(
        ['asobject', 'reshape', 'get_value', 'set_value',
         'from_csv', 'valid', 'tolist'])

    # Override cache_readonly bc Series is mutable
    hasnans = property(base.IndexOpsMixin.hasnans.func,
                       doc=base.IndexOpsMixin.hasnans.__doc__)
- # ----------------------------------------------------------------------
- # Constructors
    def __init__(self, data=None, index=None, dtype=None, name=None,
                 copy=False, fastpath=False):
        """
        Construct a Series; see the class docstring for parameter details.

        ``fastpath=True`` is an internal shortcut: `data` is assumed to be
        an ndarray or SingleBlockManager and validation is skipped.
        """
        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:

            if index is not None:
                index = ensure_index(index)

            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            # Dispatch on the type of `data`; each branch normalizes
            # `data` toward something sanitize_array / the manager accepts.
            if isinstance(data, MultiIndex):
                raise NotImplementedError("initializing a Series from a "
                                          "MultiIndex is not supported")
            elif isinstance(data, Index):
                if name is None:
                    name = data.name

                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # need to copy to avoid aliasing issues
                    data = data._values.copy()
                    if (isinstance(data, ABCDatetimeIndex) and
                            data.tz is not None):
                        # GH#24096 need copy to be deep for datetime64tz case
                        # TODO: See if we can avoid these copies
                        data = data._values.copy(deep=True)
                copy = False
            elif isinstance(data, np.ndarray):
                pass
            elif isinstance(data, (ABCSeries, ABCSparseSeries)):
                if name is None:
                    name = data.name
                if index is None:
                    index = data.index
                else:
                    # align the incoming Series to the requested index
                    data = data.reindex(index, copy=copy)
                data = data._data
            elif isinstance(data, dict):
                data, index = self._init_dict(data, index, dtype)
                dtype = None
                copy = False
            elif isinstance(data, SingleBlockManager):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument. `copy` must '
                                         'be False.')
            elif is_extension_array_dtype(data):
                pass
            elif isinstance(data, (set, frozenset)):
                raise TypeError("{0!r} type is unordered"
                                "".format(data.__class__.__name__))
            # If data is Iterable but not list-like, consume into list.
            elif (isinstance(data, compat.Iterable)
                    and not isinstance(data, compat.Sized)):
                data = list(data)
            else:

                # handle sparse passed here (and force conversion)
                if isinstance(data, ABCSparseArray):
                    data = data.to_dense()

            if index is None:
                if not is_list_like(data):
                    data = [data]
                index = ibase.default_index(len(data))
            elif is_list_like(data):

                # a scalar numpy array is list-like but doesn't
                # have a proper length
                try:
                    if len(index) != len(data):
                        raise ValueError(
                            'Length of passed values is {val}, '
                            'index implies {ind}'
                            .format(val=len(data), ind=len(index)))
                except TypeError:
                    pass

            # create/copy the manager
            if isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype=dtype, errors='ignore',
                                       copy=copy)
                elif copy:
                    data = data.copy()
            else:
                data = sanitize_array(data, index, dtype, copy,
                                      raise_cast_failure=True)

                data = SingleBlockManager(data, index, fastpath=True)

        generic.NDFrame.__init__(self, data, fastpath=True)
        self.name = name
        self._set_axis(0, index, fastpath=True)
    def _init_dict(self, data, index=None, dtype=None):
        """
        Derive the "_data" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series
        index : Index or index-like, default None
            index for the new Series: if None, use dict keys
        dtype : dtype, default None
            dtype for the new Series: if None, infer from data

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            keys, values = zip(*compat.iteritems(data))
            values = list(values)
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            values = na_value_for_dtype(dtype)
            keys = index
        else:
            # empty dict and no index -> empty Series
            keys, values = [], []

        # Input is now list-like, so rely on "standard" construction:
        s = Series(values, index=keys, dtype=dtype)

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        elif not PY36 and not isinstance(data, OrderedDict) and data:
            # Need the `and data` to avoid sorting Series(None, index=[...])
            # since that isn't really dict-like
            # Pre-3.6 dicts have no insertion order, so sort for determinism;
            # unorderable keys are left as-is.
            try:
                s = s.sort_index()
            except TypeError:
                pass
        return s._data, s.index
- @classmethod
- def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
- fastpath=False):
- """
- Construct Series from array.
- .. deprecated :: 0.23.0
- Use pd.Series(..) constructor instead.
- """
- warnings.warn("'from_array' is deprecated and will be removed in a "
- "future version. Please use the pd.Series(..) "
- "constructor instead.", FutureWarning, stacklevel=2)
- if isinstance(arr, ABCSparseArray):
- from pandas.core.sparse.series import SparseSeries
- cls = SparseSeries
- return cls(arr, index=index, name=name, dtype=dtype,
- copy=copy, fastpath=fastpath)
    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        # Class used to build new same-dimension results.
        return Series

    @property
    def _constructor_expanddim(self):
        # Class used when adding a dimension (e.g. to_frame).
        # Imported locally to avoid a circular import with frame.py.
        from pandas.core.frame import DataFrame
        return DataFrame

    # types
    @property
    def _can_hold_na(self):
        # Delegates to the underlying block manager.
        return self._data._can_hold_na

    # Cached axis set by _set_axis; bypasses NDFrame attribute machinery.
    _index = None

    def _set_axis(self, axis, labels, fastpath=False):
        """
        Override generic, we want to set the _typ here.
        """
        if not fastpath:
            labels = ensure_index(labels)

        is_all_dates = labels.is_all_dates
        if is_all_dates:
            if not isinstance(labels,
                              (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                try:
                    labels = DatetimeIndex(labels)

                    # need to set here because we changed the index
                    if fastpath:
                        self._data.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceeds datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        self._set_subtyp(is_all_dates)

        # use object.__setattr__ so the cached index is stored directly
        object.__setattr__(self, '_index', labels)
        if not fastpath:
            self._data.set_axis(axis, labels)

    def _set_subtyp(self, is_all_dates):
        # _subtyp marks whether the Series is datetime-indexed.
        if is_all_dates:
            object.__setattr__(self, '_subtyp', 'time_series')
        else:
            object.__setattr__(self, '_subtyp', 'series')

    def _update_inplace(self, result, **kwargs):
        # we want to call the generic version and not the IndexOpsMixin
        return generic.NDFrame._update_inplace(self, result, **kwargs)
- @property
- def name(self):
- """
- Return name of the Series.
- """
- return self._name
- @name.setter
- def name(self, value):
- if value is not None and not is_hashable(value):
- raise TypeError('Series.name must be a hashable type')
- object.__setattr__(self, '_name', value)
    # ndarray compatibility
    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    @property
    def dtypes(self):
        """
        Return the dtype object of the underlying data.
        """
        # Alias of ``dtype`` for DataFrame API symmetry.
        return self._data.dtype

    @property
    def ftype(self):
        """
        Return if the data is sparse|dense.
        """
        return self._data.ftype

    @property
    def ftypes(self):
        """
        Return if the data is sparse|dense.
        """
        # Alias of ``ftype`` for DataFrame API symmetry.
        return self._data.ftype

    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        [a, a, b, c]
        Categories (3, object): [a, b, c]

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._data.external_values()

    @property
    def _values(self):
        """
        Return the internal repr of this data.
        """
        return self._data.internal_values()

    def _formatting_values(self):
        """
        Return the values that can be formatted (used by SeriesFormatter
        and DataFrameFormatter).
        """
        return self._data.formatting_values()

    def get_values(self):
        """
        Same as values (but handles sparseness conversions); is a view.
        """
        return self._data.get_values()
- @property
- def asobject(self):
- """
- Return object Series which contains boxed values.
- .. deprecated :: 0.23.0
- Use ``astype(object)`` instead.
- *this is an internal non-public method*
- """
- warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
- " instead", FutureWarning, stacklevel=2)
- return self.astype(object).values
- # ops
- def ravel(self, order='C'):
- """
- Return the flattened underlying data as an ndarray.
- Returns
- -------
- numpy.ndarray or ndarray-like
- Flattened data of the Series.
- See Also
- --------
- numpy.ndarray.ravel
- """
- return self._values.ravel(order=order)
- def compress(self, condition, *args, **kwargs):
- """
- Return selected slices of an array along given axis as a Series.
- .. deprecated:: 0.24.0
- See Also
- --------
- numpy.ndarray.compress
- """
- msg = ("Series.compress(condition) is deprecated. "
- "Use 'Series[condition]' or "
- "'np.asarray(series).compress(condition)' instead.")
- warnings.warn(msg, FutureWarning, stacklevel=2)
- nv.validate_compress(args, kwargs)
- return self[condition]
    def nonzero(self):
        """
        Return the *integer* indices of the elements that are non-zero.

        .. deprecated:: 0.24.0
           Please use .to_numpy().nonzero() as a replacement.

        This method is equivalent to calling `numpy.nonzero` on the
        series data. For compatibility with NumPy, the return value is
        the same (a tuple with an array of indices for each dimension),
        but it will always be a one-item tuple because series only have
        one dimension.

        See Also
        --------
        numpy.nonzero

        Examples
        --------
        >>> s = pd.Series([0, 3, 0, 4])
        >>> s.nonzero()
        (array([1, 3]),)
        >>> s.iloc[s.nonzero()[0]]
        1    3
        3    4
        dtype: int64

        >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd'])
        # same return although index of s is different
        >>> s.nonzero()
        (array([1, 3]),)
        >>> s.iloc[s.nonzero()[0]]
        b    3
        d    4
        dtype: int64
        """
        msg = ("Series.nonzero() is deprecated "
               "and will be removed in a future version."
               "Use Series.to_numpy().nonzero() instead")
        warnings.warn(msg, FutureWarning, stacklevel=2)
        return self._values.nonzero()

    def put(self, *args, **kwargs):
        """
        Apply the `put` method to its `values` attribute if it has one.

        See Also
        --------
        numpy.ndarray.put
        """
        # NOTE(review): mutates in place and returns None, like ndarray.put.
        self._values.put(*args, **kwargs)

    def __len__(self):
        """
        Return the length of the Series.
        """
        return len(self._data)
    def view(self, dtype=None):
        """
        Create a new view of the Series.

        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes as to not
        cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series are instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        # Reinterpret the underlying buffer; index is shared, not copied.
        return self._constructor(self._values.view(dtype),
                                 index=self.index).__finalize__(self)
    # ----------------------------------------------------------------------
    # NDArray Compat

    def __array__(self, dtype=None):
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarary`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discared with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        # GH transition period: tz-aware data silently dropping tz will
        # change to object dtype in the future; warn and keep old behavior.
        if (dtype is None and isinstance(self.array, ABCDatetimeArray)
                and getattr(self.dtype, 'tz', None)):
            msg = (
                "Converting timezone-aware DatetimeArray to timezone-naive "
                "ndarray with 'datetime64[ns]' dtype. In the future, this "
                "will return an ndarray with 'object' dtype where each "
                "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
                "To accept the future behavior, pass 'dtype=object'.\n\t"
                "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
            )
            warnings.warn(msg, FutureWarning, stacklevel=3)
            dtype = 'M8[ns]'
        return np.asarray(self.array, dtype)
    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        # Re-box the ufunc result as a Series with the same index/metadata.
        return self._constructor(result, index=self.index,
                                 copy=False).__finalize__(self)

    def __array_prepare__(self, result, context=None):
        """
        Gets called prior to a ufunc.
        """

        # nice error message for non-ufunc types
        if (context is not None and
                (not isinstance(self._values, (np.ndarray, ExtensionArray))
                 or isinstance(self._values, Categorical))):
            obj = context[1][0]
            raise TypeError("{obj} with dtype {dtype} cannot perform "
                            "the numpy op {op}".format(
                                obj=type(obj).__name__,
                                dtype=getattr(obj, 'dtype', None),
                                op=context[0].__name__))
        return result

    # ----------------------------------------------------------------------
    # Unary Methods

    @property
    def real(self):
        """
        Return the real value of vector.
        """
        return self.values.real

    @real.setter
    def real(self, v):
        # Writes through to the underlying ndarray in place.
        self.values.real = v

    @property
    def imag(self):
        """
        Return imag value of vector.
        """
        return self.values.imag

    @imag.setter
    def imag(self, v):
        # Writes through to the underlying ndarray in place.
        self.values.imag = v

    # coercion
    # Scalar conversion protocols; each raises TypeError unless len == 1.
    __float__ = _coerce_method(float)
    __long__ = _coerce_method(int)
    __int__ = _coerce_method(int)

    # ----------------------------------------------------------------------

    def _unpickle_series_compat(self, state):
        """
        Restore state from legacy (pre-0.12) pickle formats.

        ``state`` is either the modern dict form or the old
        ``(nd_state, own_state)`` tuple form.
        """
        if isinstance(state, dict):
            self._data = state['_data']
            self.name = state['name']
            self.index = self._data.index

        elif isinstance(state, tuple):

            # < 0.12 series pickle
            nd_state, own_state = state

            # recreate the ndarray
            data = np.empty(nd_state[1], dtype=nd_state[2])
            np.ndarray.__setstate__(data, nd_state)

            # backwards compat
            index, name = own_state[0], None
            if len(own_state) > 1:
                name = own_state[1]

            # recreate
            self._data = SingleBlockManager(data, index, fastpath=True)
            self._index = index
            self.name = name

        else:
            # NOTE(review): bare Exception with %-formatting; kept for
            # backward compatibility with existing catchers.
            raise Exception("cannot unpickle legacy formats -> [%s]" % state)
    # indexers
    @property
    def axes(self):
        """
        Return a list of the row axis labels.
        """
        return [self.index]

    def _ixs(self, i, axis=0):
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int, slice, or sequence of integers
        axis : int, default 0
            Ignored; present for DataFrame-compatible signature.

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """
        try:

            # dispatch to the values if we need
            values = self._values
            if isinstance(values, np.ndarray):
                return libindex.get_value_at(values, i)
            else:
                return values[i]
        except IndexError:
            raise
        except Exception:
            # fall back to positional handling of slices / sequences
            if isinstance(i, slice):
                indexer = self.index._convert_slice_indexer(i, kind='iloc')
                return self._get_values(indexer)
            else:
                label = self.index[i]
                if isinstance(label, Index):
                    return self.take(i, axis=axis, convert=True)
                else:
                    return libindex.get_value_at(self, i)

    @property
    def _is_mixed_type(self):
        # A Series has a single dtype, never mixed.
        return False

    def _slice(self, slobj, axis=0, kind=None):
        # Positional slicing helper used by the indexing machinery.
        slobj = self.index._convert_slice_indexer(slobj,
                                                  kind=kind or 'getitem')
        return self._get_values(slobj)
- def __getitem__(self, key):
- key = com.apply_if_callable(key, self)
- try:
- result = self.index.get_value(self, key)
- if not is_scalar(result):
- if is_list_like(result) and not isinstance(result, Series):
- # we need to box if loc of the key isn't scalar here
- # otherwise have inline ndarray/lists
- try:
- if not is_scalar(self.index.get_loc(key)):
- result = self._constructor(
- result, index=[key] * len(result),
- dtype=self.dtype).__finalize__(self)
- except KeyError:
- pass
- return result
- except InvalidIndexError:
- pass
- except (KeyError, ValueError):
- if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
- # kludge
- pass
- elif key is Ellipsis:
- return self
- elif com.is_bool_indexer(key):
- pass
- else:
- # we can try to coerce the indexer (or this will raise)
- new_key = self.index._convert_scalar_indexer(key,
- kind='getitem')
- if type(new_key) != type(key):
- return self.__getitem__(new_key)
- raise
- except Exception:
- raise
- if is_iterator(key):
- key = list(key)
- if com.is_bool_indexer(key):
- key = check_bool_indexer(self.index, key)
- return self._get_with(key)
    def _get_with(self, key):
        """
        Select values for a non-scalar ``key`` (slice, tuple, list-like).

        Dispatches on the inferred type of the key: positional for integer
        keys against a non-integer index, boolean masks, otherwise
        label-based via ``.loc`` / ``reindex``.
        """
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind='getitem')
            return self._get_values(indexer)
        elif isinstance(key, ABCDataFrame):
            raise TypeError('Indexing a Series with DataFrame is not '
                            'supported, use the appropriate DataFrame column')
        elif isinstance(key, tuple):
            try:
                return self._get_values_tuple(key)
            except Exception:
                # NOTE(review): broad except; a 1-tuple wrapping a slice is
                # unwrapped and retried, anything else re-raises.
                if len(key) == 1:
                    key = key[0]
                    if isinstance(key, slice):
                        return self._get_values(key)
                raise

        # pragma: no cover
        if not isinstance(key, (list, np.ndarray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        if key_type == 'integer':
            # integer keys are labels only when the index itself is numeric
            if self.index.is_integer() or self.index.is_floating():
                return self.loc[key]
            else:
                return self._get_values(key)
        elif key_type == 'boolean':
            return self._get_values(key)

        try:
            # handle the dup indexing case (GH 4246)
            if isinstance(key, (list, tuple)):
                return self.loc[key]

            return self.reindex(key)
        except Exception:
            # [slice(0, 5, None)] will break if you convert to ndarray,
            # e.g. as requested by np.median
            # hack
            if isinstance(key[0], slice):
                return self._get_values(key)
            raise
    def _get_values_tuple(self, key):
        """
        Select values for a tuple key, which requires a MultiIndex
        (except for the matplotlib ``(None, ...)`` workaround).
        """
        # mpl hackaround
        if com._any_none(*key):
            return self._get_values(key)

        if not isinstance(self.index, MultiIndex):
            raise ValueError('Can only tuple-index with a MultiIndex')

        # If key is contained, would have returned by now
        indexer, new_index = self.index.get_loc_level(key)
        return self._constructor(self._values[indexer],
                                 index=new_index).__finalize__(self)

    def _get_values(self, indexer):
        """
        Return values selected by a positional ``indexer`` as a Series,
        falling back to a raw-values lookup if slicing the manager fails.
        """
        try:
            return self._constructor(self._data.get_slice(indexer),
                                     fastpath=True).__finalize__(self)
        except Exception:
            # NOTE(review): broad fallback returns bare values, not a Series.
            return self._values[indexer]
    def __setitem__(self, key, value):
        """
        Set ``value`` at the location(s) selected by ``key``.

        Tries the index engine first, then falls back through positional,
        boolean-mask, and label-based paths. Updates any chained-assignment
        cacher afterwards.
        """
        key = com.apply_if_callable(key, self)

        def setitem(key, value):
            try:
                self._set_with_engine(key, value)
                return
            except com.SettingWithCopyError:
                # chained-assignment guard must propagate
                raise
            except (KeyError, ValueError):
                values = self._values
                if (is_integer(key) and
                        not self.index.inferred_type == 'integer'):
                    # positional set when the index isn't integer-labeled
                    values[key] = value
                    return
                elif key is Ellipsis:
                    self[:] = value
                    return
                elif com.is_bool_indexer(key):
                    pass
                elif is_timedelta64_dtype(self.dtype):
                    # reassign a null value to iNaT
                    if isna(value):
                        value = iNaT

                        try:
                            self.index._engine.set_value(self._values, key,
                                                         value)
                            return
                        except TypeError:
                            pass

                self.loc[key] = value
                return

            except TypeError as e:
                if (isinstance(key, tuple) and
                        not isinstance(self.index, MultiIndex)):
                    raise ValueError("Can only tuple-index with a MultiIndex")

                # python 3 type errors should be raised
                if _is_unorderable_exception(e):
                    raise IndexError(key)

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                try:
                    self._where(~key, value, inplace=True)
                    return
                except InvalidIndexError:
                    pass

            self._set_with(key, value)

        # do the setitem
        cacher_needs_updating = self._check_is_chained_assignment_possible()
        setitem(key, value)
        if cacher_needs_updating:
            self._maybe_update_cacher()
    def _set_with_engine(self, key, value):
        """
        Set a single value via the index engine; on KeyError fall back to
        resolving the label with ``get_loc`` and assigning positionally.
        """
        values = self._values
        try:
            self.index._engine.set_value(values, key, value)
            return
        except KeyError:
            values[self.index.get_loc(key)] = value
            return

    def _set_with(self, key, value):
        """
        Set values for a non-scalar ``key`` (slice, tuple, list-like),
        dispatching between positional and label-based assignment.
        """
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind='getitem')
            return self._set_values(indexer, value)
        else:
            if isinstance(key, tuple):
                try:
                    self._set_values(key, value)
                except Exception:
                    # NOTE(review): failure here is silently swallowed and
                    # the tuple key falls through to the list-like path —
                    # confirm this is intentional.
                    pass

            if is_scalar(key):
                key = [key]
            elif not isinstance(key, (list, Series, np.ndarray)):
                try:
                    key = list(key)
                except Exception:
                    key = [key]

            if isinstance(key, Index):
                key_type = key.inferred_type
            else:
                key_type = lib.infer_dtype(key, skipna=False)

            if key_type == 'integer':
                # integer keys are labels only for an integer-labeled index
                if self.index.inferred_type == 'integer':
                    self._set_labels(key, value)
                else:
                    return self._set_values(key, value)
            elif key_type == 'boolean':
                self._set_values(key.astype(np.bool_), value)
            else:
                self._set_labels(key, value)
- def _set_labels(self, key, value):
- if isinstance(key, Index):
- key = key.values
- else:
- key = com.asarray_tuplesafe(key)
- indexer = self.index.get_indexer(key)
- mask = indexer == -1
- if mask.any():
- raise ValueError('%s not contained in the index' % str(key[mask]))
- self._set_values(indexer, value)
- def _set_values(self, key, value):
- if isinstance(key, Series):
- key = key._values
- self._data = self._data.setitem(indexer=key, value=value)
- self._maybe_update_cacher()
    def repeat(self, repeats, axis=None):
        """
        Repeat elements of a Series.

        Returns a new Series where each element of the current Series
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            Series.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        Series
            Newly created Series with repeated elements.

        See Also
        --------
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> s = pd.Series(['a', 'b', 'c'])
        >>> s
        0    a
        1    b
        2    c
        dtype: object
        >>> s.repeat(2)
        0    a
        0    a
        1    b
        1    b
        2    c
        2    c
        dtype: object
        >>> s.repeat([1, 2, 3])
        0    a
        1    b
        1    b
        2    c
        2    c
        2    c
        dtype: object
        """
        # reject any axis other than None (numpy-compat signature only)
        nv.validate_repeat(tuple(), dict(axis=axis))
        # repeat index and values in lockstep so labels stay aligned
        new_index = self.index.repeat(repeats)
        new_values = self._values.repeat(repeats)
        return self._constructor(new_values,
                                 index=new_index).__finalize__(self)
    def get_value(self, label, takeable=False):
        """
        Quickly retrieve single value at passed index label.

        .. deprecated:: 0.21.0
            Please use .at[] or .iat[] accessors.

        Parameters
        ----------
        label : object
        takeable : interpret the index as indexers, default False

        Returns
        -------
        scalar value
        """
        warnings.warn("get_value is deprecated and will be removed "
                      "in a future release. Please use "
                      ".at[] or .iat[] accessors instead", FutureWarning,
                      stacklevel=2)
        return self._get_value(label, takeable=takeable)

    def _get_value(self, label, takeable=False):
        # Non-warning internal version used by .at / .iat.
        if takeable is True:
            return com.maybe_box_datetimelike(self._values[label])
        return self.index.get_value(self._values, label)
    _get_value.__doc__ = get_value.__doc__

    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label.

        .. deprecated:: 0.21.0
            Please use .at[] or .iat[] accessors.

        If label is not contained, a new object is created with the label
        placed at the end of the result index.

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Returns
        -------
        Series
            If label is contained, will be reference to calling Series,
            otherwise a new object.
        """
        warnings.warn("set_value is deprecated and will be removed "
                      "in a future release. Please use "
                      ".at[] or .iat[] accessors instead", FutureWarning,
                      stacklevel=2)
        return self._set_value(label, value, takeable=takeable)

    def _set_value(self, label, value, takeable=False):
        # Non-warning internal version used by .at / .iat.
        try:
            if takeable:
                self._values[label] = value
            else:
                self.index._engine.set_value(self._values, label, value)
        except (KeyError, TypeError):

            # set using a non-recursive method
            self.loc[label] = value

        return self
    _set_value.__doc__ = set_value.__doc__
- def reset_index(self, level=None, drop=False, name=None, inplace=False):
- """
- Generate a new DataFrame or Series with the index reset.
- This is useful when the index needs to be treated as a column, or
- when the index is meaningless and needs to be reset to the default
- before another operation.
- Parameters
- ----------
- level : int, str, tuple, or list, default optional
- For a Series with a MultiIndex, only remove the specified levels
- from the index. Removes all levels by default.
- drop : bool, default False
- Just reset the index, without inserting it as a column in
- the new DataFrame.
- name : object, optional
- The name to use for the column containing the original Series
- values. Uses ``self.name`` by default. This argument is ignored
- when `drop` is True.
- inplace : bool, default False
- Modify the Series in place (do not create a new object).
- Returns
- -------
- Series or DataFrame
- When `drop` is False (the default), a DataFrame is returned.
- The newly created columns will come first in the DataFrame,
- followed by the original Series values.
- When `drop` is True, a `Series` is returned.
- In either case, if ``inplace=True``, no value is returned.
- See Also
- --------
- DataFrame.reset_index: Analogous function for DataFrame.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4], name='foo',
- ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
- Generate a DataFrame with default index.
- >>> s.reset_index()
- idx foo
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To specify the name of the new column use `name`.
- >>> s.reset_index(name='values')
- idx values
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To generate a new Series with the default set `drop` to True.
- >>> s.reset_index(drop=True)
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- To update the Series in place, without generating a new one
- set `inplace` to True. Note that it also requires ``drop=True``.
- >>> s.reset_index(inplace=True, drop=True)
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- The `level` parameter is interesting for Series with a multi-level
- index.
- >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
- ... np.array(['one', 'two', 'one', 'two'])]
- >>> s2 = pd.Series(
- ... range(4), name='foo',
- ... index=pd.MultiIndex.from_arrays(arrays,
- ... names=['a', 'b']))
- To remove a specific level from the Index, use `level`.
- >>> s2.reset_index(level='a')
- a foo
- b
- one bar 0
- two bar 1
- one baz 2
- two baz 3
- If `level` is not set, all levels are removed from the Index.
- >>> s2.reset_index()
- a b foo
- 0 bar one 0
- 1 bar two 1
- 2 baz one 2
- 3 baz two 3
- """
- inplace = validate_bool_kwarg(inplace, 'inplace')
- if drop:
- new_index = ibase.default_index(len(self))
- if level is not None:
- if not isinstance(level, (tuple, list)):
- level = [level]
- level = [self.index._get_level_number(lev) for lev in level]
- if len(level) < self.index.nlevels:
- new_index = self.index.droplevel(level)
- if inplace:
- self.index = new_index
- # set name if it was passed, otherwise, keep the previous name
- self.name = name or self.name
- else:
- return self._constructor(self._values.copy(),
- index=new_index).__finalize__(self)
- elif inplace:
- raise TypeError('Cannot reset_index inplace on a Series '
- 'to create a DataFrame')
- else:
- df = self.to_frame(name)
- return df.reset_index(level=level, drop=drop)
- # ----------------------------------------------------------------------
- # Rendering Methods
- def __unicode__(self):
- """
- Return a string representation for a particular DataFrame.
- Invoked by unicode(df) in py2 only. Yields a Unicode String in both
- py2/py3.
- """
- buf = StringIO(u(""))
- width, height = get_terminal_size()
- max_rows = (height if get_option("display.max_rows") == 0 else
- get_option("display.max_rows"))
- show_dimensions = get_option("display.show_dimensions")
- self.to_string(buf=buf, name=self.name, dtype=self.dtype,
- max_rows=max_rows, length=show_dimensions)
- result = buf.getvalue()
- return result
- def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
- index=True, length=False, dtype=False, name=False,
- max_rows=None):
- """
- Render a string representation of the Series.
- Parameters
- ----------
- buf : StringIO-like, optional
- Buffer to write to.
- na_rep : str, optional
- String representation of NaN to use, default 'NaN'.
- float_format : one-parameter function, optional
- Formatter function to apply to columns' elements if they are
- floats, default None.
- header : bool, default True
- Add the Series header (index name).
- index : bool, optional
- Add index (row) labels, default True.
- length : bool, default False
- Add the Series length.
- dtype : bool, default False
- Add the Series dtype.
- name : bool, default False
- Add the Series name if not None.
- max_rows : int, optional
- Maximum number of rows to show before truncating. If None, show
- all.
- Returns
- -------
- str or None
- String representation of Series if ``buf=None``, otherwise None.
- """
- formatter = fmt.SeriesFormatter(self, name=name, length=length,
- header=header, index=index,
- dtype=dtype, na_rep=na_rep,
- float_format=float_format,
- max_rows=max_rows)
- result = formatter.to_string()
- # catch contract violations
- if not isinstance(result, compat.text_type):
- raise AssertionError("result must be of type unicode, type"
- " of result is {0!r}"
- "".format(result.__class__.__name__))
- if buf is None:
- return result
- else:
- try:
- buf.write(result)
- except AttributeError:
- with open(buf, 'w') as f:
- f.write(result)
- # ----------------------------------------------------------------------
- def iteritems(self):
- """
- Lazily iterate over (index, value) tuples.
- """
- return zip(iter(self.index), iter(self))
- items = iteritems
- # ----------------------------------------------------------------------
- # Misc public methods
- def keys(self):
- """
- Return alias for index.
- """
- return self.index
- def to_dict(self, into=dict):
- """
- Convert Series to {label -> value} dict or dict-like object.
- Parameters
- ----------
- into : class, default dict
- The collections.Mapping subclass to use as the return
- object. Can be the actual class or an empty
- instance of the mapping type you want. If you want a
- collections.defaultdict, you must pass it initialized.
- .. versionadded:: 0.21.0
- Returns
- -------
- collections.Mapping
- Key-value representation of Series.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.to_dict()
- {0: 1, 1: 2, 2: 3, 3: 4}
- >>> from collections import OrderedDict, defaultdict
- >>> s.to_dict(OrderedDict)
- OrderedDict([(0, 1)…
Large files files are truncated, but you can click here to view the full file