/pandas/core/series.py

http://github.com/wesm/pandas · Python · 4474 lines · 4009 code · 158 blank · 307 comment · 178 complexity · be421d2813837526f7340c9768686d1b MD5 · raw file

  1. """
  2. Data structure for 1-dimensional cross-sectional and time series data
  3. """
  4. from __future__ import division
  5. from collections import OrderedDict
  6. from textwrap import dedent
  7. import warnings
  8. import numpy as np
  9. from pandas._libs import iNaT, index as libindex, lib, tslibs
  10. import pandas.compat as compat
  11. from pandas.compat import PY36, StringIO, u, zip
  12. from pandas.compat.numpy import function as nv
  13. from pandas.util._decorators import Appender, Substitution, deprecate
  14. from pandas.util._validators import validate_bool_kwarg
  15. from pandas.core.dtypes.common import (
  16. _is_unorderable_exception, ensure_platform_int, is_bool,
  17. is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
  18. is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
  19. is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
  20. from pandas.core.dtypes.generic import (
  21. ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
  22. ABCSparseArray, ABCSparseSeries)
  23. from pandas.core.dtypes.missing import (
  24. isna, na_value_for_dtype, notna, remove_na_arraylike)
  25. from pandas.core import algorithms, base, generic, nanops, ops
  26. from pandas.core.accessor import CachedAccessor
  27. from pandas.core.arrays import ExtensionArray, SparseArray
  28. from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
  29. from pandas.core.arrays.sparse import SparseAccessor
  30. import pandas.core.common as com
  31. from pandas.core.config import get_option
  32. from pandas.core.index import (
  33. Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
  34. from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
  35. import pandas.core.indexes.base as ibase
  36. from pandas.core.indexes.datetimes import DatetimeIndex
  37. from pandas.core.indexes.period import PeriodIndex
  38. from pandas.core.indexes.timedeltas import TimedeltaIndex
  39. from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
  40. from pandas.core.internals import SingleBlockManager
  41. from pandas.core.internals.construction import sanitize_array
  42. from pandas.core.strings import StringMethods
  43. from pandas.core.tools.datetimes import to_datetime
  44. import pandas.io.formats.format as fmt
  45. from pandas.io.formats.terminal import get_terminal_size
  46. import pandas.plotting._core as gfx
  47. # pylint: disable=E1101,E1103
  48. # pylint: disable=W0703,W0622,W0613,W0201
  # Names exported by ``from pandas.core.series import *``.
  __all__ = ['Series']

  # Keyword values for filling in shared docstring templates; presumably
  # consumed through the Appender/Substitution decorators imported above
  # (the call sites are outside this view -- TODO confirm).
  _shared_doc_kwargs = dict(
      axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
      axis="""axis : {0 or 'index'}
  Parameter needed for compatibility with DataFrame.""",
      inplace="""inplace : boolean, default False
  If True, performs operation inplace and returns None.""",
      unique='np.ndarray', duplicated='Series',
      optional_by='', optional_mapper='', optional_labels='', optional_axis='',
      versionadded_to_excel='\n .. versionadded:: 0.20.0\n')
  59. # see gh-16971
  def remove_na(arr):
      """
      Drop the null entries from an array-like object.

      .. deprecated:: 0.21.0
          Use s[s.notnull()] instead.

      A ``FutureWarning`` is emitted on every call; the actual work is
      delegated to ``remove_na_arraylike``.
      """
      message = ("remove_na is deprecated and is a private "
                 "function. Do not use.")
      warnings.warn(message, FutureWarning, stacklevel=2)
      without_nulls = remove_na_arraylike(arr)
      return without_nulls
  69. def _coerce_method(converter):
  70. """
  71. Install the scalar coercion methods.
  72. """
  73. def wrapper(self):
  74. if len(self) == 1:
  75. return converter(self.iloc[0])
  76. raise TypeError("cannot convert the series to "
  77. "{0}".format(str(converter)))
  78. wrapper.__name__ = "__{name}__".format(name=converter.__name__)
  79. return wrapper
  80. # ----------------------------------------------------------------------
  81. # Series class
  82. class Series(base.IndexOpsMixin, generic.NDFrame):
  83. """
  84. One-dimensional ndarray with axis labels (including time series).
  85. Labels need not be unique but must be a hashable type. The object
  86. supports both integer- and label-based indexing and provides a host of
  87. methods for performing operations involving the index. Statistical
  88. methods from ndarray have been overridden to automatically exclude
  89. missing data (currently represented as NaN).
  90. Operations between Series (+, -, /, *, **) align values based on their
  91. associated index values-- they need not be the same length. The result
  92. index will be the sorted union of the two indexes.
  93. Parameters
  94. ----------
  95. data : array-like, Iterable, dict, or scalar value
  96. Contains data stored in Series.
  97. .. versionchanged :: 0.23.0
  98. If data is a dict, argument order is maintained for Python 3.6
  99. and later.
  100. index : array-like or Index (1d)
  101. Values must be hashable and have the same length as `data`.
  102. Non-unique index values are allowed. Will default to
  103. RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
  104. sequence are used, the index will override the keys found in the
  105. dict.
  106. dtype : str, numpy.dtype, or ExtensionDtype, optional
  107. Data type for the output Series. If not specified, this will be
  108. inferred from `data`.
  109. See the :ref:`user guide <basics.dtypes>` for more usages.
  110. copy : bool, default False
  111. Copy input data.
  112. """
  # Attribute names carried on the instance; presumably propagated to
  # derived objects by ``__finalize__`` (defined outside this view --
  # TODO confirm).
  _metadata = ['name']
  # Names of the accessor namespaces declared for this class.
  _accessors = {'dt', 'cat', 'str', 'sparse'}
  # tolist is not actually deprecated, just suppressed in the __dir__
  _deprecations = generic.NDFrame._deprecations | frozenset(
      ['asobject', 'reshape', 'get_value', 'set_value',
       'from_csv', 'valid', 'tolist'])
  # Override cache_readonly bc Series is mutable
  # (re-exposes the mixin's underlying function as a plain property).
  hasnans = property(base.IndexOpsMixin.hasnans.func,
                     doc=base.IndexOpsMixin.hasnans.__doc__)
  122. # ----------------------------------------------------------------------
  123. # Constructors
  def __init__(self, data=None, index=None, dtype=None, name=None,
               copy=False, fastpath=False):
      """
      Build the block manager and index for a new Series.

      With ``fastpath=True`` the inputs are trusted: ``data`` is wrapped
      in a ``SingleBlockManager`` if it is not one already and no
      validation or dtype handling is performed.  Otherwise ``data`` is
      dispatched on its type (Index, ndarray, Series, dict,
      SingleBlockManager, extension array, iterable, ...) and finally
      sanitized into a ``SingleBlockManager``.

      Raises
      ------
      NotImplementedError
          If ``data`` is a MultiIndex.
      AssertionError
          If a SingleBlockManager is passed together with a different
          ``index`` or with ``copy=True``.
      TypeError
          If ``data`` is a set or frozenset.
      ValueError
          If list-like ``data`` and ``index`` differ in length.
      """
      # we are called internally, so short-circuit
      if fastpath:
          # data is an ndarray, index is defined
          if not isinstance(data, SingleBlockManager):
              data = SingleBlockManager(data, index, fastpath=True)
          if copy:
              data = data.copy()
          if index is None:
              index = data.index
      else:
          if index is not None:
              index = ensure_index(index)
          # No data at all is treated as an empty dict, which routes
          # through _init_dict below.
          if data is None:
              data = {}
          if dtype is not None:
              dtype = self._validate_dtype(dtype)
          if isinstance(data, MultiIndex):
              raise NotImplementedError("initializing a Series from a "
                                        "MultiIndex is not supported")
          elif isinstance(data, Index):
              # An unnamed Series inherits the name of the source Index.
              if name is None:
                  name = data.name
              if dtype is not None:
                  # astype copies
                  data = data.astype(dtype)
              else:
                  # need to copy to avoid aliasing issues
                  data = data._values.copy()
                  if (isinstance(data, ABCDatetimeIndex) and
                          data.tz is not None):
                      # GH#24096 need copy to be deep for datetime64tz case
                      # TODO: See if we can avoid these copies
                      data = data._values.copy(deep=True)
              # Both branches above already copied, so skip a second copy.
              copy = False
          elif isinstance(data, np.ndarray):
              pass
          elif isinstance(data, (ABCSeries, ABCSparseSeries)):
              if name is None:
                  name = data.name
              if index is None:
                  index = data.index
              else:
                  data = data.reindex(index, copy=copy)
              # Continue with the underlying manager of the source Series.
              data = data._data
          elif isinstance(data, dict):
              data, index = self._init_dict(data, index, dtype)
              # _init_dict was already given the dtype and returns a fresh
              # manager, so neither needs to be applied again.
              dtype = None
              copy = False
          elif isinstance(data, SingleBlockManager):
              if index is None:
                  index = data.index
              elif not data.index.equals(index) or copy:
                  # GH#19275 SingleBlockManager input should only be called
                  # internally
                  raise AssertionError('Cannot pass both SingleBlockManager '
                                       '`data` argument and a different '
                                       '`index` argument. `copy` must '
                                       'be False.')
          elif is_extension_array_dtype(data):
              pass
          elif isinstance(data, (set, frozenset)):
              raise TypeError("{0!r} type is unordered"
                              "".format(data.__class__.__name__))
          # If data is Iterable but not list-like, consume into list.
          elif (isinstance(data, compat.Iterable)
                and not isinstance(data, compat.Sized)):
              data = list(data)
          else:
              # handle sparse passed here (and force conversion)
              if isinstance(data, ABCSparseArray):
                  data = data.to_dense()
          if index is None:
              # A non-list-like value is wrapped so it gets a length-1
              # default index.
              if not is_list_like(data):
                  data = [data]
              index = ibase.default_index(len(data))
          elif is_list_like(data):
              # a scalar numpy array is list-like but doesn't
              # have a proper length
              try:
                  if len(index) != len(data):
                      raise ValueError(
                          'Length of passed values is {val}, '
                          'index implies {ind}'
                          .format(val=len(data), ind=len(index)))
              except TypeError:
                  pass
          # create/copy the manager
          if isinstance(data, SingleBlockManager):
              if dtype is not None:
                  data = data.astype(dtype=dtype, errors='ignore',
                                     copy=copy)
              elif copy:
                  data = data.copy()
          else:
              data = sanitize_array(data, index, dtype, copy,
                                    raise_cast_failure=True)
              data = SingleBlockManager(data, index, fastpath=True)
      generic.NDFrame.__init__(self, data, fastpath=True)
      self.name = name
      self._set_axis(0, index, fastpath=True)
  def _init_dict(self, data, index=None, dtype=None):
      """
      Compute the manager and index of a new Series from a dict input.

      Parameters
      ----------
      data : dict or dict-like
          Source of the values (and, without ``index``, of the labels).
      index : Index or index-like, default None
          Labels for the result; the dict keys are used when None.
      dtype : dtype, default None
          Requested dtype; inferred from the values when None.

      Returns
      -------
      _data : BlockManager for the new Series
      index : index for the new Series
      """
      # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
      # raises KeyError), so we iterate the entire dict, and align
      if not data:
          if index is None:
              keys, values = [], []
          else:
              # fastpath for Series(data=None). Just use broadcasting a
              # scalar instead of reindexing.
              values = na_value_for_dtype(dtype)
              keys = index
      else:
          keys, values = zip(*compat.iteritems(data))
          values = list(values)

      # Input is now list-like, so rely on "standard" construction:
      built = Series(values, index=keys, dtype=dtype)

      # Now we just make sure the order is respected, if any.  Nothing is
      # reordered for empty ``data`` (that also avoids sorting
      # Series(None, index=[...]), which isn't really dict-like).
      if data:
          if index is not None:
              built = built.reindex(index, copy=False)
          elif not PY36 and not isinstance(data, OrderedDict):
              try:
                  built = built.sort_index()
              except TypeError:
                  pass
      return built._data, built.index
  @classmethod
  def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
                 fastpath=False):
      """
      Build a Series from an array.

      .. deprecated :: 0.23.0
          Use pd.Series(..) constructor instead.
      """
      message = ("'from_array' is deprecated and will be removed in a "
                 "future version. Please use the pd.Series(..) "
                 "constructor instead.")
      warnings.warn(message, FutureWarning, stacklevel=2)

      # Sparse arrays are constructed through SparseSeries instead of cls.
      target = cls
      if isinstance(arr, ABCSparseArray):
          from pandas.core.sparse.series import SparseSeries
          target = SparseSeries

      return target(arr, index=index, name=name, dtype=dtype,
                    copy=copy, fastpath=fastpath)
  284. # ----------------------------------------------------------------------
  @property
  def _constructor(self):
      """
      Class used to build new objects of the same dimensionality.
      """
      klass = Series
      return klass
  288. @property
  289. def _constructor_expanddim(self):
  290. from pandas.core.frame import DataFrame
  291. return DataFrame
  292. # types
  293. @property
  294. def _can_hold_na(self):
  295. return self._data._can_hold_na
  296. _index = None
  297. def _set_axis(self, axis, labels, fastpath=False):
  298. """
  299. Override generic, we want to set the _typ here.
  300. """
  301. if not fastpath:
  302. labels = ensure_index(labels)
  303. is_all_dates = labels.is_all_dates
  304. if is_all_dates:
  305. if not isinstance(labels,
  306. (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
  307. try:
  308. labels = DatetimeIndex(labels)
  309. # need to set here because we changed the index
  310. if fastpath:
  311. self._data.set_axis(axis, labels)
  312. except (tslibs.OutOfBoundsDatetime, ValueError):
  313. # labels may exceeds datetime bounds,
  314. # or not be a DatetimeIndex
  315. pass
  316. self._set_subtyp(is_all_dates)
  317. object.__setattr__(self, '_index', labels)
  318. if not fastpath:
  319. self._data.set_axis(axis, labels)
  320. def _set_subtyp(self, is_all_dates):
  321. if is_all_dates:
  322. object.__setattr__(self, '_subtyp', 'time_series')
  323. else:
  324. object.__setattr__(self, '_subtyp', 'series')
  def _update_inplace(self, result, **kwargs):
      """
      Delegate explicitly to ``NDFrame._update_inplace``.
      """
      # we want to call the generic version and not the IndexOpsMixin
      generic_impl = generic.NDFrame._update_inplace
      return generic_impl(self, result, **kwargs)
  @property
  def name(self):
      """
      Return the name attached to this Series.
      """
      current = self._name
      return current

  @name.setter
  def name(self, value):
      # ``None`` is always accepted; anything else has to be hashable.
      acceptable = value is None or is_hashable(value)
      if not acceptable:
          raise TypeError('Series.name must be a hashable type')
      object.__setattr__(self, '_name', value)
  339. # ndarray compatibility
  @property
  def dtype(self):
      """
      Return the dtype reported by the underlying data manager.
      """
      manager = self._data
      return manager.dtype
  @property
  def dtypes(self):
      """
      Return the dtype reported by the underlying data manager.

      Same value as ``dtype``; both read ``self._data.dtype``.
      """
      manager = self._data
      return manager.dtype
  @property
  def ftype(self):
      """
      Return whether the data is sparse or dense (manager ``ftype``).
      """
      manager = self._data
      return manager.ftype
  @property
  def ftypes(self):
      """
      Return whether the data is sparse or dense (manager ``ftype``).

      Same value as ``ftype``; both read ``self._data.ftype``.
      """
      manager = self._data
      return manager.ftype
  @property
  def values(self):
      """
      Return Series as ndarray or ndarray-like depending on the dtype.

      .. warning::

         We recommend using :attr:`Series.array` or
         :meth:`Series.to_numpy`, depending on whether you need
         a reference to the underlying data or a NumPy array.

      Returns
      -------
      numpy.ndarray or ndarray-like

      See Also
      --------
      Series.array : Reference to the underlying data.
      Series.to_numpy : A NumPy array representing the underlying data.

      Examples
      --------
      >>> pd.Series([1, 2, 3]).values
      array([1, 2, 3])

      >>> pd.Series(list('aabc')).values
      array(['a', 'a', 'b', 'c'], dtype=object)

      >>> pd.Series(list('aabc')).astype('category').values
      [a, a, b, c]
      Categories (3, object): [a, b, c]

      Timezone aware datetime data is converted to UTC:

      >>> pd.Series(pd.date_range('20130101', periods=3,
      ...                         tz='US/Eastern')).values
      array(['2013-01-01T05:00:00.000000000',
             '2013-01-02T05:00:00.000000000',
             '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
      """
      manager = self._data
      return manager.external_values()
  396. @property
  397. def _values(self):
  398. """
  399. Return the internal repr of this data.
  400. """
  401. return self._data.internal_values()
  402. def _formatting_values(self):
  403. """
  404. Return the values that can be formatted (used by SeriesFormatter
  405. and DataFrameFormatter).
  406. """
  407. return self._data.formatting_values()
  def get_values(self):
      """
      Same as values (but handles sparseness conversions); is a view.
      """
      manager = self._data
      return manager.get_values()
  @property
  def asobject(self):
      """
      Return the object-dtype values of the Series (boxed values).

      .. deprecated :: 0.23.0

         Use ``astype(object)`` instead.

      *this is an internal non-public method*
      """
      note = ("'asobject' is deprecated. Use 'astype(object)'"
              " instead")
      warnings.warn(note, FutureWarning, stacklevel=2)
      boxed = self.astype(object)
      return boxed.values
  424. # ops
  def ravel(self, order='C'):
      """
      Return the flattened underlying data as an ndarray.

      Parameters
      ----------
      order : str, default 'C'
          Forwarded to the ``ravel`` method of the underlying values.

      Returns
      -------
      numpy.ndarray or ndarray-like
          Flattened data of the Series.

      See Also
      --------
      numpy.ndarray.ravel
      """
      backing = self._values
      return backing.ravel(order=order)
  def compress(self, condition, *args, **kwargs):
      """
      Return selected slices of an array along given axis as a Series.

      .. deprecated:: 0.24.0

      Extra positional and keyword arguments are only validated (via
      ``nv.validate_compress``); the result is ``self[condition]``.

      See Also
      --------
      numpy.ndarray.compress
      """
      deprecation_note = ("Series.compress(condition) is deprecated. "
                          "Use 'Series[condition]' or "
                          "'np.asarray(series).compress(condition)' instead.")
      warnings.warn(deprecation_note, FutureWarning, stacklevel=2)
      nv.validate_compress(args, kwargs)
      selected = self[condition]
      return selected
  def nonzero(self):
      """
      Return the *integer* indices of the elements that are non-zero.

      .. deprecated:: 0.24.0
         Please use .to_numpy().nonzero() as a replacement.

      This method is equivalent to calling `numpy.nonzero` on the
      series data. For compatibility with NumPy, the return value is
      the same (a tuple with an array of indices for each dimension),
      but it will always be a one-item tuple because series only have
      one dimension.

      See Also
      --------
      numpy.nonzero

      Examples
      --------
      >>> s = pd.Series([0, 3, 0, 4])
      >>> s.nonzero()
      (array([1, 3]),)
      >>> s.iloc[s.nonzero()[0]]
      1    3
      3    4
      dtype: int64

      >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd'])
      # same return although index of s is different
      >>> s.nonzero()
      (array([1, 3]),)
      >>> s.iloc[s.nonzero()[0]]
      b    3
      d    4
      dtype: int64
      """
      # The sentences are joined by implicit literal concatenation, so each
      # fragment needs its own trailing space; the previous text rendered
      # as "...future version.Use Series...".
      msg = ("Series.nonzero() is deprecated "
             "and will be removed in a future version. "
             "Use Series.to_numpy().nonzero() instead")
      warnings.warn(msg, FutureWarning, stacklevel=2)
      return self._values.nonzero()
  def put(self, *args, **kwargs):
      """
      Forward to the ``put`` method of the underlying values.

      All arguments are passed through unchanged; nothing is returned.

      See Also
      --------
      numpy.ndarray.put
      """
      backing = self._values
      backing.put(*args, **kwargs)
  495. def __len__(self):
  496. """
  497. Return the length of the Series.
  498. """
  499. return len(self._data)
  def view(self, dtype=None):
      """
      Create a new view of the Series.

      This function will return a new Series with a view of the same
      underlying values in memory, optionally reinterpreted with a new data
      type. The new data type must preserve the same size in bytes as to not
      cause index misalignment.

      Parameters
      ----------
      dtype : data type
          Data type object or one of their string representations.

      Returns
      -------
      Series
          A new Series object as a view of the same data in memory.

      See Also
      --------
      numpy.ndarray.view : Equivalent numpy function to create a new view of
          the same data in memory.

      Notes
      -----
      Series are instantiated with ``dtype=float64`` by default. While
      ``numpy.ndarray.view()`` will return a view with the same data type as
      the original array, ``Series.view()`` (without specified dtype)
      will try using ``float64`` and may fail if the original data type size
      in bytes is not the same.

      Examples
      --------
      >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
      >>> s
      0   -2
      1   -1
      2    0
      3    1
      4    2
      dtype: int8

      The 8 bit signed integer representation of `-1` is `0b11111111`, but
      the same bytes represent 255 if read as an 8 bit unsigned integer:

      >>> us = s.view('uint8')
      >>> us
      0    254
      1    255
      2      0
      3      1
      4      2
      dtype: uint8

      The views share the same underlying values:

      >>> us[0] = 128
      >>> s
      0   -128
      1     -1
      2      0
      3      1
      4      2
      dtype: int8
      """
      make = self._constructor
      reinterpreted = self._values.view(dtype)
      viewed = make(reinterpreted, index=self.index)
      return viewed.__finalize__(self)
  558. # ----------------------------------------------------------------------
  559. # NDArray Compat
  560. def __array__(self, dtype=None):
  561. """
  562. Return the values as a NumPy array.
  563. Users should not call this directly. Rather, it is invoked by
  564. :func:`numpy.array` and :func:`numpy.asarray`.
  565. Parameters
  566. ----------
  567. dtype : str or numpy.dtype, optional
  568. The dtype to use for the resulting NumPy array. By default,
  569. the dtype is inferred from the data.
  570. Returns
  571. -------
  572. numpy.ndarray
  573. The values in the series converted to a :class:`numpy.ndarary`
  574. with the specified `dtype`.
  575. See Also
  576. --------
  577. array : Create a new array from data.
  578. Series.array : Zero-copy view to the array backing the Series.
  579. Series.to_numpy : Series method for similar behavior.
  580. Examples
  581. --------
  582. >>> ser = pd.Series([1, 2, 3])
  583. >>> np.asarray(ser)
  584. array([1, 2, 3])
  585. For timezone-aware data, the timezones may be retained with
  586. ``dtype='object'``
  587. >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
  588. >>> np.asarray(tzser, dtype="object")
  589. array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
  590. Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
  591. dtype=object)
  592. Or the values may be localized to UTC and the tzinfo discared with
  593. ``dtype='datetime64[ns]'``
  594. >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS
  595. array(['1999-12-31T23:00:00.000000000', ...],
  596. dtype='datetime64[ns]')
  597. """
  598. if (dtype is None and isinstance(self.array, ABCDatetimeArray)
  599. and getattr(self.dtype, 'tz', None)):
  600. msg = (
  601. "Converting timezone-aware DatetimeArray to timezone-naive "
  602. "ndarray with 'datetime64[ns]' dtype. In the future, this "
  603. "will return an ndarray with 'object' dtype where each "
  604. "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
  605. "To accept the future behavior, pass 'dtype=object'.\n\t"
  606. "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
  607. )
  608. warnings.warn(msg, FutureWarning, stacklevel=3)
  609. dtype = 'M8[ns]'
  610. return np.asarray(self.array, dtype)
  611. def __array_wrap__(self, result, context=None):
  612. """
  613. Gets called after a ufunc.
  614. """
  615. return self._constructor(result, index=self.index,
  616. copy=False).__finalize__(self)
  617. def __array_prepare__(self, result, context=None):
  618. """
  619. Gets called prior to a ufunc.
  620. """
  621. # nice error message for non-ufunc types
  622. if (context is not None and
  623. (not isinstance(self._values, (np.ndarray, ExtensionArray))
  624. or isinstance(self._values, Categorical))):
  625. obj = context[1][0]
  626. raise TypeError("{obj} with dtype {dtype} cannot perform "
  627. "the numpy op {op}".format(
  628. obj=type(obj).__name__,
  629. dtype=getattr(obj, 'dtype', None),
  630. op=context[0].__name__))
  631. return result
  632. # ----------------------------------------------------------------------
  633. # Unary Methods
  @property
  def real(self):
      """
      Return the real value of vector.
      """
      external = self.values
      return external.real

  @real.setter
  def real(self, v):
      # Assigns through the ``real`` attribute of ``self.values``.
      external = self.values
      external.real = v
  @property
  def imag(self):
      """
      Return imag value of vector.
      """
      external = self.values
      return external.imag

  @imag.setter
  def imag(self, v):
      # Assigns through the ``imag`` attribute of ``self.values``.
      external = self.values
      external.imag = v
  # coercion
  # Each dunder below is produced by _coerce_method, which converts a
  # length-one Series with the given builtin and raises TypeError for any
  # other length.  ``__long__`` reuses ``int``; presumably it is kept for
  # Python 2's ``long()`` protocol -- TODO confirm.
  __float__ = _coerce_method(float)
  __long__ = _coerce_method(int)
  __int__ = _coerce_method(int)
  656. # ----------------------------------------------------------------------
  657. def _unpickle_series_compat(self, state):
  658. if isinstance(state, dict):
  659. self._data = state['_data']
  660. self.name = state['name']
  661. self.index = self._data.index
  662. elif isinstance(state, tuple):
  663. # < 0.12 series pickle
  664. nd_state, own_state = state
  665. # recreate the ndarray
  666. data = np.empty(nd_state[1], dtype=nd_state[2])
  667. np.ndarray.__setstate__(data, nd_state)
  668. # backwards compat
  669. index, name = own_state[0], None
  670. if len(own_state) > 1:
  671. name = own_state[1]
  672. # recreate
  673. self._data = SingleBlockManager(data, index, fastpath=True)
  674. self._index = index
  675. self.name = name
  676. else:
  677. raise Exception("cannot unpickle legacy formats -> [%s]" % state)
  678. # indexers
  @property
  def axes(self):
      """
      Return a list of the row axis labels.
      """
      row_labels = self.index
      return [row_labels]
  685. def _ixs(self, i, axis=0):
  686. """
  687. Return the i-th value or values in the Series by location.
  688. Parameters
  689. ----------
  690. i : int, slice, or sequence of integers
  691. Returns
  692. -------
  693. scalar (int) or Series (slice, sequence)
  694. """
  695. try:
  696. # dispatch to the values if we need
  697. values = self._values
  698. if isinstance(values, np.ndarray):
  699. return libindex.get_value_at(values, i)
  700. else:
  701. return values[i]
  702. except IndexError:
  703. raise
  704. except Exception:
  705. if isinstance(i, slice):
  706. indexer = self.index._convert_slice_indexer(i, kind='iloc')
  707. return self._get_values(indexer)
  708. else:
  709. label = self.index[i]
  710. if isinstance(label, Index):
  711. return self.take(i, axis=axis, convert=True)
  712. else:
  713. return libindex.get_value_at(self, i)
  714. @property
  715. def _is_mixed_type(self):
  716. return False
  717. def _slice(self, slobj, axis=0, kind=None):
  718. slobj = self.index._convert_slice_indexer(slobj,
  719. kind=kind or 'getitem')
  720. return self._get_values(slobj)
  def __getitem__(self, key):
      """
      Look up ``key``: first as a label through the index, then through
      the generic fallback in ``_get_with``.

      A callable ``key`` is first resolved by ``com.apply_if_callable``.
      """
      key = com.apply_if_callable(key, self)
      try:
          # Fast path: let the index resolve the key directly.
          result = self.index.get_value(self, key)
          if not is_scalar(result):
              if is_list_like(result) and not isinstance(result, Series):
                  # we need to box if loc of the key isn't scalar here
                  # otherwise have inline ndarray/lists
                  try:
                      if not is_scalar(self.index.get_loc(key)):
                          result = self._constructor(
                              result, index=[key] * len(result),
                              dtype=self.dtype).__finalize__(self)
                  except KeyError:
                      pass
          return result
      except InvalidIndexError:
          # Not a valid single label; continue with the fallback below.
          pass
      except (KeyError, ValueError):
          if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
              # kludge
              pass
          elif key is Ellipsis:
              return self
          elif com.is_bool_indexer(key):
              pass
          else:
              # we can try to coerce the indexer (or this will raise)
              new_key = self.index._convert_scalar_indexer(key,
                                                           kind='getitem')
              # Retry only when coercion changed the key's type;
              # otherwise re-raise the original lookup error.
              if type(new_key) != type(key):
                  return self.__getitem__(new_key)
              raise
      except Exception:
          # Any other exception propagates unchanged.
          raise
      # Iterators are materialized so they can be inspected more than once.
      if is_iterator(key):
          key = list(key)
      if com.is_bool_indexer(key):
          key = check_bool_indexer(self.index, key)
      return self._get_with(key)
  def _get_with(self, key):
      """
      Fallback retrieval for keys the direct index lookup did not resolve.

      Dispatches on the kind of ``key``: slice, tuple, integer-like,
      boolean, or other list-like.  Raises ``TypeError`` when ``key`` is
      a DataFrame.
      """
      # other: fancy integer or otherwise
      if isinstance(key, slice):
          indexer = self.index._convert_slice_indexer(key, kind='getitem')
          return self._get_values(indexer)
      elif isinstance(key, ABCDataFrame):
          raise TypeError('Indexing a Series with DataFrame is not '
                          'supported, use the appropriate DataFrame column')
      elif isinstance(key, tuple):
          try:
              return self._get_values_tuple(key)
          except Exception:
              # A one-element tuple wrapping a slice is unwrapped and
              # handled positionally; everything else re-raises.
              if len(key) == 1:
                  key = key[0]
                  if isinstance(key, slice):
                      return self._get_values(key)
              raise
      # pragma: no cover
      if not isinstance(key, (list, np.ndarray, Series, Index)):
          key = list(key)
      # Classify the key's contents to choose label vs. positional access.
      if isinstance(key, Index):
          key_type = key.inferred_type
      else:
          key_type = lib.infer_dtype(key, skipna=False)
      if key_type == 'integer':
          # Integer keys are labels on integer/floating indexes and
          # positions otherwise.
          if self.index.is_integer() or self.index.is_floating():
              return self.loc[key]
          else:
              return self._get_values(key)
      elif key_type == 'boolean':
          return self._get_values(key)
      try:
          # handle the dup indexing case (GH 4246)
          if isinstance(key, (list, tuple)):
              return self.loc[key]
          return self.reindex(key)
      except Exception:
          # [slice(0, 5, None)] will break if you convert to ndarray,
          # e.g. as requested by np.median
          # hack
          if isinstance(key[0], slice):
              return self._get_values(key)
          raise
  def _get_values_tuple(self, key):
      """
      Retrieve values for a tuple ``key``.

      A tuple containing ``None`` is passed straight to ``_get_values``;
      otherwise the index must be a MultiIndex and the key is resolved
      with ``get_loc_level``.

      Raises
      ------
      ValueError
          If the index is not a MultiIndex.
      """
      # mpl hackaround
      if com._any_none(*key):
          return self._get_values(key)

      if not isinstance(self.index, MultiIndex):
          raise ValueError('Can only tuple-index with a MultiIndex')

      # If key is contained, would have returned by now
      located = self.index.get_loc_level(key)
      indexer, new_index = located
      make = self._constructor
      picked = make(self._values[indexer], index=new_index)
      return picked.__finalize__(self)
  814. def _get_values(self, indexer):
  815. try:
  816. return self._constructor(self._data.get_slice(indexer),
  817. fastpath=True).__finalize__(self)
  818. except Exception:
  819. return self._values[indexer]
  def __setitem__(self, key, value):
      """
      Assign ``value`` at ``key``.

      A callable ``key`` is first resolved by ``com.apply_if_callable``.
      The assignment itself is performed by the nested ``setitem``
      helper, which tries progressively more general strategies; the
      cacher is refreshed afterwards when chained assignment is possible.
      """
      key = com.apply_if_callable(key, self)

      def setitem(key, value):
          try:
              # Fastest route: set through the index engine.
              self._set_with_engine(key, value)
              return
          except com.SettingWithCopyError:
              raise
          except (KeyError, ValueError):
              values = self._values
              if (is_integer(key) and
                      not self.index.inferred_type == 'integer'):
                  # Integer key on a non-integer index: positional set.
                  values[key] = value
                  return
              elif key is Ellipsis:
                  self[:] = value
                  return
              elif com.is_bool_indexer(key):
                  pass
              elif is_timedelta64_dtype(self.dtype):
                  # reassign a null value to iNaT
                  if isna(value):
                      value = iNaT
                      try:
                          self.index._engine.set_value(self._values, key,
                                                       value)
                          return
                      except TypeError:
                          pass
              # Remaining cases are delegated to label-based assignment.
              self.loc[key] = value
              return
          except TypeError as e:
              if (isinstance(key, tuple) and
                      not isinstance(self.index, MultiIndex)):
                  raise ValueError("Can only tuple-index with a MultiIndex")
              # python 3 type errors should be raised
              if _is_unorderable_exception(e):
                  raise IndexError(key)
          # Reached only after a TypeError that was not re-raised above.
          if com.is_bool_indexer(key):
              key = check_bool_indexer(self.index, key)
              try:
                  self._where(~key, value, inplace=True)
                  return
              except InvalidIndexError:
                  pass
          self._set_with(key, value)

      # do the setitem
      cacher_needs_updating = self._check_is_chained_assignment_possible()
      setitem(key, value)
      if cacher_needs_updating:
          self._maybe_update_cacher()
  871. def _set_with_engine(self, key, value):
  872. values = self._values
  873. try:
  874. self.index._engine.set_value(values, key, value)
  875. return
  876. except KeyError:
  877. values[self.index.get_loc(key)] = value
  878. return
  879. def _set_with(self, key, value):
  880. # other: fancy integer or otherwise
  881. if isinstance(key, slice):
  882. indexer = self.index._convert_slice_indexer(key, kind='getitem')
  883. return self._set_values(indexer, value)
  884. else:
  885. if isinstance(key, tuple):
  886. try:
  887. self._set_values(key, value)
  888. except Exception:
  889. pass
  890. if is_scalar(key):
  891. key = [key]
  892. elif not isinstance(key, (list, Series, np.ndarray)):
  893. try:
  894. key = list(key)
  895. except Exception:
  896. key = [key]
  897. if isinstance(key, Index):
  898. key_type = key.inferred_type
  899. else:
  900. key_type = lib.infer_dtype(key, skipna=False)
  901. if key_type == 'integer':
  902. if self.index.inferred_type == 'integer':
  903. self._set_labels(key, value)
  904. else:
  905. return self._set_values(key, value)
  906. elif key_type == 'boolean':
  907. self._set_values(key.astype(np.bool_), value)
  908. else:
  909. self._set_labels(key, value)
  910. def _set_labels(self, key, value):
  911. if isinstance(key, Index):
  912. key = key.values
  913. else:
  914. key = com.asarray_tuplesafe(key)
  915. indexer = self.index.get_indexer(key)
  916. mask = indexer == -1
  917. if mask.any():
  918. raise ValueError('%s not contained in the index' % str(key[mask]))
  919. self._set_values(indexer, value)
  920. def _set_values(self, key, value):
  921. if isinstance(key, Series):
  922. key = key._values
  923. self._data = self._data.setitem(indexer=key, value=value)
  924. self._maybe_update_cacher()
  925. def repeat(self, repeats, axis=None):
  926. """
  927. Repeat elements of a Series.
  928. Returns a new Series where each element of the current Series
  929. is repeated consecutively a given number of times.
  930. Parameters
  931. ----------
  932. repeats : int or array of ints
  933. The number of repetitions for each element. This should be a
  934. non-negative integer. Repeating 0 times will return an empty
  935. Series.
  936. axis : None
  937. Must be ``None``. Has no effect but is accepted for compatibility
  938. with numpy.
  939. Returns
  940. -------
  941. Series
  942. Newly created Series with repeated elements.
  943. See Also
  944. --------
  945. Index.repeat : Equivalent function for Index.
  946. numpy.repeat : Similar method for :class:`numpy.ndarray`.
  947. Examples
  948. --------
  949. >>> s = pd.Series(['a', 'b', 'c'])
  950. >>> s
  951. 0 a
  952. 1 b
  953. 2 c
  954. dtype: object
  955. >>> s.repeat(2)
  956. 0 a
  957. 0 a
  958. 1 b
  959. 1 b
  960. 2 c
  961. 2 c
  962. dtype: object
  963. >>> s.repeat([1, 2, 3])
  964. 0 a
  965. 1 b
  966. 1 b
  967. 2 c
  968. 2 c
  969. 2 c
  970. dtype: object
  971. """
  972. nv.validate_repeat(tuple(), dict(axis=axis))
  973. new_index = self.index.repeat(repeats)
  974. new_values = self._values.repeat(repeats)
  975. return self._constructor(new_values,
  976. index=new_index).__finalize__(self)
  977. def get_value(self, label, takeable=False):
  978. """
  979. Quickly retrieve single value at passed index label.
  980. .. deprecated:: 0.21.0
  981. Please use .at[] or .iat[] accessors.
  982. Parameters
  983. ----------
  984. label : object
  985. takeable : interpret the index as indexers, default False
  986. Returns
  987. -------
  988. scalar value
  989. """
  990. warnings.warn("get_value is deprecated and will be removed "
  991. "in a future release. Please use "
  992. ".at[] or .iat[] accessors instead", FutureWarning,
  993. stacklevel=2)
  994. return self._get_value(label, takeable=takeable)
  995. def _get_value(self, label, takeable=False):
  996. if takeable is True:
  997. return com.maybe_box_datetimelike(self._values[label])
  998. return self.index.get_value(self._values, label)
  999. _get_value.__doc__ = get_value.__doc__
  1000. def set_value(self, label, value, takeable=False):
  1001. """
  1002. Quickly set single value at passed label.
  1003. .. deprecated:: 0.21.0
  1004. Please use .at[] or .iat[] accessors.
  1005. If label is not contained, a new object is created with the label
  1006. placed at the end of the result index.
  1007. Parameters
  1008. ----------
  1009. label : object
  1010. Partial indexing with MultiIndex not allowed
  1011. value : object
  1012. Scalar value
  1013. takeable : interpret the index as indexers, default False
  1014. Returns
  1015. -------
  1016. Series
  1017. If label is contained, will be reference to calling Series,
  1018. otherwise a new object.
  1019. """
  1020. warnings.warn("set_value is deprecated and will be removed "
  1021. "in a future release. Please use "
  1022. ".at[] or .iat[] accessors instead", FutureWarning,
  1023. stacklevel=2)
  1024. return self._set_value(label, value, takeable=takeable)
  1025. def _set_value(self, label, value, takeable=False):
  1026. try:
  1027. if takeable:
  1028. self._values[label] = value
  1029. else:
  1030. self.index._engine.set_value(self._values, label, value)
  1031. except (KeyError, TypeError):
  1032. # set using a non-recursive method
  1033. self.loc[label] = value
  1034. return self
  1035. _set_value.__doc__ = set_value.__doc__
  1036. def reset_index(self, level=None, drop=False, name=None, inplace=False):
  1037. """
  1038. Generate a new DataFrame or Series with the index reset.
  1039. This is useful when the index needs to be treated as a column, or
  1040. when the index is meaningless and needs to be reset to the default
  1041. before another operation.
  1042. Parameters
  1043. ----------
  1044. level : int, str, tuple, or list, default optional
  1045. For a Series with a MultiIndex, only remove the specified levels
  1046. from the index. Removes all levels by default.
  1047. drop : bool, default False
  1048. Just reset the index, without inserting it as a column in
  1049. the new DataFrame.
  1050. name : object, optional
  1051. The name to use for the column containing the original Series
  1052. values. Uses ``self.name`` by default. This argument is ignored
  1053. when `drop` is True.
  1054. inplace : bool, default False
  1055. Modify the Series in place (do not create a new object).
  1056. Returns
  1057. -------
  1058. Series or DataFrame
  1059. When `drop` is False (the default), a DataFrame is returned.
  1060. The newly created columns will come first in the DataFrame,
  1061. followed by the original Series values.
  1062. When `drop` is True, a `Series` is returned.
  1063. In either case, if ``inplace=True``, no value is returned.
  1064. See Also
  1065. --------
  1066. DataFrame.reset_index: Analogous function for DataFrame.
  1067. Examples
  1068. --------
  1069. >>> s = pd.Series([1, 2, 3, 4], name='foo',
  1070. ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
  1071. Generate a DataFrame with default index.
  1072. >>> s.reset_index()
  1073. idx foo
  1074. 0 a 1
  1075. 1 b 2
  1076. 2 c 3
  1077. 3 d 4
  1078. To specify the name of the new column use `name`.
  1079. >>> s.reset_index(name='values')
  1080. idx values
  1081. 0 a 1
  1082. 1 b 2
  1083. 2 c 3
  1084. 3 d 4
  1085. To generate a new Series with the default set `drop` to True.
  1086. >>> s.reset_index(drop=True)
  1087. 0 1
  1088. 1 2
  1089. 2 3
  1090. 3 4
  1091. Name: foo, dtype: int64
  1092. To update the Series in place, without generating a new one
  1093. set `inplace` to True. Note that it also requires ``drop=True``.
  1094. >>> s.reset_index(inplace=True, drop=True)
  1095. >>> s
  1096. 0 1
  1097. 1 2
  1098. 2 3
  1099. 3 4
  1100. Name: foo, dtype: int64
  1101. The `level` parameter is interesting for Series with a multi-level
  1102. index.
  1103. >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
  1104. ... np.array(['one', 'two', 'one', 'two'])]
  1105. >>> s2 = pd.Series(
  1106. ... range(4), name='foo',
  1107. ... index=pd.MultiIndex.from_arrays(arrays,
  1108. ... names=['a', 'b']))
  1109. To remove a specific level from the Index, use `level`.
  1110. >>> s2.reset_index(level='a')
  1111. a foo
  1112. b
  1113. one bar 0
  1114. two bar 1
  1115. one baz 2
  1116. two baz 3
  1117. If `level` is not set, all levels are removed from the Index.
  1118. >>> s2.reset_index()
  1119. a b foo
  1120. 0 bar one 0
  1121. 1 bar two 1
  1122. 2 baz one 2
  1123. 3 baz two 3
  1124. """
  1125. inplace = validate_bool_kwarg(inplace, 'inplace')
  1126. if drop:
  1127. new_index = ibase.default_index(len(self))
  1128. if level is not None:
  1129. if not isinstance(level, (tuple, list)):
  1130. level = [level]
  1131. level = [self.index._get_level_number(lev) for lev in level]
  1132. if len(level) < self.index.nlevels:
  1133. new_index = self.index.droplevel(level)
  1134. if inplace:
  1135. self.index = new_index
  1136. # set name if it was passed, otherwise, keep the previous name
  1137. self.name = name or self.name
  1138. else:
  1139. return self._constructor(self._values.copy(),
  1140. index=new_index).__finalize__(self)
  1141. elif inplace:
  1142. raise TypeError('Cannot reset_index inplace on a Series '
  1143. 'to create a DataFrame')
  1144. else:
  1145. df = self.to_frame(name)
  1146. return df.reset_index(level=level, drop=drop)
  1147. # ----------------------------------------------------------------------
  1148. # Rendering Methods
  1149. def __unicode__(self):
  1150. """
  1151. Return a string representation for a particular DataFrame.
  1152. Invoked by unicode(df) in py2 only. Yields a Unicode String in both
  1153. py2/py3.
  1154. """
  1155. buf = StringIO(u(""))
  1156. width, height = get_terminal_size()
  1157. max_rows = (height if get_option("display.max_rows") == 0 else
  1158. get_option("display.max_rows"))
  1159. show_dimensions = get_option("display.show_dimensions")
  1160. self.to_string(buf=buf, name=self.name, dtype=self.dtype,
  1161. max_rows=max_rows, length=show_dimensions)
  1162. result = buf.getvalue()
  1163. return result
  1164. def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
  1165. index=True, length=False, dtype=False, name=False,
  1166. max_rows=None):
  1167. """
  1168. Render a string representation of the Series.
  1169. Parameters
  1170. ----------
  1171. buf : StringIO-like, optional
  1172. Buffer to write to.
  1173. na_rep : str, optional
  1174. String representation of NaN to use, default 'NaN'.
  1175. float_format : one-parameter function, optional
  1176. Formatter function to apply to columns' elements if they are
  1177. floats, default None.
  1178. header : bool, default True
  1179. Add the Series header (index name).
  1180. index : bool, optional
  1181. Add index (row) labels, default True.
  1182. length : bool, default False
  1183. Add the Series length.
  1184. dtype : bool, default False
  1185. Add the Series dtype.
  1186. name : bool, default False
  1187. Add the Series name if not None.
  1188. max_rows : int, optional
  1189. Maximum number of rows to show before truncating. If None, show
  1190. all.
  1191. Returns
  1192. -------
  1193. str or None
  1194. String representation of Series if ``buf=None``, otherwise None.
  1195. """
  1196. formatter = fmt.SeriesFormatter(self, name=name, length=length,
  1197. header=header, index=index,
  1198. dtype=dtype, na_rep=na_rep,
  1199. float_format=float_format,
  1200. max_rows=max_rows)
  1201. result = formatter.to_string()
  1202. # catch contract violations
  1203. if not isinstance(result, compat.text_type):
  1204. raise AssertionError("result must be of type unicode, type"
  1205. " of result is {0!r}"
  1206. "".format(result.__class__.__name__))
  1207. if buf is None:
  1208. return result
  1209. else:
  1210. try:
  1211. buf.write(result)
  1212. except AttributeError:
  1213. with open(buf, 'w') as f:
  1214. f.write(result)
  1215. # ----------------------------------------------------------------------
  1216. def iteritems(self):
  1217. """
  1218. Lazily iterate over (index, value) tuples.
  1219. """
  1220. return zip(iter(self.index), iter(self))
  1221. items = iteritems
  1222. # ----------------------------------------------------------------------
  1223. # Misc public methods
  1224. def keys(self):
  1225. """
  1226. Return alias for index.
  1227. """
  1228. return self.index
  1229. def to_dict(self, into=dict):
  1230. """
  1231. Convert Series to {label -> value} dict or dict-like object.
  1232. Parameters
  1233. ----------
  1234. into : class, default dict
  1235. The collections.Mapping subclass to use as the return
  1236. object. Can be the actual class or an empty
  1237. instance of the mapping type you want. If you want a
  1238. collections.defaultdict, you must pass it initialized.
  1239. .. versionadded:: 0.21.0
  1240. Returns
  1241. -------
  1242. collections.Mapping
  1243. Key-value representation of Series.
  1244. Examples
  1245. --------
  1246. >>> s = pd.Series([1, 2, 3, 4])
  1247. >>> s.to_dict()
  1248. {0: 1, 1: 2, 2: 3, 3: 4}
  1249. >>> from collections import OrderedDict, defaultdict
  1250. >>> s.to_dict(OrderedDict)
  1251. OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
  1252. >>> dd = defaultdict(list)
  1253. >>> s.to_dict(dd)
  1254. defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
  1255. """
  1256. # GH16122
  1257. into_c = com.standardize_mapping(into)
  1258. return into_c(compat.iteritems(self))
  1259. def to_frame(self, name=None):
  1260. """
  1261. Convert Series to DataFrame.
  1262. Parameters
  1263. ----------
  1264. name : object, default None
  1265. The passed name should substitute for the series name (if it has
  1266. one).
  1267. Returns
  1268. -------
  1269. DataFrame
  1270. DataFrame representation of Series.
  1271. Examples
  1272. --------
  1273. >>> s = pd.Series(["a", "b", "c"],
  1274. ... name="vals")
  1275. >>> s.to_frame()
  1276. vals
  1277. 0 a
  1278. 1 b
  1279. 2 c
  1280. """
  1281. if name is None:
  1282. df = self._constructor_expanddim(self)
  1283. else:
  1284. df = self._constructor_expanddim({name: self})
  1285. return df
  1286. def to_sparse(self, kind='block', fill_value=None):
  1287. """
  1288. Convert Series to SparseSeries.
  1289. Parameters
  1290. ----------
  1291. kind : {'block', 'integer'}, default 'block'
  1292. fill_value : float, defaults to NaN (missing)
  1293. Value to use for filling NaN values.
  1294. Returns
  1295. -------
  1296. SparseSeries
  1297. Sparse representation of the Series.
  1298. """
  1299. # TODO: deprecate
  1300. from pandas.core.sparse.series import SparseSeries
  1301. values = SparseArray(self, kind=kind, fill_value=fill_value)
  1302. return SparseSeries(
  1303. values, index=self.index, name=self.name
  1304. ).__finalize__(self)
  1305. def _set_name(self, name, inplace=False):
  1306. """
  1307. Set the Series name.
  1308. Parameters
  1309. ----------
  1310. name : str
  1311. inplace : bool
  1312. whether to modify `self` directly or return a copy
  1313. """
  1314. inplace = validate_bool_kwarg(inplace, 'inplace')
  1315. ser = self if inplace else self.copy()
  1316. ser.name = name
  1317. return ser
  1318. # ----------------------------------------------------------------------
  1319. # Statistics, overridden ndarray methods
  1320. # TODO: integrate bottleneck
  1321. def count(self, level=None):
  1322. """
  1323. Return number of non-NA/null observations in the Series.
  1324. Parameters
  1325. ----------
  1326. level : int or level name, default None
  1327. If the axis is a MultiIndex (hierarchical), count along a
  1328. particular level, collapsing into a smaller Series.
  1329. Returns
  1330. -------
  1331. int or Series (if level specified)
  1332. Number of non-null values in the Series.
  1333. Examples
  1334. --------
  1335. >>> s = pd.Series([0.0, 1.0, np.nan])
  1336. >>> s.count()
  1337. 2
  1338. """
  1339. if level is None:
  1340. return notna(com.values_from_object(self)).sum()
  1341. if isinstance(level, compat.string_types):
  1342. level = self.index._get_level_number(level)
  1343. lev = self.index.levels[level]
  1344. level_codes = np.array(self.index.codes[level], subok=False, copy=True)
  1345. mask = level_codes == -1
  1346. if mask.any():
  1347. level_codes[mask] = cnt = len(lev)
  1348. lev = lev.insert(cnt, lev._na_value)
  1349. obs = level_codes[notna(self.values)]
  1350. out = np.bincount(obs, minlength=len(lev) or None)
  1351. return self._constructor(out, index=lev,
  1352. dtype='int64').__finalize__(self)
  1353. def mode(self, dropna=True):
  1354. """
  1355. Return the mode(s) of the dataset.
  1356. Always returns Series even if only one value is returned.
  1357. Parameters
  1358. ----------
  1359. dropna : bool, default True
  1360. Don't consider counts of NaN/NaT.
  1361. .. versionadded:: 0.24.0
  1362. Returns
  1363. -------
  1364. Series
  1365. Modes of the Series in sorted order.
  1366. """
  1367. # TODO: Add option for bins like value_counts()
  1368. return algorithms.mode(self, dropna=dropna)
  1369. def unique(self):
  1370. """
  1371. Return unique values of Series object.
  1372. Uniques are returned in order of appearance. Hash table-based unique,
  1373. therefore does NOT sort.
  1374. Returns
  1375. -------
  1376. ndarray or ExtensionArray
  1377. The unique values returned as a NumPy array. See Notes.
  1378. See Also
  1379. --------
  1380. unique : Top-level unique method for any 1-d array-like object.
  1381. Index.unique : Return Index with unique values from an Index object.
  1382. Notes
  1383. -----
  1384. Returns the unique values as a NumPy array. In case of an
  1385. extension-array backed Series, a new
  1386. :class:`~api.extensions.ExtensionArray` of that type with just
  1387. the unique values is returned. This includes
  1388. * Categorical
  1389. * Period
  1390. * Datetime with Timezone
  1391. * Interval
  1392. * Sparse
  1393. * IntegerNA
  1394. See Examples section.
  1395. Examples
  1396. --------
  1397. >>> pd.Series([2, 1, 3, 3], name='A').unique()
  1398. array([2, 1, 3])
  1399. >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
  1400. array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
  1401. >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
  1402. ... for _ in range(3)]).unique()
  1403. <DatetimeArray>
  1404. ['2016-01-01 00:00:00-05:00']
  1405. Length: 1, dtype: datetime64[ns, US/Eastern]
  1406. An unordered Categorical will return categories in the order of
  1407. appearance.
  1408. >>> pd.Series(pd.Categorical(list('baabc'))).unique()
  1409. [b, a, c]
  1410. Categories (3, object): [b, a, c]
  1411. An ordered Categorical preserves the category ordering.
  1412. >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
  1413. ... ordered=True)).unique()
  1414. [b, a, c]
  1415. Categories (3, object): [a < b < c]
  1416. """
  1417. result = super(Series, self).unique()
  1418. return result
  1419. def drop_duplicates(self, keep='first', inplace=False):
  1420. """
  1421. Return Series with duplicate values removed.
  1422. Parameters
  1423. ----------
  1424. keep : {'first', 'last', ``False``}, default 'first'
  1425. - 'first' : Drop duplicates except for the first occurrence.
  1426. - 'last' : Drop duplicates except for the last occurrence.
  1427. - ``False`` : Drop all duplicates.
  1428. inplace : bool, default ``False``
  1429. If ``True``, performs operation inplace and returns None.
  1430. Returns
  1431. -------
  1432. Series
  1433. Series with duplicates dropped.
  1434. See Also
  1435. --------
  1436. Index.drop_duplicates : Equivalent method on Index.
  1437. DataFrame.drop_duplicates : Equivalent method on DataFrame.
  1438. Series.duplicated : Related method on Series, indicating duplicate
  1439. Series values.
  1440. Examples
  1441. --------
  1442. Generate an Series with duplicated entries.
  1443. >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
  1444. ... name='animal')
  1445. >>> s
  1446. 0 lama
  1447. 1 cow
  1448. 2 lama
  1449. 3 beetle
  1450. 4 lama
  1451. 5 hippo
  1452. Name: animal, dtype: object
  1453. With the 'keep' parameter, the selection behaviour of duplicated values
  1454. can be changed. The value 'first' keeps the first occurrence for each
  1455. set of duplicated entries. The default value of keep is 'first'.
  1456. >>> s.drop_duplicates()
  1457. 0 lama
  1458. 1 cow
  1459. 3 beetle
  1460. 5 hippo
  1461. Name: animal, dtype: object
  1462. The value 'last' for parameter 'keep' keeps the last occurrence for
  1463. each set of duplicated entries.
  1464. >>> s.drop_duplicates(keep='last')
  1465. 1 cow
  1466. 3 beetle
  1467. 4 lama
  1468. 5 hippo
  1469. Name: animal, dtype: object
  1470. The value ``False`` for parameter 'keep' discards all sets of
  1471. duplicated entries. Setting the value of 'inplace' to ``True`` performs
  1472. the operation inplace and returns ``None``.
  1473. >>> s.drop_duplicates(keep=False, inplace=True)
  1474. >>> s
  1475. 1 cow
  1476. 3 beetle
  1477. 5 hippo
  1478. Name: animal, dtype: object
  1479. """
  1480. return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
  1481. def duplicated(self, keep='first'):
  1482. """
  1483. Indicate duplicate Series values.
  1484. Duplicated values are indicated as ``True`` values in the resulting
  1485. Series. Either all duplicates, all except the first or all except the
  1486. last occurrence of duplicates can be indicated.
  1487. Parameters
  1488. ----------
  1489. keep : {'first', 'last', False}, default 'first'
  1490. - 'first' : Mark duplicates as ``True`` except for the first
  1491. occurrence.
  1492. - 'last' : Mark duplicates as ``True`` except for the last
  1493. occurrence.
  1494. - ``False`` : Mark all duplicates as ``True``.
  1495. Returns
  1496. -------
  1497. Series
  1498. Series indicating whether each value has occurred in the
  1499. preceding values.
  1500. See Also
  1501. --------
  1502. Index.duplicated : Equivalent method on pandas.Index.
  1503. DataFrame.duplicated : Equivalent method on pandas.DataFrame.
  1504. Series.drop_duplicates : Remove duplicate values from Series.
  1505. Examples
  1506. --------
  1507. By default, for each set of duplicated values, the first occurrence is
  1508. set on False and all others on True:
  1509. >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
  1510. >>> animals.duplicated()
  1511. 0 False
  1512. 1 False
  1513. 2 True
  1514. 3 False
  1515. 4 True
  1516. dtype: bool
  1517. which is equivalent to
  1518. >>> animals.duplicated(keep='first')
  1519. 0 False
  1520. 1 False
  1521. 2 True
  1522. 3 False
  1523. 4 True
  1524. dtype: bool
  1525. By using 'last', the last occurrence of each set of duplicated values
  1526. is set on False and all others on True:
  1527. >>> animals.duplicated(keep='last')
  1528. 0 True
  1529. 1 False
  1530. 2 True
  1531. 3 False
  1532. 4 False
  1533. dtype: bool
  1534. By setting keep on ``False``, all duplicates are True:
  1535. >>> animals.duplicated(keep=False)
  1536. 0 True
  1537. 1 False
  1538. 2 True
  1539. 3 False
  1540. 4 True
  1541. dtype: bool
  1542. """
  1543. return super(Series, self).duplicated(keep=keep)
  1544. def idxmin(self, axis=0, skipna=True, *args, **kwargs):
  1545. """
  1546. Return the row label of the minimum value.
  1547. If multiple values equal the minimum, the first row label with that
  1548. value is returned.
  1549. Parameters
  1550. ----------
  1551. skipna : bool, default True
  1552. Exclude NA/null values. If the entire Series is NA, the result
  1553. will be NA.
  1554. axis : int, default 0
  1555. For compatibility with DataFrame.idxmin. Redundant for application
  1556. on Series.
  1557. *args, **kwargs
  1558. Additional keywords have no effect but might be accepted
  1559. for compatibility with NumPy.
  1560. Returns
  1561. -------
  1562. Index
  1563. Label of the minimum value.
  1564. Raises
  1565. ------
  1566. ValueError
  1567. If the Series is empty.
  1568. See Also
  1569. --------
  1570. numpy.argmin : Return indices of the minimum values
  1571. along the given axis.
  1572. DataFrame.idxmin : Return index of first occurrence of minimum
  1573. over requested axis.
  1574. Series.idxmax : Return index *label* of the first occurrence
  1575. of maximum of values.
  1576. Notes
  1577. -----
  1578. This method is the Series version of ``ndarray.argmin``. This method
  1579. returns the label of the minimum, while ``ndarray.argmin`` returns
  1580. the position. To get the position, use ``series.values.argmin()``.
  1581. Examples
  1582. --------
  1583. >>> s = pd.Series(data=[1, None, 4, 1],
  1584. ... index=['A', 'B', 'C', 'D'])
  1585. >>> s
  1586. A 1.0
  1587. B NaN
  1588. C 4.0
  1589. D 1.0
  1590. dtype: float64
  1591. >>> s.idxmin()
  1592. 'A'
  1593. If `skipna` is False and there is an NA value in the data,
  1594. the function returns ``nan``.
  1595. >>> s.idxmin(skipna=False)
  1596. nan
  1597. """
  1598. skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
  1599. i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
  1600. if i == -1:
  1601. return np.nan
  1602. return self.index[i]
  1603. def idxmax(self, axis=0, skipna=True, *args, **kwargs):
  1604. """
  1605. Return the row label of the maximum value.
  1606. If multiple values equal the maximum, the first row label with that
  1607. value is returned.
  1608. Parameters
  1609. ----------
  1610. skipna : bool, default True
  1611. Exclude NA/null values. If the entire Series is NA, the result
  1612. will be NA.
  1613. axis : int, default 0
  1614. For compatibility with DataFrame.idxmax. Redundant for application
  1615. on Series.
  1616. *args, **kwargs
  1617. Additional keywords have no effect but might be accepted
  1618. for compatibility with NumPy.
  1619. Returns
  1620. -------
  1621. Index
  1622. Label of the maximum value.
  1623. Raises
  1624. ------
  1625. ValueError
  1626. If the Series is empty.
  1627. See Also
  1628. --------
  1629. numpy.argmax : Return indices of the maximum values
  1630. along the given axis.
  1631. DataFrame.idxmax : Return index of first occurrence of maximum
  1632. over requested axis.
  1633. Series.idxmin : Return index *label* of the first occurrence
  1634. of minimum of values.
  1635. Notes
  1636. -----
  1637. This method is the Series version of ``ndarray.argmax``. This method
  1638. returns the label of the maximum, while ``ndarray.argmax`` returns
  1639. the position. To get the position, use ``series.values.argmax()``.
  1640. Examples
  1641. --------
  1642. >>> s = pd.Series(data=[1, None, 4, 3, 4],
  1643. ... index=['A', 'B', 'C', 'D', 'E'])
  1644. >>> s
  1645. A 1.0
  1646. B NaN
  1647. C 4.0
  1648. D 3.0
  1649. E 4.0
  1650. dtype: float64
  1651. >>> s.idxmax()
  1652. 'C'
  1653. If `skipna` is False and there is an NA value in the data,
  1654. the function returns ``nan``.
  1655. >>> s.idxmax(skipna=False)
  1656. nan
  1657. """
  1658. skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
  1659. i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
  1660. if i == -1:
  1661. return np.nan
  1662. return self.index[i]
    # ndarray compat
    # ``argmin`` / ``argmax`` are deprecated aliases built by ``deprecate``
    # around ``idxmin`` / ``idxmax`` (deprecation version '0.21.0').  The
    # messages are passed through ``dedent`` before being handed over.
    argmin = deprecate(
        'argmin', idxmin, '0.21.0',
        msg=dedent("""
        The current behaviour of 'Series.argmin' is deprecated, use 'idxmin'
        instead.
        The behavior of 'argmin' will be corrected to return the positional
        minimum in the future. For now, use 'series.values.argmin' or
        'np.argmin(np.array(values))' to get the position of the minimum
        row.""")
    )
    argmax = deprecate(
        'argmax', idxmax, '0.21.0',
        msg=dedent("""
        The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
        instead.
        The behavior of 'argmax' will be corrected to return the positional
        maximum in the future. For now, use 'series.values.argmax' or
        'np.argmax(np.array(values))' to get the position of the maximum
        row.""")
    )
  1684. def round(self, decimals=0, *args, **kwargs):
  1685. """
  1686. Round each value in a Series to the given number of decimals.
  1687. Parameters
  1688. ----------
  1689. decimals : int
  1690. Number of decimal places to round to (default: 0).
  1691. If decimals is negative, it specifies the number of
  1692. positions to the left of the decimal point.
  1693. Returns
  1694. -------
  1695. Series
  1696. Rounded values of the Series.
  1697. See Also
  1698. --------
  1699. numpy.around : Round values of an np.array.
  1700. DataFrame.round : Round values of a DataFrame.
  1701. Examples
  1702. --------
  1703. >>> s = pd.Series([0.1, 1.3, 2.7])
  1704. >>> s.round()
  1705. 0 0.0
  1706. 1 1.0
  1707. 2 3.0
  1708. dtype: float64
  1709. """
  1710. nv.validate_round(args, kwargs)
  1711. result = com.values_from_object(self).round(decimals)
  1712. result = self._constructor(result, index=self.index).__finalize__(self)
  1713. return result
  1714. def quantile(self, q=0.5, interpolation='linear'):
  1715. """
  1716. Return value at the given quantile.
  1717. Parameters
  1718. ----------
  1719. q : float or array-like, default 0.5 (50% quantile)
  1720. 0 <= q <= 1, the quantile(s) to compute.
  1721. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
  1722. .. versionadded:: 0.18.0
  1723. This optional parameter specifies the interpolation method to use,
  1724. when the desired quantile lies between two data points `i` and `j`:
  1725. * linear: `i + (j - i) * fraction`, where `fraction` is the
  1726. fractional part of the index surrounded by `i` and `j`.
  1727. * lower: `i`.
  1728. * higher: `j`.
  1729. * nearest: `i` or `j` whichever is nearest.
  1730. * midpoint: (`i` + `j`) / 2.
  1731. Returns
  1732. -------
  1733. float or Series
  1734. If ``q`` is an array, a Series will be returned where the
  1735. index is ``q`` and the values are the quantiles, otherwise
  1736. a float will be returned.
  1737. See Also
  1738. --------
  1739. core.window.Rolling.quantile
  1740. numpy.percentile
  1741. Examples
  1742. --------
  1743. >>> s = pd.Series([1, 2, 3, 4])
  1744. >>> s.quantile(.5)
  1745. 2.5
  1746. >>> s.quantile([.25, .5, .75])
  1747. 0.25 1.75
  1748. 0.50 2.50
  1749. 0.75 3.25
  1750. dtype: float64
  1751. """
  1752. self._check_percentile(q)
  1753. # We dispatch to DataFrame so that core.internals only has to worry
  1754. # about 2D cases.
  1755. df = self.to_frame()
  1756. result = df.quantile(q=q, interpolation=interpolation,
  1757. numeric_only=False)
  1758. if result.ndim == 2:
  1759. result = result.iloc[:, 0]
  1760. if is_list_like(q):
  1761. result.name = self.name
  1762. return self._constructor(result,
  1763. index=Float64Index(q),
  1764. name=self.name)
  1765. else:
  1766. # scalar
  1767. return result.iloc[0]
  def corr(self, other, method='pearson', min_periods=None):
      """
      Compute correlation with `other` Series, excluding missing values.

      Both Series are aligned on the intersection of their indexes before
      the correlation is computed.

      Parameters
      ----------
      other : Series
          Series with which to compute the correlation.
      method : {'pearson', 'kendall', 'spearman'} or callable
          * pearson : standard correlation coefficient
          * kendall : Kendall Tau correlation coefficient
          * spearman : Spearman rank correlation
          * callable: callable with input two 1d ndarray
            and returning a float

          .. versionadded:: 0.24.0
      min_periods : int, optional
          Minimum number of observations needed to have a valid result.

      Returns
      -------
      float
          Correlation with other. ``np.nan`` is returned when the aligned
          Series are empty.

      Raises
      ------
      ValueError
          If the aligned Series are non-empty and `method` is neither one
          of the supported names nor a callable.

      Examples
      --------
      >>> def histogram_intersection(a, b):
      ...     v = np.minimum(a, b).sum().round(decimals=1)
      ...     return v
      >>> s1 = pd.Series([.2, .0, .6, .2])
      >>> s2 = pd.Series([.3, .6, .0, .1])
      >>> s1.corr(s2, method=histogram_intersection)
      0.3
      """
      this, other = self.align(other, join='inner', copy=False)
      if len(this) == 0:
          # Nothing in common between the two indexes.
          return np.nan

      if method in ['pearson', 'spearman', 'kendall'] or callable(method):
          return nanops.nancorr(this.values, other.values, method=method,
                                min_periods=min_periods)

      # The message lists every accepted form, including callables.
      raise ValueError("method must be either 'pearson', "
                       "'spearman', 'kendall', or a callable, '{method}' "
                       "was supplied".format(method=method))
  def cov(self, other, min_periods=None):
      """
      Compute covariance with Series, excluding missing values.

      The two Series are first aligned on the labels they have in common.

      Parameters
      ----------
      other : Series
          Series with which to compute the covariance.
      min_periods : int, optional
          Minimum number of observations needed to have a valid result.

      Returns
      -------
      float
          Covariance between Series and other normalized by N-1
          (unbiased estimator). ``np.nan`` when the aligned Series are
          empty.

      Examples
      --------
      >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
      >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
      >>> s1.cov(s2)
      -0.01685762652715874
      """
      left, right = self.align(other, join='inner', copy=False)
      if not len(left):
          # No overlapping labels, so there is nothing to measure.
          return np.nan
      left_values = left.values
      right_values = right.values
      return nanops.nancov(left_values, right_values,
                           min_periods=min_periods)
  1833. def diff(self, periods=1):
  1834. """
  1835. First discrete difference of element.
  1836. Calculates the difference of a Series element compared with another
  1837. element in the Series (default is element in previous row).
  1838. Parameters
  1839. ----------
  1840. periods : int, default 1
  1841. Periods to shift for calculating difference, accepts negative
  1842. values.
  1843. Returns
  1844. -------
  1845. Series
  1846. First differences of the Series.
  1847. See Also
  1848. --------
  1849. Series.pct_change: Percent change over given number of periods.
  1850. Series.shift: Shift index by desired number of periods with an
  1851. optional time freq.
  1852. DataFrame.diff: First discrete difference of object.
  1853. Examples
  1854. --------
  1855. Difference with previous row
  1856. >>> s = pd.Series([1, 1, 2, 3, 5, 8])
  1857. >>> s.diff()
  1858. 0 NaN
  1859. 1 0.0
  1860. 2 1.0
  1861. 3 1.0
  1862. 4 2.0
  1863. 5 3.0
  1864. dtype: float64
  1865. Difference with 3rd previous row
  1866. >>> s.diff(periods=3)
  1867. 0 NaN
  1868. 1 NaN
  1869. 2 NaN
  1870. 3 2.0
  1871. 4 4.0
  1872. 5 6.0
  1873. dtype: float64
  1874. Difference with following row
  1875. >>> s.diff(periods=-1)
  1876. 0 0.0
  1877. 1 -1.0
  1878. 2 -1.0
  1879. 3 -2.0
  1880. 4 -3.0
  1881. 5 NaN
  1882. dtype: float64
  1883. """
  1884. result = algorithms.diff(com.values_from_object(self), periods)
  1885. return self._constructor(result, index=self.index).__finalize__(self)
  def autocorr(self, lag=1):
      """
      Compute the lag-N autocorrelation.

      The Series is correlated (Pearson) against a copy of itself shifted
      by `lag` periods.

      Parameters
      ----------
      lag : int, default 1
          Number of lags to apply before performing autocorrelation.

      Returns
      -------
      float
          The Pearson correlation between self and self.shift(lag).

      See Also
      --------
      Series.corr : Compute the correlation between two Series.
      Series.shift : Shift index by desired number of periods.
      DataFrame.corr : Compute pairwise correlation of columns.
      DataFrame.corrwith : Compute pairwise correlation between rows or
          columns of two DataFrame objects.

      Notes
      -----
      If the Pearson correlation is not well defined return 'NaN'.

      Examples
      --------
      >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
      >>> s.autocorr()  # doctest: +ELLIPSIS
      0.10355...
      >>> s.autocorr(lag=2)  # doctest: +ELLIPSIS
      -0.99999...

      If the Pearson correlation is not well defined, then 'NaN' is returned.

      >>> s = pd.Series([1, 0, 0, 0])
      >>> s.autocorr()
      nan
      """
      lagged = self.shift(lag)
      return self.corr(lagged)
  def dot(self, other):
      """
      Compute the dot product between the Series and the columns of other.

      This method computes the dot product between the Series and another
      one, or the Series and each columns of a DataFrame, or the Series and
      each columns of an array.

      It can also be called using `self @ other` in Python >= 3.5.

      Parameters
      ----------
      other : Series, DataFrame or array-like
          The other object to compute the dot product with its columns.

      Returns
      -------
      scalar, Series or numpy.ndarray
          Return the dot product of the Series and other if other is a
          Series, the Series of the dot product of Series and each column
          of other if other is a DataFrame or a numpy.ndarray between the
          Series and each columns of the numpy array.

      Raises
      ------
      ValueError
          If `other` is a Series or DataFrame whose index does not hold
          the same labels as this Series, or if `other` is array-like and
          its first dimension differs in length from this Series.

      See Also
      --------
      DataFrame.dot: Compute the matrix product with the DataFrame.
      Series.mul: Multiplication of series and other, element-wise.

      Notes
      -----
      The Series and other has to share the same index if other is a Series
      or a DataFrame.

      Examples
      --------
      >>> s = pd.Series([0, 1, 2, 3])
      >>> other = pd.Series([-1, 2, -3, 4])
      >>> s.dot(other)
      8
      >>> s @ other
      8
      >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
      >>> s.dot(df)
      0    24
      1    14
      dtype: int64
      >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
      >>> s.dot(arr)
      array([24, 14])
      """
      from pandas.core.frame import DataFrame

      if isinstance(other, (Series, DataFrame)):
          # A union longer than either index means at least one label is
          # missing on one side, so the operands cannot be lined up.
          common = self.index.union(other.index)
          if (len(common) > len(self.index) or
                  len(common) > len(other.index)):
              raise ValueError('matrices are not aligned')

          left = self.reindex(index=common, copy=False)
          right = other.reindex(index=common, copy=False)
          lvals = left.values
          rvals = right.values
      else:
          lvals = self.values
          rvals = np.asarray(other)
          if lvals.shape[0] != rvals.shape[0]:
              # ValueError keeps this consistent with the alignment check
              # above; it is still caught by ``except Exception``.
              raise ValueError('Dot product shape mismatch, %s vs %s' %
                               (lvals.shape, rvals.shape))

      if isinstance(other, DataFrame):
          return self._constructor(np.dot(lvals, rvals),
                                   index=other.columns).__finalize__(self)
      elif isinstance(other, Series) or isinstance(rvals, np.ndarray):
          return np.dot(lvals, rvals)
      else:  # pragma: no cover
          raise TypeError('unsupported type: %s' % type(other))
  def __matmul__(self, other):
      """
      Implement ``self @ other`` (Python>=3.5) by delegating to `dot`.
      """
      product = self.dot(other)
      return product
  def __rmatmul__(self, other):
      """
      Implement the reflected ``@`` operator (Python>=3.5).

      `other` is transposed and then handed to `dot`.
      """
      transposed = np.transpose(other)
      return self.dot(transposed)
  @Substitution(klass='Series')
  @Appender(base._shared_docs['searchsorted'])
  def searchsorted(self, value, side='left', sorter=None):
      # No inline docstring here: the shared 'searchsorted' text is supplied
      # through the Appender/Substitution decorators above.
      underlying = self._values
      return algorithms.searchsorted(underlying, value,
                                     side=side, sorter=sorter)
  2005. # -------------------------------------------------------------------
  2006. # Combination
  def append(self, to_append, ignore_index=False, verify_integrity=False):
      """
      Concatenate two or more Series.

      Parameters
      ----------
      to_append : Series or list/tuple of Series
          Series to append with self.
      ignore_index : bool, default False
          If True, do not use the index labels.

          .. versionadded:: 0.19.0
      verify_integrity : bool, default False
          If True, raise Exception on creating index with duplicates.

      Returns
      -------
      Series
          Concatenated Series.

      See Also
      --------
      concat : General function to concatenate DataFrame, Series
          or Panel objects.

      Notes
      -----
      Iteratively appending to a Series can be more computationally intensive
      than a single concatenate. A better solution is to append values to a
      list and then concatenate the list with the original Series all at
      once.

      Examples
      --------
      >>> s1 = pd.Series([1, 2, 3])
      >>> s2 = pd.Series([4, 5, 6])
      >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
      >>> s1.append(s2)
      0    1
      1    2
      2    3
      0    4
      1    5
      2    6
      dtype: int64

      >>> s1.append(s3)
      0    1
      1    2
      2    3
      3    4
      4    5
      5    6
      dtype: int64

      With `ignore_index` set to True:

      >>> s1.append(s2, ignore_index=True)
      0    1
      1    2
      2    3
      3    4
      4    5
      5    6
      dtype: int64

      With `verify_integrity` set to True:

      >>> s1.append(s2, verify_integrity=True)
      Traceback (most recent call last):
      ...
      ValueError: Indexes have overlapping values: [0, 1, 2]
      """
      from pandas.core.reshape.concat import concat

      if isinstance(to_append, (list, tuple)):
          # ``[self] + to_append`` would raise TypeError for a tuple, so
          # extend a fresh list instead; this handles both container types.
          to_concat = [self]
          to_concat.extend(to_append)
      else:
          to_concat = [self, to_append]
      return concat(to_concat, ignore_index=ignore_index,
                    verify_integrity=verify_integrity)
  def _binop(self, other, func, level=None, fill_value=None):
      """
      Apply a binary function to this Series and another Series.

      When the two indexes differ, both operands are outer-aligned before
      `func` is called on their values.

      Parameters
      ----------
      other : Series
      func : binary operator
      level : int or level name, default None
          Broadcast across a level, matching Index values on the
          passed MultiIndex level
      fill_value : float or object
          Value to substitute for NA/null values. If both Series are NA in a
          location, the result will be NA regardless of the passed fill value

      Returns
      -------
      Series
      """
      if not isinstance(other, Series):
          raise AssertionError('Other operand must be Series')

      if self.index.equals(other.index):
          lhs = self
      else:
          lhs, other = self.align(other, level=level, join='outer',
                                  copy=False)
      result_index = lhs.index

      lhs_values, rhs_values = ops.fill_binop(lhs.values, other.values,
                                              fill_value)

      # Floating-point warnings raised by ``func`` are silenced.
      with np.errstate(all='ignore'):
          raw = func(lhs_values, rhs_values)

      result_name = ops.get_op_result_name(self, other)
      combined = self._constructor(raw, index=result_index,
                                   name=result_name)
      combined = combined.__finalize__(self)
      if result_name is None:
          # When name is None, __finalize__ overwrites current name
          combined.name = None
      return combined
  def combine(self, other, func, fill_value=None):
      """
      Combine the Series with a Series or scalar according to `func`.

      `func` is called element by element to pick or compute each value of
      the result. Where a label exists in only one of the two objects,
      `fill_value` stands in for the missing side.

      Parameters
      ----------
      other : Series or scalar
          The value(s) to be combined with the `Series`.
      func : function
          Function that takes two scalars as inputs and returns an element.
      fill_value : scalar, optional
          The value to assume when an index is missing from
          one Series or the other. The default specifies to use the
          appropriate NaN value for the underlying dtype of the Series.

      Returns
      -------
      Series
          The result of combining the Series with the other object.

      See Also
      --------
      Series.combine_first : Combine Series values, choosing the calling
          Series' values first.

      Examples
      --------
      Consider 2 Datasets ``s1`` and ``s2`` containing
      highest clocked speeds of different birds.

      >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
      >>> s1
      falcon    330.0
      eagle     160.0
      dtype: float64
      >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
      >>> s2
      falcon    345.0
      eagle     200.0
      duck       30.0
      dtype: float64

      Now, to combine the two datasets and view the highest speeds
      of the birds across the two datasets

      >>> s1.combine(s2, max)
      duck        NaN
      eagle     200.0
      falcon    345.0
      dtype: float64

      In the previous example, the resulting value for duck is missing,
      because the maximum of a NaN and a float is a NaN.
      So, in the example, we set ``fill_value=0``,
      so the maximum value returned will be the value from some dataset.

      >>> s1.combine(s2, max, fill_value=0)
      duck       30.0
      eagle     200.0
      falcon    345.0
      dtype: float64
      """
      if fill_value is None:
          fill_value = na_value_for_dtype(self.dtype, compat=False)

      if not isinstance(other, Series):
          # Assume that other is a scalar, so apply the function for
          # each element in the Series
          result_index = self.index
          with np.errstate(all='ignore'):
              combined = [func(left, other) for left in self._values]
          result_name = self.name
      else:
          # If other is a Series, result is based on union of Series,
          # so do this element by element
          result_index = self.index.union(other.index)
          result_name = ops.get_op_result_name(self, other)
          with np.errstate(all='ignore'):
              combined = [func(self.get(label, fill_value),
                               other.get(label, fill_value))
                          for label in result_index]

      if (not is_categorical_dtype(self.values) and
              is_extension_array_dtype(self.values)):
          # The function can return something of any type, so check
          # if the type is compatible with the calling EA.
          try:
              combined = self._values._from_sequence(combined)
          except Exception:
              # https://github.com/pandas-dev/pandas/issues/22850
              # pandas has no control over what 3rd-party ExtensionArrays
              # do in _from_sequence. We still want ops to work
              # though, so we catch any regular Exception.
              pass

      return self._constructor(combined, index=result_index,
                               name=result_name)
  2203. def combine_first(self, other):
  2204. """
  2205. Combine Series values, choosing the calling Series's values first.
  2206. Parameters
  2207. ----------
  2208. other : Series
  2209. The value(s) to be combined with the `Series`.
  2210. Returns
  2211. -------
  2212. Series
  2213. The result of combining the Series with the other object.
  2214. See Also
  2215. --------
  2216. Series.combine : Perform elementwise operation on two Series
  2217. using a given function.
  2218. Notes
  2219. -----
  2220. Result index will be the union of the two indexes.
  2221. Examples
  2222. --------
  2223. >>> s1 = pd.Series([1, np.nan])
  2224. >>> s2 = pd.Series([3, 4])
  2225. >>> s1.combine_first(s2)
  2226. 0 1.0
  2227. 1 4.0
  2228. dtype: float64
  2229. """
  2230. new_index = self.index.union(other.index)
  2231. this = self.reindex(new_index, copy=False)
  2232. other = other.reindex(new_index, copy=False)
  2233. if is_datetimelike(this) and not is_datetimelike(other):
  2234. other = to_datetime(other)
  2235. return this.where(notna(this), other)
  2236. def update(self, other):
  2237. """
  2238. Modify Series in place using non-NA values from passed
  2239. Series. Aligns on index.
  2240. Parameters
  2241. ----------
  2242. other : Series
  2243. Examples
  2244. --------
  2245. >>> s = pd.Series([1, 2, 3])
  2246. >>> s.update(pd.Series([4, 5, 6]))
  2247. >>> s
  2248. 0 4
  2249. 1 5
  2250. 2 6
  2251. dtype: int64
  2252. >>> s = pd.Series(['a', 'b', 'c'])
  2253. >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
  2254. >>> s
  2255. 0 d
  2256. 1 b
  2257. 2 e
  2258. dtype: object
  2259. >>> s = pd.Series([1, 2, 3])
  2260. >>> s.update(pd.Series([4, 5, 6, 7, 8]))
  2261. >>> s
  2262. 0 4
  2263. 1 5
  2264. 2 6
  2265. dtype: int64
  2266. If ``other`` contains NaNs the corresponding values are not updated
  2267. in the original Series.
  2268. >>> s = pd.Series([1, 2, 3])
  2269. >>> s.update(pd.Series([4, np.nan, 6]))
  2270. >>> s
  2271. 0 4
  2272. 1 2
  2273. 2 6
  2274. dtype: int64
  2275. """
  2276. other = other.reindex_like(self)
  2277. mask = notna(other)
  2278. self._data = self._data.putmask(mask=mask, new=other, inplace=True)
  2279. self._maybe_update_cacher()
  2280. # ----------------------------------------------------------------------
  2281. # Reindexing, sorting
  2282. def sort_values(self, axis=0, ascending=True, inplace=False,
  2283. kind='quicksort', na_position='last'):
  2284. """
  2285. Sort by the values.
  2286. Sort a Series in ascending or descending order by some
  2287. criterion.
  2288. Parameters
  2289. ----------
  2290. axis : {0 or 'index'}, default 0
  2291. Axis to direct sorting. The value 'index' is accepted for
  2292. compatibility with DataFrame.sort_values.
  2293. ascending : bool, default True
  2294. If True, sort values in ascending order, otherwise descending.
  2295. inplace : bool, default False
  2296. If True, perform operation in-place.
  2297. kind : {'quicksort', 'mergesort' or 'heapsort'}, default 'quicksort'
  2298. Choice of sorting algorithm. See also :func:`numpy.sort` for more
  2299. information. 'mergesort' is the only stable algorithm.
  2300. na_position : {'first' or 'last'}, default 'last'
  2301. Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
  2302. the end.
  2303. Returns
  2304. -------
  2305. Series
  2306. Series ordered by values.
  2307. See Also
  2308. --------
  2309. Series.sort_index : Sort by the Series indices.
  2310. DataFrame.sort_values : Sort DataFrame by the values along either axis.
  2311. DataFrame.sort_index : Sort DataFrame by indices.
  2312. Examples
  2313. --------
  2314. >>> s = pd.Series([np.nan, 1, 3, 10, 5])
  2315. >>> s
  2316. 0 NaN
  2317. 1 1.0
  2318. 2 3.0
  2319. 3 10.0
  2320. 4 5.0
  2321. dtype: float64
  2322. Sort values ascending order (default behaviour)
  2323. >>> s.sort_values(ascending=True)
  2324. 1 1.0
  2325. 2 3.0
  2326. 4 5.0
  2327. 3 10.0
  2328. 0 NaN
  2329. dtype: float64
  2330. Sort values descending order
  2331. >>> s.sort_values(ascending=False)
  2332. 3 10.0
  2333. 4 5.0
  2334. 2 3.0
  2335. 1 1.0
  2336. 0 NaN
  2337. dtype: float64
  2338. Sort values inplace
  2339. >>> s.sort_values(ascending=False, inplace=True)
  2340. >>> s
  2341. 3 10.0
  2342. 4 5.0
  2343. 2 3.0
  2344. 1 1.0
  2345. 0 NaN
  2346. dtype: float64
  2347. Sort values putting NAs first
  2348. >>> s.sort_values(na_position='first')
  2349. 0 NaN
  2350. 1 1.0
  2351. 2 3.0
  2352. 4 5.0
  2353. 3 10.0
  2354. dtype: float64
  2355. Sort a series of strings
  2356. >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
  2357. >>> s
  2358. 0 z
  2359. 1 b
  2360. 2 d
  2361. 3 a
  2362. 4 c
  2363. dtype: object
  2364. >>> s.sort_values()
  2365. 3 a
  2366. 1 b
  2367. 4 c
  2368. 2 d
  2369. 0 z
  2370. dtype: object
  2371. """
  2372. inplace = validate_bool_kwarg(inplace, 'inplace')
  2373. # Validate the axis parameter
  2374. self._get_axis_number(axis)
  2375. # GH 5856/5853
  2376. if inplace and self._is_cached:
  2377. raise ValueError("This Series is a view of some other array, to "
  2378. "sort in-place you must create a copy")
  2379. def _try_kind_sort(arr):
  2380. # easier to ask forgiveness than permission
  2381. try:
  2382. # if kind==mergesort, it can fail for object dtype
  2383. return arr.argsort(kind=kind)
  2384. except TypeError:
  2385. # stable sort not available for object dtype
  2386. # uses the argsort default quicksort
  2387. return arr.argsort(kind='quicksort')
  2388. arr = self._values
  2389. sortedIdx = np.empty(len(self), dtype=np.int32)
  2390. bad = isna(arr)
  2391. good = ~bad
  2392. idx = ibase.default_index(len(self))
  2393. argsorted = _try_kind_sort(arr[good])
  2394. if is_list_like(ascending):
  2395. if len(ascending) != 1:
  2396. raise ValueError('Length of ascending (%d) must be 1 '
  2397. 'for Series' % (len(ascending)))
  2398. ascending = ascending[0]
  2399. if not is_bool(ascending):
  2400. raise ValueError('ascending must be boolean')
  2401. if not ascending:
  2402. argsorted = argsorted[::-1]
  2403. if na_position == 'last':
  2404. n = good.sum()
  2405. sortedIdx[:n] = idx[good][argsorted]
  2406. sortedIdx[n:] = idx[bad]
  2407. elif na_position == 'first':
  2408. n = bad.sum()
  2409. sortedIdx[n:] = idx[good][argsorted]
  2410. sortedIdx[:n] = idx[bad]
  2411. else:
  2412. raise ValueError('invalid na_position: {!r}'.format(na_position))
  2413. result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])
  2414. if inplace:
  2415. self._update_inplace(result)
  2416. else:
  2417. return result.__finalize__(self)
  def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
                 kind='quicksort', na_position='last', sort_remaining=True):
      """
      Sort Series by index labels.

      Returns a new Series sorted by label if `inplace` argument is
      ``False``, otherwise updates the original series and returns None.

      Parameters
      ----------
      axis : int, default 0
          Axis to direct sorting. This can only be 0 for Series.
      level : int, optional
          If not None, sort on values in specified index level(s).
      ascending : bool, default true
          Sort ascending vs. descending.
      inplace : bool, default False
          If True, perform operation in-place.
      kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
          Choice of sorting algorithm. See also :func:`numpy.sort` for more
          information. 'mergesort' is the only stable algorithm. For
          DataFrames, this option is only applied when sorting on a single
          column or label.
      na_position : {'first', 'last'}, default 'last'
          If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
          Not implemented for MultiIndex.
      sort_remaining : bool, default True
          If True and sorting by level and index is multilevel, sort by other
          levels too (in order) after sorting by specified level.

      Returns
      -------
      Series
          The original Series sorted by the labels.

      See Also
      --------
      DataFrame.sort_index: Sort DataFrame by the index.
      DataFrame.sort_values: Sort DataFrame by the value.
      Series.sort_values : Sort Series by the value.

      Examples
      --------
      >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
      >>> s.sort_index()
      1    c
      2    b
      3    a
      4    d
      dtype: object

      Sort Descending

      >>> s.sort_index(ascending=False)
      4    d
      3    a
      2    b
      1    c
      dtype: object

      Sort Inplace

      >>> s.sort_index(inplace=True)
      >>> s
      1    c
      2    b
      3    a
      4    d
      dtype: object

      By default NaNs are put at the end, but use `na_position` to place
      them at the beginning

      >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
      >>> s.sort_index(na_position='first')
      NaN     d
       1.0    c
       2.0    b
       3.0    a
      dtype: object

      Specify index level to sort

      >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
      ...                     'baz', 'baz', 'bar', 'bar']),
      ...           np.array(['two', 'one', 'two', 'one',
      ...                     'two', 'one', 'two', 'one'])]
      >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
      >>> s.sort_index(level=1)
      bar  one    8
      baz  one    6
      foo  one    4
      qux  one    2
      bar  two    7
      baz  two    5
      foo  two    3
      qux  two    1
      dtype: int64

      Does not sort by remaining levels when sorting by levels

      >>> s.sort_index(level=1, sort_remaining=False)
      qux  one    2
      foo  one    4
      baz  one    6
      bar  one    8
      qux  two    1
      foo  two    3
      baz  two    5
      bar  two    7
      dtype: int64
      """
      # TODO: this can be combined with DataFrame.sort_index impl as
      # almost identical
      inplace = validate_bool_kwarg(inplace, 'inplace')
      # Validate the axis parameter
      self._get_axis_number(axis)

      labels = self.index

      if level is not None:
          # Only the indexer is needed; the sorted index is rebuilt below.
          _, positions = labels.sortlevel(level, ascending=ascending,
                                          sort_remaining=sort_remaining)
      elif isinstance(labels, MultiIndex):
          from pandas.core.sorting import lexsort_indexer
          monotonic = labels._sort_levels_monotonic()
          positions = lexsort_indexer(monotonic._get_codes_for_sorting(),
                                      orders=ascending,
                                      na_position=na_position)
      else:
          from pandas.core.sorting import nargsort

          # Check monotonic-ness before sort an index
          # GH11080
          already_sorted = (labels.is_monotonic_increasing if ascending
                            else labels.is_monotonic_decreasing)
          if already_sorted:
              if inplace:
                  return
              return self.copy()

          positions = nargsort(labels, kind=kind, ascending=ascending,
                               na_position=na_position)

      positions = ensure_platform_int(positions)
      sorted_labels = labels.take(positions)._sort_levels_monotonic()
      sorted_values = self._values.take(positions)
      result = self._constructor(sorted_values, index=sorted_labels)

      if inplace:
          self._update_inplace(result)
      else:
          return result.__finalize__(self)
  def argsort(self, axis=0, kind='quicksort', order=None):
      """
      Override ndarray.argsort. Argsorts the value, omitting NA/null values,
      and places the result in the same locations as the non-NA values.

      Parameters
      ----------
      axis : int
          Has no effect but is accepted for compatibility with numpy.
      kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
          Choice of sorting algorithm. See np.sort for more
          information. 'mergesort' is the only stable algorithm
      order : None
          Has no effect but is accepted for compatibility with numpy.

      Returns
      -------
      Series
          Positions of values within the sort order with -1 indicating
          nan values.

      See Also
      --------
      numpy.ndarray.argsort
      """
      data = self._values
      missing = isna(data)

      if not missing.any():
          order_positions = np.argsort(data, kind=kind)
          return self._constructor(
              order_positions, index=self.index,
              dtype='int64').__finalize__(self)

      # Start from all -1 and overwrite only the slots that hold a value.
      positions = Series(-1, index=self.index, name=self.name,
                         dtype='int64')
      present = ~missing
      positions[present] = np.argsort(data[present], kind=kind)
      return self._constructor(positions,
                               index=self.index).__finalize__(self)
  2586. def nlargest(self, n=5, keep='first'):
  2587. """
  2588. Return the largest `n` elements.
  2589. Parameters
  2590. ----------
  2591. n : int, default 5
  2592. Return this many descending sorted values.
  2593. keep : {'first', 'last', 'all'}, default 'first'
  2594. When there are duplicate values that cannot all fit in a
  2595. Series of `n` elements:
  2596. - ``first`` : return the first `n` occurrences in order
  2597. of appearance.
  2598. - ``last`` : return the last `n` occurrences in reverse
  2599. order of appearance.
  2600. - ``all`` : keep all occurrences. This can result in a Series of
  2601. size larger than `n`.
  2602. Returns
  2603. -------
  2604. Series
  2605. The `n` largest values in the Series, sorted in decreasing order.
  2606. See Also
  2607. --------
  2608. Series.nsmallest: Get the `n` smallest elements.
  2609. Series.sort_values: Sort Series by values.
  2610. Series.head: Return the first `n` rows.
  2611. Notes
  2612. -----
  2613. Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
  2614. relative to the size of the ``Series`` object.
  2615. Examples
  2616. --------
  2617. >>> countries_population = {"Italy": 59000000, "France": 65000000,
  2618. ... "Malta": 434000, "Maldives": 434000,
  2619. ... "Brunei": 434000, "Iceland": 337000,
  2620. ... "Nauru": 11300, "Tuvalu": 11300,
  2621. ... "Anguilla": 11300, "Monserat": 5200}
  2622. >>> s = pd.Series(countries_population)
  2623. >>> s
  2624. Italy 59000000
  2625. France 65000000
  2626. Malta 434000
  2627. Maldives 434000
  2628. Brunei 434000
  2629. Iceland 337000
  2630. Nauru 11300
  2631. Tuvalu 11300
  2632. Anguilla 11300
  2633. Monserat 5200
  2634. dtype: int64
  2635. The `n` largest elements where ``n=5`` by default.
  2636. >>> s.nlargest()
  2637. France 65000000
  2638. Italy 59000000
  2639. Malta 434000
  2640. Maldives 434000
  2641. Brunei 434000
  2642. dtype: int64
  2643. The `n` largest elements where ``n=3``. Default `keep` value is 'first'
  2644. so Malta will be kept.
  2645. >>> s.nlargest(3)
  2646. France 65000000
  2647. Italy 59000000
  2648. Malta 434000
  2649. dtype: int64
  2650. The `n` largest elements where ``n=3`` and keeping the last duplicates.
  2651. Brunei will be kept since it is the last with value 434000 based on
  2652. the index order.
  2653. >>> s.nlargest(3, keep='last')
  2654. France 65000000
  2655. Italy 59000000
  2656. Brunei 434000
  2657. dtype: int64
  2658. The `n` largest elements where ``n=3`` with all duplicates kept. Note
  2659. that the returned Series has five elements due to the three duplicates.
  2660. >>> s.nlargest(3, keep='all')
  2661. France 65000000
  2662. Italy 59000000
  2663. Malta 434000
  2664. Maldives 434000
  2665. Brunei 434000
  2666. dtype: int64
  2667. """
  2668. return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
  def nsmallest(self, n=5, keep='first'):
      """
      Return the `n` smallest elements of the Series.

      Parameters
      ----------
      n : int, default 5
          How many values to return, in ascending order.
      keep : {'first', 'last', 'all'}, default 'first'
          Tie-breaking rule used when duplicate values cannot all fit in a
          Series of `n` elements:

          - ``first`` : return the first `n` occurrences in order
            of appearance.
          - ``last`` : return the last `n` occurrences in reverse
            order of appearance.
          - ``all`` : keep all occurrences. This can result in a Series of
            size larger than `n`.

      Returns
      -------
      Series
          The `n` smallest values in the Series, sorted in increasing order.

      See Also
      --------
      Series.nlargest: Get the `n` largest elements.
      Series.sort_values: Sort Series by values.
      Series.head: Return the first `n` rows.

      Notes
      -----
      Faster than ``.sort_values().head(n)`` for small `n` relative to
      the size of the ``Series`` object.

      Examples
      --------
      >>> countries_population = {"Italy": 59000000, "France": 65000000,
      ...                         "Brunei": 434000, "Malta": 434000,
      ...                         "Maldives": 434000, "Iceland": 337000,
      ...                         "Nauru": 11300, "Tuvalu": 11300,
      ...                         "Anguilla": 11300, "Monserat": 5200}
      >>> s = pd.Series(countries_population)
      >>> s
      Italy       59000000
      France      65000000
      Brunei        434000
      Malta         434000
      Maldives      434000
      Iceland       337000
      Nauru          11300
      Tuvalu         11300
      Anguilla       11300
      Monserat        5200
      dtype: int64

      With no arguments the five smallest elements are returned.

      >>> s.nsmallest()
      Monserat      5200
      Nauru        11300
      Tuvalu       11300
      Anguilla     11300
      Iceland     337000
      dtype: int64

      With ``n=3`` and the default ``keep='first'``, Nauru and Tuvalu are
      the tied entries that are kept.

      >>> s.nsmallest(3)
      Monserat     5200
      Nauru       11300
      Tuvalu      11300
      dtype: int64

      With ``n=3`` and ``keep='last'``, Anguilla and Tuvalu are kept since
      they are the last entries with value 11300 in index order.

      >>> s.nsmallest(3, keep='last')
      Monserat     5200
      Anguilla    11300
      Tuvalu      11300
      dtype: int64

      With ``n=3`` and ``keep='all'`` every tied entry is kept, so the
      result has four elements.

      >>> s.nsmallest(3, keep='all')
      Monserat     5200
      Nauru       11300
      Tuvalu      11300
      Anguilla    11300
      dtype: int64
      """
      # The selection algorithm lives in ``algorithms.SelectNSeries``; this
      # method only configures it and asks for the small end.
      selector = algorithms.SelectNSeries(self, n=n, keep=keep)
      return selector.nsmallest()
  def swaplevel(self, i=-2, j=-1, copy=True):
      """
      Swap levels `i` and `j` of the MultiIndex.

      Parameters
      ----------
      i, j : int, str (can be mixed)
          Levels of the index to be swapped. A level may be given by its
          name as a string.
      copy : bool, default True
          Forwarded as ``copy`` to the Series constructor that builds the
          result.

      Returns
      -------
      Series
          Series with levels swapped in MultiIndex.

      .. versionchanged:: 0.18.1

         The indexes ``i`` and ``j`` are now optional, and default to
         the two innermost levels of the index.
      """
      swapped_index = self.index.swaplevel(i, j)
      swapped = self._constructor(self._values, index=swapped_index,
                                  copy=copy)
      return swapped.__finalize__(self)
  def reorder_levels(self, order):
      """
      Rearrange the index levels into the given order.

      Levels may neither be dropped nor duplicated.

      Parameters
      ----------
      order : list of int representing new level order
          (reference level by number or key)

      Returns
      -------
      type of caller (new object)
      """
      if isinstance(self.index, MultiIndex):
          # Work on a copy so the caller's index is left untouched.
          reordered = self.copy()
          reordered.index = reordered.index.reorder_levels(order)
          return reordered

      # A flat index has no levels to permute.
      raise Exception(  # pragma: no cover
          'Can only reorder levels on a hierarchical axis.')
  def unstack(self, level=-1, fill_value=None):
      """
      Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.

      The level involved will automatically get sorted.

      Parameters
      ----------
      level : int, str, or list of these, default last level
          Level(s) to unstack; a level name may be passed.
      fill_value : scalar value, default None
          Value to use when replacing NaN values.

          .. versionadded:: 0.18.0

      Returns
      -------
      DataFrame
          Unstacked Series.

      Examples
      --------
      >>> s = pd.Series([1, 2, 3, 4],
      ...               index=pd.MultiIndex.from_product([['one', 'two'],
      ...                                                 ['a', 'b']]))
      >>> s
      one  a    1
           b    2
      two  a    3
           b    4
      dtype: int64

      >>> s.unstack(level=-1)
           a  b
      one  1  2
      two  3  4

      >>> s.unstack(level=0)
         one  two
      a    1    3
      b    2    4
      """
      # Imported at call time, as in the original implementation.
      from pandas.core.reshape.reshape import unstack as _unstack
      return _unstack(self, level, fill_value)
  2823. # ----------------------------------------------------------------------
  2824. # function application
  def map(self, arg, na_action=None):
      """
      Map values of Series according to input correspondence.

      Substitutes each value in the Series with another value, which may
      be derived from a function, a ``dict`` or a :class:`Series`.

      Parameters
      ----------
      arg : function, dict, or Series
          Mapping correspondence.
      na_action : {None, 'ignore'}, default None
          If 'ignore', propagate NaN values, without passing them to the
          mapping correspondence.

      Returns
      -------
      Series
          Same index as caller.

      See Also
      --------
      Series.apply : For applying more complex functions on a Series.
      DataFrame.apply : Apply a function row-/column-wise.
      DataFrame.applymap : Apply a function elementwise on a whole DataFrame.

      Notes
      -----
      When ``arg`` is a dictionary, values in Series that are not in the
      dictionary (as keys) are converted to ``NaN``. However, if the
      dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
      provides a method for default values), then this default is used
      rather than ``NaN``.

      Examples
      --------
      >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
      >>> s
      0       cat
      1       dog
      2       NaN
      3    rabbit
      dtype: object

      ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
      in the ``dict`` are converted to ``NaN``, unless the dict has a default
      value (e.g. ``defaultdict``):

      >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
      0    kitten
      1     puppy
      2       NaN
      3       NaN
      dtype: object

      It also accepts a function:

      >>> s.map('I am a {}'.format)
      0       I am a cat
      1       I am a dog
      2       I am a nan
      3    I am a rabbit
      dtype: object

      To avoid applying the function to missing values (and keep them as
      ``NaN``) ``na_action='ignore'`` can be used:

      >>> s.map('I am a {}'.format, na_action='ignore')
      0       I am a cat
      1       I am a dog
      2              NaN
      3    I am a rabbit
      dtype: object
      """
      # The parent class computes the mapped values; here they are only
      # re-wrapped on the caller's index.
      mapped = super(Series, self)._map_values(arg, na_action=na_action)
      wrapped = self._constructor(mapped, index=self.index)
      return wrapped.__finalize__(self)
  def _gotitem(self, key, ndim, subset=None):
      """
      Return a sliced object; hook for sub-classes to define.

      This implementation ignores every argument and hands back the
      Series itself.

      Parameters
      ----------
      key : string / list of selections
      ndim : 1,2
          Requested ndim of result.
      subset : object, default None
          Subset to act on.
      """
      return self
  # "See Also" section that is substituted into the shared aggregate
  # docstring by the ``Substitution`` decorator on ``aggregate``.
  _agg_see_also_doc = dedent("""
  See Also
  --------
  Series.apply : Invoke function on a Series.
  Series.transform : Transform function producing a Series with like indexes.
  """)

  # "Examples" section that is substituted into the shared aggregate
  # docstring by the ``Substitution`` decorator on ``aggregate``.
  _agg_examples_doc = dedent("""
  Examples
  --------
  >>> s = pd.Series([1, 2, 3, 4])
  >>> s
  0    1
  1    2
  2    3
  3    4
  dtype: int64

  >>> s.agg('min')
  1

  >>> s.agg(['min', 'max'])
  min   1
  max   4
  dtype: int64
  """)
  @Substitution(see_also=_agg_see_also_doc,
                examples=_agg_examples_doc,
                versionadded='.. versionadded:: 0.20.0',
                **_shared_doc_kwargs)
  @Appender(generic._shared_docs['aggregate'])
  def aggregate(self, func, axis=0, *args, **kwargs):
      # NOTE: no docstring here on purpose -- the decorators above build it
      # from the shared template.

      # Called only so that an invalid ``axis`` is rejected.
      self._get_axis_number(axis)

      outcome, _how = self._aggregate(func, *args, **kwargs)
      if outcome is not None:
          return outcome

      # An inner caller may have passed this meta-data along; it must not
      # leak into the user function.
      for meta_key in ('_axis', '_level'):
          kwargs.pop(meta_key, None)

      # First try a regular apply, which evaluates lambdas row-by-row.
      # If the lambda actually expects a whole Series, e.g.
      # ``lambda x: x - x.quantile(0.25)``, that fails and the vectorized
      # call is tried instead.  The vectorized call cannot go FIRST because
      # .agg and .apply would then have different semantics whenever the
      # operation is actually defined on the Series, e.g. str.
      try:
          return self.apply(func, *args, **kwargs)
      except (ValueError, AttributeError, TypeError):
          return func(self, *args, **kwargs)

  # Short alias for ``aggregate``.
  agg = aggregate
  @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs)
  def transform(self, func, axis=0, *args, **kwargs):
      # No docstring here: ``Appender`` supplies the shared one.
      # ``axis`` is only validated; it is not forwarded to the parent.
      self._get_axis_number(axis)
      parent = super(Series, self)
      return parent.transform(func, *args, **kwargs)
  def apply(self, func, convert_dtype=True, args=(), **kwds):
      """
      Invoke function on values of Series.

      `func` can be a ufunc (a NumPy function that applies to the entire
      Series) or a Python function that only works on single values.

      Parameters
      ----------
      func : function
          Python function or NumPy ufunc to apply.
      convert_dtype : bool, default True
          Try to find better dtype for elementwise function results. If
          False, leave as dtype=object.
      args : tuple
          Positional arguments passed to func after the series value.
      **kwds
          Additional keyword arguments passed to func.

      Returns
      -------
      Series or DataFrame
          If func returns a Series object the result will be a DataFrame.

      See Also
      --------
      Series.map: For element-wise operations.
      Series.agg: Only perform aggregating type operations.
      Series.transform: Only perform transforming type operations.

      Examples
      --------
      Create a series with typical summer temperatures for each city.

      >>> s = pd.Series([20, 21, 12],
      ...               index=['London', 'New York', 'Helsinki'])
      >>> s
      London      20
      New York    21
      Helsinki    12
      dtype: int64

      Square the values by defining a function and passing it as an
      argument to ``apply()``.

      >>> def square(x):
      ...     return x ** 2
      >>> s.apply(square)
      London      400
      New York    441
      Helsinki    144
      dtype: int64

      Square the values by passing an anonymous function as an
      argument to ``apply()``.

      >>> s.apply(lambda x: x ** 2)
      London      400
      New York    441
      Helsinki    144
      dtype: int64

      Define a custom function that needs additional positional
      arguments and pass these additional arguments using the
      ``args`` keyword.

      >>> def subtract_custom_value(x, custom_value):
      ...     return x - custom_value

      >>> s.apply(subtract_custom_value, args=(5,))
      London      15
      New York    16
      Helsinki     7
      dtype: int64

      Define a custom function that takes keyword arguments
      and pass these arguments to ``apply``.

      >>> def add_custom_values(x, **kwargs):
      ...     for month in kwargs:
      ...         x += kwargs[month]
      ...     return x

      >>> s.apply(add_custom_values, june=30, july=20, august=25)
      London      95
      New York    96
      Helsinki    87
      dtype: int64

      Use a function from the Numpy library.

      >>> s.apply(np.log)
      London      2.995732
      New York    3.044522
      Helsinki    2.484907
      dtype: float64
      """
      if len(self) == 0:
          # Nothing to call ``func`` on: hand back an empty Series with the
          # same dtype and index.
          empty = self._constructor(dtype=self.dtype, index=self.index)
          return empty.__finalize__(self)

      # A list or dict of functions is an aggregation spec.
      if isinstance(func, (list, dict)):
          return self.aggregate(func, *args, **kwds)

      # A string is dispatched by name.
      if isinstance(func, compat.string_types):
          return self._try_aggregate_string_function(func, *args, **kwds)

      # Bind the extra arguments into a one-argument callable.  The
      # parentheses spell out the operator precedence: any keyword
      # arguments force the wrapper, while positional arguments alone only
      # do so for non-ufuncs.
      needs_wrapper = kwds or (args and not isinstance(func, np.ufunc))
      if needs_wrapper:
          def f(x):
              return func(x, *args, **kwds)
      else:
          f = func

      with np.errstate(all='ignore'):
          # An unwrapped ufunc is applied to the whole Series at once.
          if isinstance(f, np.ufunc):
              return f(self)

          # Everything else is evaluated element by element.
          if is_extension_type(self.dtype):
              mapped = self._values.map(f)
          else:
              values = self.astype(object).values
              mapped = lib.map_infer(values, f, convert=convert_dtype)

      returned_series = len(mapped) and isinstance(mapped[0], Series)
      if returned_series:
          # Each element produced a Series: stack them into a DataFrame.
          from pandas.core.frame import DataFrame
          return DataFrame(mapped.tolist(), index=self.index)

      result = self._constructor(mapped, index=self.index)
      return result.__finalize__(self)
  def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
              filter_type=None, **kwds):
      """
      Perform a reduction operation.

      When the underlying values are a plain ndarray the operation `op` is
      applied to them directly; otherwise the reduction is delegated to the
      values object.
      """
      delegate = self._values

      # Called only so that an invalid ``axis`` is rejected.
      if axis is not None:
          self._get_axis_number(axis)

      if isinstance(delegate, Categorical):
          # TODO deprecate numeric_only argument for Categorical and use
          # skipna as well, see GH25303
          return delegate._reduce(name, numeric_only=numeric_only, **kwds)

      if isinstance(delegate, ExtensionArray):
          # dispatch to ExtensionArray interface
          return delegate._reduce(name, skipna=skipna, **kwds)

      if is_datetime64_dtype(delegate):
          # use DatetimeIndex implementation to handle skipna correctly;
          # the wrapped values fall through to the final dispatch below
          delegate = DatetimeIndex(delegate)
      elif isinstance(delegate, np.ndarray):
          # dispatch to numpy arrays
          if numeric_only:
              raise NotImplementedError('Series.{0} does not implement '
                                        'numeric_only.'.format(name))
          with np.errstate(all='ignore'):
              return op(delegate, skipna=skipna, **kwds)

      # TODO(EA) dispatch to Index
      # remove once all internals extension types are
      # moved to ExtensionArrays
      return delegate._reduce(op=op, name=name, axis=axis, skipna=skipna,
                              numeric_only=numeric_only,
                              filter_type=filter_type, **kwds)
  def _reindex_indexer(self, new_index, indexer, copy):
      """
      Build the Series that results from taking `indexer` positions.

      With ``indexer is None`` nothing is taken: the Series itself, or a
      copy of it when `copy` is truthy, is returned.  Otherwise the values
      are gathered with ``algorithms.take_1d`` and wrapped on `new_index`.
      """
      if indexer is None:
          return self.copy() if copy else self

      taken = algorithms.take_1d(self._values, indexer,
                                 allow_fill=True, fill_value=None)
      return self._constructor(taken, index=new_index)
  def _needs_reindex_multi(self, axes, method, level):
      """
      Report whether a multi reindex is needed.

      Present for compatibility with higher dims; this implementation
      always answers ``False``.
      """
      return False
  @Appender(generic._shared_docs['align'] % _shared_doc_kwargs)
  def align(self, other, join='outer', axis=None, level=None, copy=True,
            fill_value=None, method=None, limit=None, fill_axis=0,
            broadcast_axis=None):
      # No docstring here: ``Appender`` supplies the shared one.
      # Every option is forwarded unchanged to the parent implementation.
      options = dict(join=join, axis=axis, level=level, copy=copy,
                     fill_value=fill_value, method=method, limit=limit,
                     fill_axis=fill_axis, broadcast_axis=broadcast_axis)
      return super(Series, self).align(other, **options)
  def rename(self, index=None, **kwargs):
      """
      Alter Series index labels or name.

      Function / dict values must be unique (1-to-1). Labels not contained in
      a dict / Series will be left as-is. Extra labels listed don't throw an
      error.

      Alternatively, change ``Series.name`` with a scalar value.

      See the :ref:`user guide <basics.rename>` for more.

      Parameters
      ----------
      index : scalar, hashable sequence, dict-like or function, optional
          dict-like or functions are transformations to apply to
          the index.
          Scalar or hashable sequence-like will alter the ``Series.name``
          attribute.
      copy : bool, default True
          Whether to copy underlying data.
      inplace : bool, default False
          Whether to return a new Series. If True then value of copy is
          ignored.
      level : int or level name, default None
          In case of a MultiIndex, only rename labels in the specified
          level.

      Returns
      -------
      Series
          Series with index labels or name altered.

      See Also
      --------
      Series.rename_axis : Set the name of the axis.

      Examples
      --------
      >>> s = pd.Series([1, 2, 3])
      >>> s
      0    1
      1    2
      2    3
      dtype: int64
      >>> s.rename("my_name")  # scalar, changes Series.name
      0    1
      1    2
      2    3
      Name: my_name, dtype: int64
      >>> s.rename(lambda x: x ** 2)  # function, changes labels
      0    1
      1    2
      4    3
      dtype: int64
      >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
      0    1
      3    2
      5    3
      dtype: int64
      """
      # Validate ``inplace`` up front and store the checked value back so
      # both branches below see the same thing.
      inplace = validate_bool_kwarg(kwargs.get('inplace', False), 'inplace')
      kwargs['inplace'] = inplace

      # A scalar, or a list-like that is not dict-like, is a new *name*
      # rather than a relabelling of the index.
      if is_scalar(index):
          sets_name = True
      else:
          sets_name = is_list_like(index) and not is_dict_like(index)

      if sets_name:
          return self._set_name(index, inplace=inplace)
      return super(Series, self).rename(index=index, **kwargs)
  @Substitution(**_shared_doc_kwargs)
  @Appender(generic.NDFrame.reindex.__doc__)
  def reindex(self, index=None, **kwargs):
      # No docstring here: the decorators build it from NDFrame.reindex.
      parent = super(Series, self)
      return parent.reindex(index=index, **kwargs)
  def drop(self, labels=None, axis=0, index=None, columns=None,
           level=None, inplace=False, errors='raise'):
      """
      Return Series with specified index labels removed.

      Remove elements of a Series based on specifying the index labels.
      When using a multi-index, labels on different levels can be removed
      by specifying the level.

      Parameters
      ----------
      labels : single label or list-like
          Index labels to drop.
      axis : 0, default 0
          Redundant for application on Series.
      index, columns : None
          Redundant for application on Series, but index can be used instead
          of labels.

          .. versionadded:: 0.21.0
      level : int or level name, optional
          For MultiIndex, level for which the labels will be removed.
      inplace : bool, default False
          If True, do operation inplace and return None.
      errors : {'ignore', 'raise'}, default 'raise'
          If 'ignore', suppress error and only existing labels are dropped.

      Returns
      -------
      Series
          Series with specified index labels removed.

      Raises
      ------
      KeyError
          If none of the labels are found in the index.

      See Also
      --------
      Series.reindex : Return only specified index labels of Series.
      Series.dropna : Return series without null values.
      Series.drop_duplicates : Return Series with duplicate values removed.
      DataFrame.drop : Drop specified labels from rows or columns.

      Examples
      --------
      >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
      >>> s
      A    0
      B    1
      C    2
      dtype: int64

      Drop labels B and C

      >>> s.drop(labels=['B', 'C'])
      A    0
      dtype: int64

      Drop 2nd level label in MultiIndex Series

      >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
      ...                              ['speed', 'weight', 'length']],
      ...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
      ...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
      >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
      ...               index=midx)
      >>> s
      lama    speed      45.0
              weight    200.0
              length      1.2
      cow     speed      30.0
              weight    250.0
              length      1.5
      falcon  speed     320.0
              weight      1.0
              length      0.3
      dtype: float64

      >>> s.drop(labels='weight', level=1)
      lama    speed      45.0
              length      1.2
      cow     speed      30.0
              length      1.5
      falcon  speed     320.0
              length      0.3
      dtype: float64
      """
      # Pure pass-through: the parent class does the work.
      parent = super(Series, self)
      return parent.drop(labels=labels, axis=axis, index=index,
                         columns=columns, level=level, inplace=inplace,
                         errors=errors)
  @Substitution(**_shared_doc_kwargs)
  @Appender(generic.NDFrame.fillna.__doc__)
  def fillna(self, value=None, method=None, axis=None, inplace=False,
             limit=None, downcast=None, **kwargs):
      # No docstring here: the decorators build it from NDFrame.fillna.
      # All arguments are forwarded unchanged to the parent implementation.
      parent = super(Series, self)
      return parent.fillna(value=value, method=method, axis=axis,
                           inplace=inplace, limit=limit, downcast=downcast,
                           **kwargs)
  @Appender(generic._shared_docs['replace'] % _shared_doc_kwargs)
  def replace(self, to_replace=None, value=None, inplace=False, limit=None,
              regex=False, method='pad'):
      # No docstring here: ``Appender`` supplies the shared one.
      # All arguments are forwarded unchanged to the parent implementation.
      parent = super(Series, self)
      return parent.replace(to_replace=to_replace, value=value,
                            inplace=inplace, limit=limit, regex=regex,
                            method=method)
  @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
  def shift(self, periods=1, freq=None, axis=0, fill_value=None):
      # No docstring here: ``Appender`` supplies the shared one.
      # All arguments are forwarded unchanged to the parent implementation.
      parent = super(Series, self)
      return parent.shift(periods=periods, freq=freq, axis=axis,
                          fill_value=fill_value)
  def reindex_axis(self, labels, axis=0, **kwargs):
      """
      Conform Series to new index with optional filling logic.

      .. deprecated:: 0.21.0
          Use ``Series.reindex`` instead.
      """
      # ``axis`` exists only for compatibility with higher dims; the check
      # below rejects anything but 0.
      if axis != 0:
          raise ValueError("cannot reindex series on non-zero axis!")

      # stacklevel=2 attributes the warning to the caller of this method.
      warnings.warn("'.reindex_axis' is deprecated and will be removed in a "
                    "future version. Use '.reindex' instead.",
                    FutureWarning, stacklevel=2)
      return self.reindex(index=labels, **kwargs)
  def memory_usage(self, index=True, deep=False):
      """
      Return the memory usage of the Series.

      The memory usage can optionally include the contribution of
      the index and of elements of `object` dtype.

      Parameters
      ----------
      index : bool, default True
          Specifies whether to include the memory usage of the Series index.
      deep : bool, default False
          If True, introspect the data deeply by interrogating
          `object` dtypes for system-level memory consumption, and include
          it in the returned value.

      Returns
      -------
      int
          Bytes of memory consumed.

      See Also
      --------
      numpy.ndarray.nbytes : Total bytes consumed by the elements of the
          array.
      DataFrame.memory_usage : Bytes consumed by a DataFrame.

      Examples
      --------
      >>> s = pd.Series(range(3))
      >>> s.memory_usage()
      104

      Not including the index gives the size of the rest of the data, which
      is necessarily smaller:

      >>> s.memory_usage(index=False)
      24

      The memory footprint of `object` values is ignored by default:

      >>> s = pd.Series(["a", "b"])
      >>> s.values
      array(['a', 'b'], dtype=object)
      >>> s.memory_usage()
      96
      >>> s.memory_usage(deep=True)
      212
      """
      # The parent's figure covers the values; the index is added on top
      # only when requested.
      values_bytes = super(Series, self).memory_usage(deep=deep)
      if not index:
          return values_bytes
      return values_bytes + self.index.memory_usage(deep=deep)
  @Appender(generic.NDFrame._take.__doc__)
  def _take(self, indices, axis=0, is_copy=False):
      # No docstring here: ``Appender`` supplies the one from NDFrame._take.
      indices = ensure_platform_int(indices)
      new_index = self.index.take(indices)

      take_kwargs = {}
      if is_categorical_dtype(self):
          # https://github.com/pandas-dev/pandas/issues/20664
          # TODO: remove when the default Categorical.take behavior changes
          indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
          take_kwargs['allow_fill'] = False

      new_values = self._values.take(indices, **take_kwargs)
      result = self._constructor(new_values, index=new_index, fastpath=True)
      result = result.__finalize__(self)

      # Flag the result as a copy only when requested and only if the take
      # produced an axis that differs from the original one.
      if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)):
          result._set_is_copy(self)

      return result
  def isin(self, values):
      """
      Check whether `values` are contained in Series.

      Return a boolean Series showing whether each element in the Series
      matches an element in the passed sequence of `values` exactly.

      Parameters
      ----------
      values : set or list-like
          The sequence of values to test. Passing in a single string will
          raise a ``TypeError``. Instead, turn a single string into a
          list of one element.

          .. versionadded:: 0.18.1

            Support for values as a set.

      Returns
      -------
      Series
          Series of booleans indicating if each element is in values.

      Raises
      ------
      TypeError
        * If `values` is a string

      See Also
      --------
      DataFrame.isin : Equivalent method on DataFrame.

      Examples
      --------
      >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
      ...                'hippo'], name='animal')
      >>> s.isin(['cow', 'lama'])
      0     True
      1     True
      2     True
      3    False
      4     True
      5    False
      Name: animal, dtype: bool

      Passing a single string as ``s.isin('lama')`` will raise an error. Use
      a list of one element instead:

      >>> s.isin(['lama'])
      0     True
      1    False
      2     True
      3    False
      4     True
      5    False
      Name: animal, dtype: bool
      """
      # ``algorithms.isin`` computes the membership result; it is then
      # re-wrapped on the caller's index.
      membership = algorithms.isin(self, values)
      wrapped = self._constructor(membership, index=self.index)
      return wrapped.__finalize__(self)
  def between(self, left, right, inclusive=True):
      """
      Return boolean Series equivalent to left <= series <= right.

      The result holds `True` wherever the corresponding Series element
      lies between the boundary values `left` and `right`. NA values are
      treated as `False`.

      Parameters
      ----------
      left : scalar
          Left boundary.
      right : scalar
          Right boundary.
      inclusive : bool, default True
          Include boundaries.

      Returns
      -------
      Series
          Series representing whether each element is between left and
          right (inclusive).

      See Also
      --------
      Series.gt : Greater than of series and other.
      Series.lt : Less than of series and other.

      Notes
      -----
      This function is equivalent to ``(left <= ser) & (ser <= right)``

      Examples
      --------
      >>> s = pd.Series([2, 0, 4, 8, np.nan])

      Boundary values are included by default:

      >>> s.between(1, 4)
      0     True
      1    False
      2     True
      3    False
      4    False
      dtype: bool

      With `inclusive` set to ``False`` boundary values are excluded:

      >>> s.between(1, 4, inclusive=False)
      0     True
      1    False
      2    False
      3    False
      4    False
      dtype: bool

      `left` and `right` can be any scalar value:

      >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
      >>> s.between('Anna', 'Daniel')
      0    False
      1     True
      2     True
      3    False
      dtype: bool
      """
      # The lower-bound mask is built first, then the upper-bound mask, and
      # the two are combined element-wise.
      if inclusive:
          return (self >= left) & (self <= right)
      return (self > left) & (self < right)
  @classmethod
  def from_csv(cls, path, sep=',', parse_dates=True, header=None,
               index_col=0, encoding=None, infer_datetime_format=False):
      """
      Read CSV file.

      .. deprecated:: 0.21.0
          Use :func:`pandas.read_csv` instead.

      It is preferable to use the more powerful :func:`pandas.read_csv`
      for most general purposes, but ``from_csv`` makes for an easy
      roundtrip to and from a file (the exact counterpart of
      ``to_csv``), especially with a time Series.

      This method only differs from :func:`pandas.read_csv` in some defaults:

      - `index_col` is ``0`` instead of ``None`` (take first column as index
        by default)
      - `header` is ``None`` instead of ``0`` (the first row is not used as
        the column names)
      - `parse_dates` is ``True`` instead of ``False`` (try parsing the index
        as datetime by default)

      With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used
      to return a Series like ``from_csv``.

      Parameters
      ----------
      path : str, file path, or file handle / StringIO
      sep : str, default ','
          Field delimiter.
      parse_dates : bool, default True
          Parse dates. Different default from read_table.
      header : int, default None
          Row to use as header (skip prior rows).
      index_col : int or sequence, default 0
          Column to use for index. If a sequence is given, a MultiIndex
          is used. Different default from read_table.
      encoding : str, optional
          A string representing the encoding to use if the contents are
          non-ascii, for python versions prior to 3.
      infer_datetime_format : bool, default False
          If True and `parse_dates` is True for a column, try to infer the
          datetime format based on the first datetime string. If the format
          can be inferred, there often will be a large parsing speed-up.

      Returns
      -------
      Series

      See Also
      --------
      read_csv
      """
      # We're calling `DataFrame.from_csv` in the implementation,
      # which will propagate a warning regarding `from_csv` deprecation.
      from pandas.core.frame import DataFrame

      frame = DataFrame.from_csv(
          path, header=header, index_col=index_col, sep=sep,
          parse_dates=parse_dates, encoding=encoding,
          infer_datetime_format=infer_datetime_format)

      # The first data column becomes the Series.
      series = frame.iloc[:, 0]
      if header is None:
          # No header row was read, so both names are cleared.
          series.index.name = None
          series.name = None
      return series
  @Appender(generic.NDFrame.to_csv.__doc__)
  def to_csv(self, *args, **kwargs):
      # No docstring here: ``Appender`` supplies the one from NDFrame.to_csv.

      # Positional parameter order of the current signature ...
      names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
               "header", "index", "index_label", "mode", "encoding",
               "compression", "quoting", "quotechar", "line_terminator",
               "chunksize", "tupleize_cols", "date_format", "doublequote",
               "escapechar", "decimal"]

      # ... and of the old one, used when the call looks like the old
      # signature (see the second-argument check below).
      old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
                   "header", "index_label", "mode", "encoding",
                   "compression", "date_format", "decimal"]

      # Accept the old keyword ``path`` and store it under the new name.
      if "path" in kwargs:
          warnings.warn("The signature of `Series.to_csv` was aligned "
                        "to that of `DataFrame.to_csv`, and argument "
                        "'path' will be renamed to 'path_or_buf'.",
                        FutureWarning, stacklevel=2)
          kwargs["path_or_buf"] = kwargs.pop("path")

      if len(args) > 1:
          # Either "index" (old signature) or "sep" (new signature) is being
          # passed as second argument (while the first is the same)
          maybe_sep = args[1]

          if not is_string_like(maybe_sep) or len(maybe_sep) != 1:
              # Not a one-character string: treat the call as the old
              # signature.
              warnings.warn("The signature of `Series.to_csv` was aligned "
                            "to that of `DataFrame.to_csv`. Note that the "
                            "order of arguments changed, and the new one "
                            "has 'sep' in first place, for which \"{}\" is "
                            "not a valid value. The old order will cease to "
                            "be supported in a future version. Please refer "
                            "to the documentation for `DataFrame.to_csv` "
                            "when updating your function "
                            "calls.".format(maybe_sep),
                            FutureWarning, stacklevel=2)
              names = old_names

      # Turn positional arguments into keyword arguments, rejecting any
      # argument that was supplied both ways.
      for position, (key, value) in enumerate(zip(names, args)):
          if key in kwargs:
              raise ValueError("Argument given by name ('{}') and position "
                               "({})".format(key, position))
          kwargs[key] = value

      if kwargs.get("header") is None:
          warnings.warn("The signature of `Series.to_csv` was aligned "
                        "to that of `DataFrame.to_csv`, and argument "
                        "'header' will change its default value from False "
                        "to True: please pass an explicit value to suppress "
                        "this warning.", FutureWarning,
                        stacklevel=2)
          kwargs["header"] = False  # Backwards compatibility.

      return self.to_frame().to_csv(**kwargs)
    @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs)
    def isna(self):
        # No local docstring on purpose: the decorator is handed the shared
        # 'isna' documentation template filled in with ``_shared_doc_kwargs``.
        # The body only delegates to the next ``isna`` in Series' MRO.
        return super(Series, self).isna()
    @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs)
    def isnull(self):
        # Uses the same shared 'isna' documentation template as ``isna``.
        # The body only delegates to the next ``isnull`` in Series' MRO.
        # NOTE(review): presumably an alias of ``isna`` -- confirm in the
        # parent class.
        return super(Series, self).isnull()
    @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs)
    def notna(self):
        # No local docstring on purpose: the decorator is handed the shared
        # 'notna' documentation template filled in with ``_shared_doc_kwargs``.
        # The body only delegates to the next ``notna`` in Series' MRO.
        return super(Series, self).notna()
    @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs)
    def notnull(self):
        # Uses the same shared 'notna' documentation template as ``notna``.
        # The body only delegates to the next ``notnull`` in Series' MRO.
        # NOTE(review): presumably an alias of ``notna`` -- confirm in the
        # parent class.
        return super(Series, self).notnull()
  3589. def dropna(self, axis=0, inplace=False, **kwargs):
  3590. """
  3591. Return a new Series with missing values removed.
  3592. See the :ref:`User Guide <missing_data>` for more on which values are
  3593. considered missing, and how to work with missing data.
  3594. Parameters
  3595. ----------
  3596. axis : {0 or 'index'}, default 0
  3597. There is only one axis to drop values from.
  3598. inplace : bool, default False
  3599. If True, do operation inplace and return None.
  3600. **kwargs
  3601. Not in use.
  3602. Returns
  3603. -------
  3604. Series
  3605. Series with NA entries dropped from it.
  3606. See Also
  3607. --------
  3608. Series.isna: Indicate missing values.
  3609. Series.notna : Indicate existing (non-missing) values.
  3610. Series.fillna : Replace missing values.
  3611. DataFrame.dropna : Drop rows or columns which contain NA values.
  3612. Index.dropna : Drop missing indices.
  3613. Examples
  3614. --------
  3615. >>> ser = pd.Series([1., 2., np.nan])
  3616. >>> ser
  3617. 0 1.0
  3618. 1 2.0
  3619. 2 NaN
  3620. dtype: float64
  3621. Drop NA values from a Series.
  3622. >>> ser.dropna()
  3623. 0 1.0
  3624. 1 2.0
  3625. dtype: float64
  3626. Keep the Series with valid entries in the same variable.
  3627. >>> ser.dropna(inplace=True)
  3628. >>> ser
  3629. 0 1.0
  3630. 1 2.0
  3631. dtype: float64
  3632. Empty strings are not considered NA values. ``None`` is considered an
  3633. NA value.
  3634. >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
  3635. >>> ser
  3636. 0 NaN
  3637. 1 2
  3638. 2 NaT
  3639. 3
  3640. 4 None
  3641. 5 I stay
  3642. dtype: object
  3643. >>> ser.dropna()
  3644. 1 2
  3645. 3
  3646. 5 I stay
  3647. dtype: object
  3648. """
  3649. inplace = validate_bool_kwarg(inplace, 'inplace')
  3650. kwargs.pop('how', None)
  3651. if kwargs:
  3652. raise TypeError('dropna() got an unexpected keyword '
  3653. 'argument "{0}"'.format(list(kwargs.keys())[0]))
  3654. # Validate the axis parameter
  3655. self._get_axis_number(axis or 0)
  3656. if self._can_hold_na:
  3657. result = remove_na_arraylike(self)
  3658. if inplace:
  3659. self._update_inplace(result)
  3660. else:
  3661. return result
  3662. else:
  3663. if inplace:
  3664. # do nothing
  3665. pass
  3666. else:
  3667. return self.copy()
  3668. def valid(self, inplace=False, **kwargs):
  3669. """
  3670. Return Series without null values.
  3671. .. deprecated:: 0.23.0
  3672. Use :meth:`Series.dropna` instead.
  3673. """
  3674. warnings.warn("Method .valid will be removed in a future version. "
  3675. "Use .dropna instead.", FutureWarning, stacklevel=2)
  3676. return self.dropna(inplace=inplace, **kwargs)
  3677. # ----------------------------------------------------------------------
  3678. # Time series-oriented methods
  3679. def to_timestamp(self, freq=None, how='start', copy=True):
  3680. """
  3681. Cast to DatetimeIndex of Timestamps, at *beginning* of period.
  3682. Parameters
  3683. ----------
  3684. freq : str, default frequency of PeriodIndex
  3685. Desired frequency.
  3686. how : {'s', 'e', 'start', 'end'}
  3687. Convention for converting period to timestamp; start of period
  3688. vs. end.
  3689. copy : bool, default True
  3690. Whether or not to return a copy.
  3691. Returns
  3692. -------
  3693. Series with DatetimeIndex
  3694. """
  3695. new_values = self._values
  3696. if copy:
  3697. new_values = new_values.copy()
  3698. new_index = self.index.to_timestamp(freq=freq, how=how)
  3699. return self._constructor(new_values,
  3700. index=new_index).__finalize__(self)
  3701. def to_period(self, freq=None, copy=True):
  3702. """
  3703. Convert Series from DatetimeIndex to PeriodIndex with desired
  3704. frequency (inferred from index if not passed).
  3705. Parameters
  3706. ----------
  3707. freq : str, default None
  3708. Frequency associated with the PeriodIndex.
  3709. copy : bool, default True
  3710. Whether or not to return a copy.
  3711. Returns
  3712. -------
  3713. Series
  3714. Series with index converted to PeriodIndex.
  3715. """
  3716. new_values = self._values
  3717. if copy:
  3718. new_values = new_values.copy()
  3719. new_index = self.index.to_period(freq=freq)
  3720. return self._constructor(new_values,
  3721. index=new_index).__finalize__(self)
    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    # Each accessor is declared as a class attribute: the first argument is
    # the public attribute name, the second is the object implementing it.
    # NOTE(review): presumably ``CachedAccessor`` builds the accessor on first
    # access and caches it -- confirm in pandas.core.accessor.
    # Within the class namespace the first assignment rebinds the name ``str``.
    str = CachedAccessor("str", StringMethods)
    dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
    cat = CachedAccessor("cat", CategoricalAccessor)
    plot = CachedAccessor("plot", gfx.SeriesPlotMethods)
    sparse = CachedAccessor("sparse", SparseAccessor)

    # ----------------------------------------------------------------------
    # Add plotting methods to Series
    # ``hist`` is bound directly to the function exposed by ``gfx``.
    hist = gfx.hist_series
# Module-level class setup, executed when this module is imported.
# Declares 'index' as the only axis of Series (used as both info axis and
# stat axis, position 0), with 'rows' as an alias for axis 0.
Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0},
                   docs={'index': 'The index (axis labels) of the Series.'})
# NOTE(review): the three calls below presumably attach generated methods to
# Series -- confirm in their definitions.
Series._add_numeric_operations()
Series._add_series_only_operations()
Series._add_series_or_dataframe_operations()

# Add arithmetic!
# Both helpers receive the Series class itself.
ops.add_flex_arithmetic_methods(Series)
ops.add_special_arithmetic_methods(Series)