PageRenderTime 64ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/core/series.py

http://github.com/wesm/pandas
Python | 4474 lines | 4459 code | 10 blank | 5 comment | 20 complexity | be421d2813837526f7340c9768686d1b MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. """
  2. Data structure for 1-dimensional cross-sectional and time series data
  3. """
  4. from __future__ import division
  5. from collections import OrderedDict
  6. from textwrap import dedent
  7. import warnings
  8. import numpy as np
  9. from pandas._libs import iNaT, index as libindex, lib, tslibs
  10. import pandas.compat as compat
  11. from pandas.compat import PY36, StringIO, u, zip
  12. from pandas.compat.numpy import function as nv
  13. from pandas.util._decorators import Appender, Substitution, deprecate
  14. from pandas.util._validators import validate_bool_kwarg
  15. from pandas.core.dtypes.common import (
  16. _is_unorderable_exception, ensure_platform_int, is_bool,
  17. is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
  18. is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
  19. is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
  20. from pandas.core.dtypes.generic import (
  21. ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
  22. ABCSparseArray, ABCSparseSeries)
  23. from pandas.core.dtypes.missing import (
  24. isna, na_value_for_dtype, notna, remove_na_arraylike)
  25. from pandas.core import algorithms, base, generic, nanops, ops
  26. from pandas.core.accessor import CachedAccessor
  27. from pandas.core.arrays import ExtensionArray, SparseArray
  28. from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
  29. from pandas.core.arrays.sparse import SparseAccessor
  30. import pandas.core.common as com
  31. from pandas.core.config import get_option
  32. from pandas.core.index import (
  33. Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
  34. from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
  35. import pandas.core.indexes.base as ibase
  36. from pandas.core.indexes.datetimes import DatetimeIndex
  37. from pandas.core.indexes.period import PeriodIndex
  38. from pandas.core.indexes.timedeltas import TimedeltaIndex
  39. from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
  40. from pandas.core.internals import SingleBlockManager
  41. from pandas.core.internals.construction import sanitize_array
  42. from pandas.core.strings import StringMethods
  43. from pandas.core.tools.datetimes import to_datetime
  44. import pandas.io.formats.format as fmt
  45. from pandas.io.formats.terminal import get_terminal_size
  46. import pandas.plotting._core as gfx
# pylint: disable=E1101,E1103
# pylint: disable=W0703,W0622,W0613,W0201

# Public API of this module.
__all__ = ['Series']

# Substitution values shared by the docstring templates in this module
# (consumed by the Appender/Substitution decorators).
_shared_doc_kwargs = dict(
    axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
    axis="""axis : {0 or 'index'}
        Parameter needed for compatibility with DataFrame.""",
    inplace="""inplace : boolean, default False
    If True, performs operation inplace and returns None.""",
    unique='np.ndarray', duplicated='Series',
    optional_by='', optional_mapper='', optional_labels='', optional_axis='',
    versionadded_to_excel='\n    .. versionadded:: 0.20.0\n')
  59. # see gh-16971
  60. def remove_na(arr):
  61. """
  62. Remove null values from array like structure.
  63. .. deprecated:: 0.21.0
  64. Use s[s.notnull()] instead.
  65. """
  66. warnings.warn("remove_na is deprecated and is a private "
  67. "function. Do not use.", FutureWarning, stacklevel=2)
  68. return remove_na_arraylike(arr)
  69. def _coerce_method(converter):
  70. """
  71. Install the scalar coercion methods.
  72. """
  73. def wrapper(self):
  74. if len(self) == 1:
  75. return converter(self.iloc[0])
  76. raise TypeError("cannot convert the series to "
  77. "{0}".format(str(converter)))
  78. wrapper.__name__ = "__{name}__".format(name=converter.__name__)
  79. return wrapper
  80. # ----------------------------------------------------------------------
  81. # Series class
  82. class Series(base.IndexOpsMixin, generic.NDFrame):
  83. """
  84. One-dimensional ndarray with axis labels (including time series).
  85. Labels need not be unique but must be a hashable type. The object
  86. supports both integer- and label-based indexing and provides a host of
  87. methods for performing operations involving the index. Statistical
  88. methods from ndarray have been overridden to automatically exclude
  89. missing data (currently represented as NaN).
  90. Operations between Series (+, -, /, *, **) align values based on their
  91. associated index values-- they need not be the same length. The result
  92. index will be the sorted union of the two indexes.
  93. Parameters
  94. ----------
  95. data : array-like, Iterable, dict, or scalar value
  96. Contains data stored in Series.
  97. .. versionchanged :: 0.23.0
  98. If data is a dict, argument order is maintained for Python 3.6
  99. and later.
  100. index : array-like or Index (1d)
  101. Values must be hashable and have the same length as `data`.
  102. Non-unique index values are allowed. Will default to
  103. RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
  104. sequence are used, the index will override the keys found in the
  105. dict.
  106. dtype : str, numpy.dtype, or ExtensionDtype, optional
  107. Data type for the output Series. If not specified, this will be
  108. inferred from `data`.
  109. See the :ref:`user guide <basics.dtypes>` for more usages.
  110. copy : bool, default False
  111. Copy input data.
  112. """
    # Attributes propagated to new Series through __finalize__.
    _metadata = ['name']
    # Accessor namespaces exposed on instances (Series.dt, .cat, .str, .sparse).
    _accessors = {'dt', 'cat', 'str', 'sparse'}

    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations = generic.NDFrame._deprecations | frozenset(
        ['asobject', 'reshape', 'get_value', 'set_value',
         'from_csv', 'valid', 'tolist'])

    # Override cache_readonly bc Series is mutable
    hasnans = property(base.IndexOpsMixin.hasnans.func,
                       doc=base.IndexOpsMixin.hasnans.__doc__)

    # ----------------------------------------------------------------------
    # Constructors
    def __init__(self, data=None, index=None, dtype=None, name=None,
                 copy=False, fastpath=False):
        # Dispatch on the type of `data`, normalize it to a
        # SingleBlockManager, then initialize the NDFrame base.

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:

            if index is not None:
                index = ensure_index(index)

            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            if isinstance(data, MultiIndex):
                raise NotImplementedError("initializing a Series from a "
                                          "MultiIndex is not supported")
            elif isinstance(data, Index):
                if name is None:
                    name = data.name

                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # need to copy to avoid aliasing issues
                    data = data._values.copy()
                    if (isinstance(data, ABCDatetimeIndex) and
                            data.tz is not None):
                        # GH#24096 need copy to be deep for datetime64tz case
                        # TODO: See if we can avoid these copies
                        data = data._values.copy(deep=True)
                copy = False

            elif isinstance(data, np.ndarray):
                pass
            elif isinstance(data, (ABCSeries, ABCSparseSeries)):
                if name is None:
                    name = data.name
                if index is None:
                    index = data.index
                else:
                    # align the incoming Series to the requested index
                    data = data.reindex(index, copy=copy)
                data = data._data
            elif isinstance(data, dict):
                data, index = self._init_dict(data, index, dtype)
                dtype = None
                copy = False
            elif isinstance(data, SingleBlockManager):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument. `copy` must '
                                         'be False.')

            elif is_extension_array_dtype(data):
                pass
            elif isinstance(data, (set, frozenset)):
                # sets have no stable ordering, so cannot become a Series
                raise TypeError("{0!r} type is unordered"
                                "".format(data.__class__.__name__))
            # If data is Iterable but not list-like, consume into list.
            elif (isinstance(data, compat.Iterable)
                  and not isinstance(data, compat.Sized)):
                data = list(data)
            else:

                # handle sparse passed here (and force conversion)
                if isinstance(data, ABCSparseArray):
                    data = data.to_dense()

            if index is None:
                if not is_list_like(data):
                    data = [data]
                index = ibase.default_index(len(data))
            elif is_list_like(data):

                # a scalar numpy array is list-like but doesn't
                # have a proper length
                try:
                    if len(index) != len(data):
                        raise ValueError(
                            'Length of passed values is {val}, '
                            'index implies {ind}'
                            .format(val=len(data), ind=len(index)))
                except TypeError:
                    pass

            # create/copy the manager
            if isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype=dtype, errors='ignore',
                                       copy=copy)
                elif copy:
                    data = data.copy()
            else:
                data = sanitize_array(data, index, dtype, copy,
                                      raise_cast_failure=True)

                data = SingleBlockManager(data, index, fastpath=True)

        generic.NDFrame.__init__(self, data, fastpath=True)
        self.name = name
        self._set_axis(0, index, fastpath=True)
    def _init_dict(self, data, index=None, dtype=None):
        """
        Derive the "_data" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series
        index : Index or index-like, default None
            index for the new Series: if None, use dict keys
        dtype : dtype, default None
            dtype for the new Series: if None, infer from data

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            keys, values = zip(*compat.iteritems(data))
            values = list(values)
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            values = na_value_for_dtype(dtype)
            keys = index
        else:
            keys, values = [], []

        # Input is now list-like, so rely on "standard" construction:
        s = Series(values, index=keys, dtype=dtype)

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        elif not PY36 and not isinstance(data, OrderedDict) and data:
            # Need the `and data` to avoid sorting Series(None, index=[...])
            # since that isn't really dict-like
            try:
                s = s.sort_index()
            except TypeError:
                # un-sortable mixed keys: keep insertion order
                pass
        return s._data, s.index
  268. @classmethod
  269. def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
  270. fastpath=False):
  271. """
  272. Construct Series from array.
  273. .. deprecated :: 0.23.0
  274. Use pd.Series(..) constructor instead.
  275. """
  276. warnings.warn("'from_array' is deprecated and will be removed in a "
  277. "future version. Please use the pd.Series(..) "
  278. "constructor instead.", FutureWarning, stacklevel=2)
  279. if isinstance(arr, ABCSparseArray):
  280. from pandas.core.sparse.series import SparseSeries
  281. cls = SparseSeries
  282. return cls(arr, index=index, name=name, dtype=dtype,
  283. copy=copy, fastpath=fastpath)
    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        # Class used by internal code to build new 1-dim results.
        return Series
    @property
    def _constructor_expanddim(self):
        # Class used when expanding to 2 dimensions (e.g. Series.to_frame).
        # Imported lazily to avoid a circular import with pandas.core.frame.
        from pandas.core.frame import DataFrame
        return DataFrame
    # types
    @property
    def _can_hold_na(self):
        # Whether the underlying block can represent missing values.
        return self._data._can_hold_na

    # Backing attribute for the index; set via _set_axis.
    _index = None
    def _set_axis(self, axis, labels, fastpath=False):
        """
        Override generic, we want to set the _typ here.
        """

        if not fastpath:
            labels = ensure_index(labels)

        is_all_dates = labels.is_all_dates
        if is_all_dates:
            # Promote a plain Index of datetimes to a DatetimeIndex.
            if not isinstance(labels,
                              (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                try:
                    labels = DatetimeIndex(labels)
                    # need to set here because we changed the index
                    if fastpath:
                        self._data.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceeds datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        self._set_subtyp(is_all_dates)

        # Bypass __setattr__ to avoid attribute-lookup recursion.
        object.__setattr__(self, '_index', labels)
        if not fastpath:
            self._data.set_axis(axis, labels)
  320. def _set_subtyp(self, is_all_dates):
  321. if is_all_dates:
  322. object.__setattr__(self, '_subtyp', 'time_series')
  323. else:
  324. object.__setattr__(self, '_subtyp', 'series')
    def _update_inplace(self, result, **kwargs):
        # we want to call the generic version and not the IndexOpsMixin
        return generic.NDFrame._update_inplace(self, result, **kwargs)
  328. @property
  329. def name(self):
  330. """
  331. Return name of the Series.
  332. """
  333. return self._name
  334. @name.setter
  335. def name(self, value):
  336. if value is not None and not is_hashable(value):
  337. raise TypeError('Series.name must be a hashable type')
  338. object.__setattr__(self, '_name', value)
    # ndarray compatibility
    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype
    @property
    def dtypes(self):
        """
        Return the dtype object of the underlying data.
        """
        # Alias of ``dtype`` for DataFrame API compatibility.
        return self._data.dtype
    @property
    def ftype(self):
        """
        Return if the data is sparse|dense.
        """
        return self._data.ftype
    @property
    def ftypes(self):
        """
        Return if the data is sparse|dense.
        """
        # Alias of ``ftype`` for DataFrame API compatibility.
        return self._data.ftype
    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        [a, a, b, c]
        Categories (3, object): [a, b, c]

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._data.external_values()
    @property
    def _values(self):
        """
        Return the internal repr of this data.
        """
        return self._data.internal_values()
    def _formatting_values(self):
        """
        Return the values that can be formatted (used by SeriesFormatter
        and DataFrameFormatter).
        """
        return self._data.formatting_values()
    def get_values(self):
        """
        Same as values (but handles sparseness conversions); is a view.
        """
        return self._data.get_values()
  413. @property
  414. def asobject(self):
  415. """
  416. Return object Series which contains boxed values.
  417. .. deprecated :: 0.23.0
  418. Use ``astype(object)`` instead.
  419. *this is an internal non-public method*
  420. """
  421. warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
  422. " instead", FutureWarning, stacklevel=2)
  423. return self.astype(object).values
    # ops
    def ravel(self, order='C'):
        """
        Return the flattened underlying data as an ndarray.

        Returns
        -------
        numpy.ndarray or ndarray-like
            Flattened data of the Series.

        See Also
        --------
        numpy.ndarray.ravel
        """
        return self._values.ravel(order=order)
  437. def compress(self, condition, *args, **kwargs):
  438. """
  439. Return selected slices of an array along given axis as a Series.
  440. .. deprecated:: 0.24.0
  441. See Also
  442. --------
  443. numpy.ndarray.compress
  444. """
  445. msg = ("Series.compress(condition) is deprecated. "
  446. "Use 'Series[condition]' or "
  447. "'np.asarray(series).compress(condition)' instead.")
  448. warnings.warn(msg, FutureWarning, stacklevel=2)
  449. nv.validate_compress(args, kwargs)
  450. return self[condition]
  451. def nonzero(self):
  452. """
  453. Return the *integer* indices of the elements that are non-zero.
  454. .. deprecated:: 0.24.0
  455. Please use .to_numpy().nonzero() as a replacement.
  456. This method is equivalent to calling `numpy.nonzero` on the
  457. series data. For compatibility with NumPy, the return value is
  458. the same (a tuple with an array of indices for each dimension),
  459. but it will always be a one-item tuple because series only have
  460. one dimension.
  461. See Also
  462. --------
  463. numpy.nonzero
  464. Examples
  465. --------
  466. >>> s = pd.Series([0, 3, 0, 4])
  467. >>> s.nonzero()
  468. (array([1, 3]),)
  469. >>> s.iloc[s.nonzero()[0]]
  470. 1 3
  471. 3 4
  472. dtype: int64
  473. >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd'])
  474. # same return although index of s is different
  475. >>> s.nonzero()
  476. (array([1, 3]),)
  477. >>> s.iloc[s.nonzero()[0]]
  478. b 3
  479. d 4
  480. dtype: int64
  481. """
  482. msg = ("Series.nonzero() is deprecated "
  483. "and will be removed in a future version."
  484. "Use Series.to_numpy().nonzero() instead")
  485. warnings.warn(msg, FutureWarning, stacklevel=2)
  486. return self._values.nonzero()
    def put(self, *args, **kwargs):
        """
        Apply the `put` method to its `values` attribute if it has one.

        See Also
        --------
        numpy.ndarray.put
        """
        # Mutates in place; returns None like numpy.ndarray.put.
        self._values.put(*args, **kwargs)
    def __len__(self):
        """
        Return the length of the Series.
        """
        return len(self._data)
    def view(self, dtype=None):
        """
        Create a new view of the Series.

        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes as to not
        cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series are instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        # Reinterpret the buffer, reusing the existing index.
        return self._constructor(self._values.view(dtype),
                                 index=self.index).__finalize__(self)
    # ----------------------------------------------------------------------
    # NDArray Compat

    def __array__(self, dtype=None):
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarary`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discared with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        # Warn on the ambiguous tz-aware case before defaulting to naive ns.
        if (dtype is None and isinstance(self.array, ABCDatetimeArray)
                and getattr(self.dtype, 'tz', None)):
            msg = (
                "Converting timezone-aware DatetimeArray to timezone-naive "
                "ndarray with 'datetime64[ns]' dtype. In the future, this "
                "will return an ndarray with 'object' dtype where each "
                "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
                "To accept the future behavior, pass 'dtype=object'.\n\t"
                "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
            )
            warnings.warn(msg, FutureWarning, stacklevel=3)
            dtype = 'M8[ns]'
        return np.asarray(self.array, dtype)
    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        # Re-box the ufunc result as a Series with this Series' index.
        return self._constructor(result, index=self.index,
                                 copy=False).__finalize__(self)
  617. def __array_prepare__(self, result, context=None):
  618. """
  619. Gets called prior to a ufunc.
  620. """
  621. # nice error message for non-ufunc types
  622. if (context is not None and
  623. (not isinstance(self._values, (np.ndarray, ExtensionArray))
  624. or isinstance(self._values, Categorical))):
  625. obj = context[1][0]
  626. raise TypeError("{obj} with dtype {dtype} cannot perform "
  627. "the numpy op {op}".format(
  628. obj=type(obj).__name__,
  629. dtype=getattr(obj, 'dtype', None),
  630. op=context[0].__name__))
  631. return result
    # ----------------------------------------------------------------------
    # Unary Methods

    @property
    def real(self):
        """
        Return the real value of vector.
        """
        return self.values.real

    @real.setter
    def real(self, v):
        # Writes through to the underlying values array.
        self.values.real = v
    @property
    def imag(self):
        """
        Return imag value of vector.
        """
        return self.values.imag

    @imag.setter
    def imag(self, v):
        # Writes through to the underlying values array.
        self.values.imag = v
    # coercion
    # Scalar conversion dunders, valid only for length-1 Series.
    __float__ = _coerce_method(float)
    __long__ = _coerce_method(int)  # Python 2 compatibility
    __int__ = _coerce_method(int)
    # ----------------------------------------------------------------------

    def _unpickle_series_compat(self, state):
        # Restore from legacy pickle formats (dict-state or pre-0.12 tuple).
        if isinstance(state, dict):
            self._data = state['_data']
            self.name = state['name']
            self.index = self._data.index

        elif isinstance(state, tuple):

            # < 0.12 series pickle

            nd_state, own_state = state

            # recreate the ndarray
            data = np.empty(nd_state[1], dtype=nd_state[2])
            np.ndarray.__setstate__(data, nd_state)

            # backwards compat
            index, name = own_state[0], None
            if len(own_state) > 1:
                name = own_state[1]

            # recreate
            self._data = SingleBlockManager(data, index, fastpath=True)
            self._index = index
            self.name = name

        else:
            raise Exception("cannot unpickle legacy formats -> [%s]" % state)
    # indexers
    @property
    def axes(self):
        """
        Return a list of the row axis labels.
        """
        # A Series has exactly one axis: its index.
        return [self.index]
    def _ixs(self, i, axis=0):
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int, slice, or sequence of integers

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """
        try:

            # dispatch to the values if we need
            values = self._values
            if isinstance(values, np.ndarray):
                return libindex.get_value_at(values, i)
            else:
                return values[i]
        except IndexError:
            # genuine out-of-bounds: propagate
            raise
        except Exception:
            # fall back to label-aware paths for non-integer indexers
            if isinstance(i, slice):
                indexer = self.index._convert_slice_indexer(i, kind='iloc')
                return self._get_values(indexer)
            else:
                label = self.index[i]
                if isinstance(label, Index):
                    return self.take(i, axis=axis, convert=True)
                else:
                    return libindex.get_value_at(self, i)
    @property
    def _is_mixed_type(self):
        # A Series has a single dtype, so it is never mixed.
        return False
  717. def _slice(self, slobj, axis=0, kind=None):
  718. slobj = self.index._convert_slice_indexer(slobj,
  719. kind=kind or 'getitem')
  720. return self._get_values(slobj)
    def __getitem__(self, key):
        # Fast path: treat `key` as a label and look it up directly;
        # fall back to positional/boolean/list handling in _get_with.
        key = com.apply_if_callable(key, self)
        try:
            result = self.index.get_value(self, key)

            if not is_scalar(result):
                if is_list_like(result) and not isinstance(result, Series):

                    # we need to box if loc of the key isn't scalar here
                    # otherwise have inline ndarray/lists
                    try:
                        if not is_scalar(self.index.get_loc(key)):
                            result = self._constructor(
                                result, index=[key] * len(result),
                                dtype=self.dtype).__finalize__(self)
                    except KeyError:
                        pass
            return result
        except InvalidIndexError:
            pass
        except (KeyError, ValueError):
            if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
                # kludge
                pass
            elif key is Ellipsis:
                return self
            elif com.is_bool_indexer(key):
                pass
            else:

                # we can try to coerce the indexer (or this will raise)
                new_key = self.index._convert_scalar_indexer(key,
                                                             kind='getitem')
                if type(new_key) != type(key):
                    return self.__getitem__(new_key)
                raise

        except Exception:
            raise

        if is_iterator(key):
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)

        return self._get_with(key)
    def _get_with(self, key):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind='getitem')
            return self._get_values(indexer)
        elif isinstance(key, ABCDataFrame):
            raise TypeError('Indexing a Series with DataFrame is not '
                            'supported, use the appropriate DataFrame column')
        elif isinstance(key, tuple):
            try:
                return self._get_values_tuple(key)
            except Exception:
                # a 1-tuple wrapping a slice is still a valid indexer
                if len(key) == 1:
                    key = key[0]
                    if isinstance(key, slice):
                        return self._get_values(key)
                raise

        # pragma: no cover
        if not isinstance(key, (list, np.ndarray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        if key_type == 'integer':
            # integer keys are labels only when the index itself is numeric
            if self.index.is_integer() or self.index.is_floating():
                return self.loc[key]
            else:
                return self._get_values(key)
        elif key_type == 'boolean':
            return self._get_values(key)

        try:
            # handle the dup indexing case (GH 4246)
            if isinstance(key, (list, tuple)):
                return self.loc[key]

            return self.reindex(key)
        except Exception:
            # [slice(0, 5, None)] will break if you convert to ndarray,
            # e.g. as requested by np.median
            # hack
            if isinstance(key[0], slice):
                return self._get_values(key)
            raise
  804. def _get_values_tuple(self, key):
  805. # mpl hackaround
  806. if com._any_none(*key):
  807. return self._get_values(key)
  808. if not isinstance(self.index, MultiIndex):
  809. raise ValueError('Can only tuple-index with a MultiIndex')
  810. # If key is contained, would have returned by now
  811. indexer, new_index = self.index.get_loc_level(key)
  812. return self._constructor(self._values[indexer],
  813. index=new_index).__finalize__(self)
  814. def _get_values(self, indexer):
  815. try:
  816. return self._constructor(self._data.get_slice(indexer),
  817. fastpath=True).__finalize__(self)
  818. except Exception:
  819. return self._values[indexer]
    def __setitem__(self, key, value):
        key = com.apply_if_callable(key, self)

        def setitem(key, value):
            # Try the fast label-engine path first, then fall back to
            # positional, boolean, and label-based assignment.
            try:
                self._set_with_engine(key, value)
                return
            except com.SettingWithCopyError:
                raise
            except (KeyError, ValueError):
                values = self._values
                if (is_integer(key) and
                        not self.index.inferred_type == 'integer'):
                    # positional assignment on a non-integer index
                    values[key] = value
                    return
                elif key is Ellipsis:
                    self[:] = value
                    return
                elif com.is_bool_indexer(key):
                    pass
                elif is_timedelta64_dtype(self.dtype):
                    # reassign a null value to iNaT
                    if isna(value):
                        value = iNaT

                        try:
                            self.index._engine.set_value(self._values, key,
                                                         value)
                            return
                        except TypeError:
                            pass

                self.loc[key] = value
                return

            except TypeError as e:
                if (isinstance(key, tuple) and
                        not isinstance(self.index, MultiIndex)):
                    raise ValueError("Can only tuple-index with a MultiIndex")

                # python 3 type errors should be raised
                if _is_unorderable_exception(e):
                    raise IndexError(key)

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                try:
                    self._where(~key, value, inplace=True)
                    return
                except InvalidIndexError:
                    pass

            self._set_with(key, value)

        # do the setitem
        cacher_needs_updating = self._check_is_chained_assignment_possible()
        setitem(key, value)
        if cacher_needs_updating:
            self._maybe_update_cacher()
  871. def _set_with_engine(self, key, value):
  872. values = self._values
  873. try:
  874. self.index._engine.set_value(values, key, value)
  875. return
  876. except KeyError:
  877. values[self.index.get_loc(key)] = value
  878. return
    def _set_with(self, key, value):
        """
        Generic setter for keys that the engine fast-path could not handle:
        slices, tuples, scalars, and list-likes.
        """
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            # convert the slice to a positional indexer and set positionally
            indexer = self.index._convert_slice_indexer(key, kind='getitem')
            return self._set_values(indexer, value)
        else:
            if isinstance(key, tuple):
                # NOTE(review): any failure here is swallowed and the tuple
                # then falls through to the generic path below — confirm the
                # double attempt is intended
                try:
                    self._set_values(key, value)
                except Exception:
                    pass

            # normalize the key to a list-like
            if is_scalar(key):
                key = [key]
            elif not isinstance(key, (list, Series, np.ndarray)):
                try:
                    key = list(key)
                except Exception:
                    key = [key]

            # classify the key to choose between positional and label setting
            if isinstance(key, Index):
                key_type = key.inferred_type
            else:
                key_type = lib.infer_dtype(key, skipna=False)

            if key_type == 'integer':
                if self.index.inferred_type == 'integer':
                    # integer keys on an integer index are labels
                    self._set_labels(key, value)
                else:
                    return self._set_values(key, value)
            elif key_type == 'boolean':
                self._set_values(key.astype(np.bool_), value)
            else:
                self._set_labels(key, value)
  910. def _set_labels(self, key, value):
  911. if isinstance(key, Index):
  912. key = key.values
  913. else:
  914. key = com.asarray_tuplesafe(key)
  915. indexer = self.index.get_indexer(key)
  916. mask = indexer == -1
  917. if mask.any():
  918. raise ValueError('%s not contained in the index' % str(key[mask]))
  919. self._set_values(indexer, value)
  920. def _set_values(self, key, value):
  921. if isinstance(key, Series):
  922. key = key._values
  923. self._data = self._data.setitem(indexer=key, value=value)
  924. self._maybe_update_cacher()
  925. def repeat(self, repeats, axis=None):
  926. """
  927. Repeat elements of a Series.
  928. Returns a new Series where each element of the current Series
  929. is repeated consecutively a given number of times.
  930. Parameters
  931. ----------
  932. repeats : int or array of ints
  933. The number of repetitions for each element. This should be a
  934. non-negative integer. Repeating 0 times will return an empty
  935. Series.
  936. axis : None
  937. Must be ``None``. Has no effect but is accepted for compatibility
  938. with numpy.
  939. Returns
  940. -------
  941. Series
  942. Newly created Series with repeated elements.
  943. See Also
  944. --------
  945. Index.repeat : Equivalent function for Index.
  946. numpy.repeat : Similar method for :class:`numpy.ndarray`.
  947. Examples
  948. --------
  949. >>> s = pd.Series(['a', 'b', 'c'])
  950. >>> s
  951. 0 a
  952. 1 b
  953. 2 c
  954. dtype: object
  955. >>> s.repeat(2)
  956. 0 a
  957. 0 a
  958. 1 b
  959. 1 b
  960. 2 c
  961. 2 c
  962. dtype: object
  963. >>> s.repeat([1, 2, 3])
  964. 0 a
  965. 1 b
  966. 1 b
  967. 2 c
  968. 2 c
  969. 2 c
  970. dtype: object
  971. """
  972. nv.validate_repeat(tuple(), dict(axis=axis))
  973. new_index = self.index.repeat(repeats)
  974. new_values = self._values.repeat(repeats)
  975. return self._constructor(new_values,
  976. index=new_index).__finalize__(self)
  977. def get_value(self, label, takeable=False):
  978. """
  979. Quickly retrieve single value at passed index label.
  980. .. deprecated:: 0.21.0
  981. Please use .at[] or .iat[] accessors.
  982. Parameters
  983. ----------
  984. label : object
  985. takeable : interpret the index as indexers, default False
  986. Returns
  987. -------
  988. scalar value
  989. """
  990. warnings.warn("get_value is deprecated and will be removed "
  991. "in a future release. Please use "
  992. ".at[] or .iat[] accessors instead", FutureWarning,
  993. stacklevel=2)
  994. return self._get_value(label, takeable=takeable)
  995. def _get_value(self, label, takeable=False):
  996. if takeable is True:
  997. return com.maybe_box_datetimelike(self._values[label])
  998. return self.index.get_value(self._values, label)
  999. _get_value.__doc__ = get_value.__doc__
  1000. def set_value(self, label, value, takeable=False):
  1001. """
  1002. Quickly set single value at passed label.
  1003. .. deprecated:: 0.21.0
  1004. Please use .at[] or .iat[] accessors.
  1005. If label is not contained, a new object is created with the label
  1006. placed at the end of the result index.
  1007. Parameters
  1008. ----------
  1009. label : object
  1010. Partial indexing with MultiIndex not allowed
  1011. value : object
  1012. Scalar value
  1013. takeable : interpret the index as indexers, default False
  1014. Returns
  1015. -------
  1016. Series
  1017. If label is contained, will be reference to calling Series,
  1018. otherwise a new object.
  1019. """
  1020. warnings.warn("set_value is deprecated and will be removed "
  1021. "in a future release. Please use "
  1022. ".at[] or .iat[] accessors instead", FutureWarning,
  1023. stacklevel=2)
  1024. return self._set_value(label, value, takeable=takeable)
  1025. def _set_value(self, label, value, takeable=False):
  1026. try:
  1027. if takeable:
  1028. self._values[label] = value
  1029. else:
  1030. self.index._engine.set_value(self._values, label, value)
  1031. except (KeyError, TypeError):
  1032. # set using a non-recursive method
  1033. self.loc[label] = value
  1034. return self
  1035. _set_value.__doc__ = set_value.__doc__
  1036. def reset_index(self, level=None, drop=False, name=None, inplace=False):
  1037. """
  1038. Generate a new DataFrame or Series with the index reset.
  1039. This is useful when the index needs to be treated as a column, or
  1040. when the index is meaningless and needs to be reset to the default
  1041. before another operation.
  1042. Parameters
  1043. ----------
  1044. level : int, str, tuple, or list, default optional
  1045. For a Series with a MultiIndex, only remove the specified levels
  1046. from the index. Removes all levels by default.
  1047. drop : bool, default False
  1048. Just reset the index, without inserting it as a column in
  1049. the new DataFrame.
  1050. name : object, optional
  1051. The name to use for the column containing the original Series
  1052. values. Uses ``self.name`` by default. This argument is ignored
  1053. when `drop` is True.
  1054. inplace : bool, default False
  1055. Modify the Series in place (do not create a new object).
  1056. Returns
  1057. -------
  1058. Series or DataFrame
  1059. When `drop` is False (the default), a DataFrame is returned.
  1060. The newly created columns will come first in the DataFrame,
  1061. followed by the original Series values.
  1062. When `drop` is True, a `Series` is returned.
  1063. In either case, if ``inplace=True``, no value is returned.
  1064. See Also
  1065. --------
  1066. DataFrame.reset_index: Analogous function for DataFrame.
  1067. Examples
  1068. --------
  1069. >>> s = pd.Series([1, 2, 3, 4], name='foo',
  1070. ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
  1071. Generate a DataFrame with default index.
  1072. >>> s.reset_index()
  1073. idx foo
  1074. 0 a 1
  1075. 1 b 2
  1076. 2 c 3
  1077. 3 d 4
  1078. To specify the name of the new column use `name`.
  1079. >>> s.reset_index(name='values')
  1080. idx values
  1081. 0 a 1
  1082. 1 b 2
  1083. 2 c 3
  1084. 3 d 4
  1085. To generate a new Series with the default set `drop` to True.
  1086. >>> s.reset_index(drop=True)
  1087. 0 1
  1088. 1 2
  1089. 2 3
  1090. 3 4
  1091. Name: foo, dtype: int64
  1092. To update the Series in place, without generating a new one
  1093. set `inplace` to True. Note that it also requires ``drop=True``.
  1094. >>> s.reset_index(inplace=True, drop=True)
  1095. >>> s
  1096. 0 1
  1097. 1 2
  1098. 2 3
  1099. 3 4
  1100. Name: foo, dtype: int64
  1101. The `level` parameter is interesting for Series with a multi-level
  1102. index.
  1103. >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
  1104. ... np.array(['one', 'two', 'one', 'two'])]
  1105. >>> s2 = pd.Series(
  1106. ... range(4), name='foo',
  1107. ... index=pd.MultiIndex.from_arrays(arrays,
  1108. ... names=['a', 'b']))
  1109. To remove a specific level from the Index, use `level`.
  1110. >>> s2.reset_index(level='a')
  1111. a foo
  1112. b
  1113. one bar 0
  1114. two bar 1
  1115. one baz 2
  1116. two baz 3
  1117. If `level` is not set, all levels are removed from the Index.
  1118. >>> s2.reset_index()
  1119. a b foo
  1120. 0 bar one 0
  1121. 1 bar two 1
  1122. 2 baz one 2
  1123. 3 baz two 3
  1124. """
  1125. inplace = validate_bool_kwarg(inplace, 'inplace')
  1126. if drop:
  1127. new_index = ibase.default_index(len(self))
  1128. if level is not None:
  1129. if not isinstance(level, (tuple, list)):
  1130. level = [level]
  1131. level = [self.index._get_level_number(lev) for lev in level]
  1132. if len(level) < self.index.nlevels:
  1133. new_index = self.index.droplevel(level)
  1134. if inplace:
  1135. self.index = new_index
  1136. # set name if it was passed, otherwise, keep the previous name
  1137. self.name = name or self.name
  1138. else:
  1139. return self._constructor(self._values.copy(),
  1140. index=new_index).__finalize__(self)
  1141. elif inplace:
  1142. raise TypeError('Cannot reset_index inplace on a Series '
  1143. 'to create a DataFrame')
  1144. else:
  1145. df = self.to_frame(name)
  1146. return df.reset_index(level=level, drop=drop)
  1147. # ----------------------------------------------------------------------
  1148. # Rendering Methods
  1149. def __unicode__(self):
  1150. """
  1151. Return a string representation for a particular DataFrame.
  1152. Invoked by unicode(df) in py2 only. Yields a Unicode String in both
  1153. py2/py3.
  1154. """
  1155. buf = StringIO(u(""))
  1156. width, height = get_terminal_size()
  1157. max_rows = (height if get_option("display.max_rows") == 0 else
  1158. get_option("display.max_rows"))
  1159. show_dimensions = get_option("display.show_dimensions")
  1160. self.to_string(buf=buf, name=self.name, dtype=self.dtype,
  1161. max_rows=max_rows, length=show_dimensions)
  1162. result = buf.getvalue()
  1163. return result
  1164. def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
  1165. index=True, length=False, dtype=False, name=False,
  1166. max_rows=None):
  1167. """
  1168. Render a string representation of the Series.
  1169. Parameters
  1170. ----------
  1171. buf : StringIO-like, optional
  1172. Buffer to write to.
  1173. na_rep : str, optional
  1174. String representation of NaN to use, default 'NaN'.
  1175. float_format : one-parameter function, optional
  1176. Formatter function to apply to columns' elements if they are
  1177. floats, default None.
  1178. header : bool, default True
  1179. Add the Series header (index name).
  1180. index : bool, optional
  1181. Add index (row) labels, default True.
  1182. length : bool, default False
  1183. Add the Series length.
  1184. dtype : bool, default False
  1185. Add the Series dtype.
  1186. name : bool, default False
  1187. Add the Series name if not None.
  1188. max_rows : int, optional
  1189. Maximum number of rows to show before truncating. If None, show
  1190. all.
  1191. Returns
  1192. -------
  1193. str or None
  1194. String representation of Series if ``buf=None``, otherwise None.
  1195. """
  1196. formatter = fmt.SeriesFormatter(self, name=name, length=length,
  1197. header=header, index=index,
  1198. dtype=dtype, na_rep=na_rep,
  1199. float_format=float_format,
  1200. max_rows=max_rows)
  1201. result = formatter.to_string()
  1202. # catch contract violations
  1203. if not isinstance(result, compat.text_type):
  1204. raise AssertionError("result must be of type unicode, type"
  1205. " of result is {0!r}"
  1206. "".format(result.__class__.__name__))
  1207. if buf is None:
  1208. return result
  1209. else:
  1210. try:
  1211. buf.write(result)
  1212. except AttributeError:
  1213. with open(buf, 'w') as f:
  1214. f.write(result)
  1215. # ----------------------------------------------------------------------
  1216. def iteritems(self):
  1217. """
  1218. Lazily iterate over (index, value) tuples.
  1219. """
  1220. return zip(iter(self.index), iter(self))
  1221. items = iteritems
  1222. # ----------------------------------------------------------------------
  1223. # Misc public methods
  1224. def keys(self):
  1225. """
  1226. Return alias for index.
  1227. """
  1228. return self.index
  1229. def to_dict(self, into=dict):
  1230. """
  1231. Convert Series to {label -> value} dict or dict-like object.
  1232. Parameters
  1233. ----------
  1234. into : class, default dict
  1235. The collections.Mapping subclass to use as the return
  1236. object. Can be the actual class or an empty
  1237. instance of the mapping type you want. If you want a
  1238. collections.defaultdict, you must pass it initialized.
  1239. .. versionadded:: 0.21.0
  1240. Returns
  1241. -------
  1242. collections.Mapping
  1243. Key-value representation of Series.
  1244. Examples
  1245. --------
  1246. >>> s = pd.Series([1, 2, 3, 4])
  1247. >>> s.to_dict()
  1248. {0: 1, 1: 2, 2: 3, 3: 4}
  1249. >>> from collections import OrderedDict, defaultdict
  1250. >>> s.to_dict(OrderedDict)
  1251. OrderedDict([(0, 1)

Large files are truncated, but you can click here to view the full file