PageRenderTime 79ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/indexes/base.py

http://github.com/wesm/pandas
Python | 3638 lines | 3257 code | 106 blank | 275 comment | 156 complexity | d7b2bb923b284eccd79964d1a0e22d83 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import datetime
  2. import warnings
  3. import operator
  4. import numpy as np
  5. import pandas.tslib as tslib
  6. import pandas.lib as lib
  7. import pandas._join as _join
  8. import pandas.algos as _algos
  9. import pandas.index as _index
  10. from pandas.lib import Timestamp, Timedelta, is_datetime_array
  11. from pandas.compat import range, u
  12. from pandas.compat.numpy import function as nv
  13. from pandas import compat
  14. from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
  15. from pandas.types.missing import isnull, array_equivalent
  16. from pandas.types.common import (_ensure_int64, _ensure_object,
  17. _ensure_platform_int,
  18. is_integer,
  19. is_float,
  20. is_dtype_equal,
  21. is_object_dtype,
  22. is_categorical_dtype,
  23. is_bool_dtype,
  24. is_integer_dtype, is_float_dtype,
  25. is_datetime64_any_dtype,
  26. is_timedelta64_dtype,
  27. needs_i8_conversion,
  28. is_iterator, is_list_like,
  29. is_scalar)
  30. from pandas.types.cast import _coerce_indexer_dtype
  31. from pandas.core.common import (is_bool_indexer,
  32. _values_from_object,
  33. _asarray_tuplesafe)
  34. from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray,
  35. IndexOpsMixin)
  36. import pandas.core.base as base
  37. from pandas.util.decorators import (Appender, Substitution, cache_readonly,
  38. deprecate, deprecate_kwarg)
  39. import pandas.core.common as com
  40. import pandas.types.concat as _concat
  41. import pandas.core.missing as missing
  42. import pandas.core.algorithms as algos
  43. from pandas.formats.printing import pprint_thing
  44. from pandas.core.ops import _comp_method_OBJECT_ARRAY
  45. from pandas.core.strings import StringAccessorMixin
  46. from pandas.core.config import get_option
  # simplify
  # Default pretty-printer used by this module: delegates to pprint_thing,
  # escaping tab / carriage-return / newline characters and quoting strings.
  default_pprint = lambda x, max_seq_items=None: \
      pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
                   max_seq_items=max_seq_items)

  # Names exported by ``from <module> import *``.
  __all__ = ['Index']

  # NOTE(review): presumably the inferred-type labels whose values cannot be
  # sorted reliably; the consumer is not visible in this part of the file.
  _unsortable_types = frozenset(('mixed', 'mixed-integer'))

  # NOTE(review): looks like substitution values for docstring templates;
  # confirm against the call sites.
  _index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array')
  # Registry of docstring templates, filled in further down and attached to
  # methods with the Appender decorator.
  _index_shared_docs = dict()
  def _try_get_item(x):
      """
      Return ``x.item()`` when ``x`` provides an ``item`` method, else ``x``.

      An AttributeError raised while looking up or calling ``item`` results
      in ``x`` being returned unchanged.
      """
      try:
          unboxed = x.item()
      except AttributeError:
          return x
      return unboxed
  class InvalidIndexError(Exception):
      """Exception type for invalid-index conditions (no extra behaviour)."""
  # NumPy dtype for Python objects; Index.__new__ compares requested dtypes
  # against it.
  _o_dtype = np.dtype(object)
  # Factory for identity tokens: calling it yields a fresh plain object
  # (used by Index._reset_identity and compared by identity in Index.is_).
  _Identity = object
  def _new_Index(cls, d):
      """
      Rebuild an index when unpickling.

      The default reconstruction calls ``__new__`` without arguments, which
      breaks it, so the attribute dict ``d`` is forwarded to ``cls.__new__``
      as keyword arguments instead.
      """
      constructor_kwargs = d
      return cls.__new__(cls, **constructor_kwargs)
  69. class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
  70. """
  71. Immutable ndarray implementing an ordered, sliceable set. The basic object
  72. storing axis labels for all pandas objects
  73. Parameters
  74. ----------
  75. data : array-like (1-dimensional)
  76. dtype : NumPy dtype (default: object)
  77. copy : bool
  78. Make a copy of input ndarray
  79. name : object
  80. Name to be stored in the index
  81. tupleize_cols : bool (default: True)
  82. When True, attempt to create a MultiIndex if possible
  83. Notes
  84. -----
  85. An Index instance can **only** contain hashable objects
  86. """
  87. # To hand over control to subclasses
  88. _join_precedence = 1
  89. # Cython methods
  90. _groupby = _algos.groupby_object
  91. _arrmap = _algos.arrmap_object
  92. _left_indexer_unique = _join.left_join_indexer_unique_object
  93. _left_indexer = _join.left_join_indexer_object
  94. _inner_indexer = _join.inner_join_indexer_object
  95. _outer_indexer = _join.outer_join_indexer_object
  96. _box_scalars = False
  97. _typ = 'index'
  98. _data = None
  99. _id = None
  100. name = None
  101. asi8 = None
  102. _comparables = ['name']
  103. _attributes = ['name']
  104. _allow_index_ops = True
  105. _allow_datetime_index_ops = False
  106. _allow_period_index_ops = False
  107. _is_numeric_dtype = False
  108. _can_hold_na = True
  109. # prioritize current class for _shallow_copy_with_infer,
  110. # used to infer integers as datetime-likes
  111. _infer_as_myclass = False
  112. _engine_type = _index.ObjectEngine
  def __new__(cls, data=None, dtype=None, copy=False, name=None,
              fastpath=False, tupleize_cols=True, **kwargs):
      """
      Build an Index, dispatching to a specialised subclass where possible.

      Dispatch order, as implemented below:

      1. ``fastpath`` -> ``cls._simple_new`` without any inference.
      2. RangeIndex / ``range`` input -> RangeIndex.
      3. categorical data or dtype -> CategoricalIndex.
      4. ndarray / Index / Series input -> datetime, timedelta, period,
         integer or float index depending on dtype and inferred content,
         otherwise an object-dtype index built via ``cls._simple_new``.
      5. objects exposing ``__array__`` -> converted and re-dispatched.
      6. ``None`` / scalar -> ``cls._scalar_data_error`` (raises TypeError).
      7. any other iterable -> optionally a MultiIndex for a list of
         tuples, otherwise converted to an object array and re-dispatched.

      Parameters
      ----------
      data : array-like (1-dimensional)
      dtype : NumPy dtype, optional
      copy : bool, default False
      name : object, optional
          Defaults to ``data.name`` when ``data`` has that attribute.
      fastpath : bool, default False
      tupleize_cols : bool, default True
      **kwargs
          Forwarded to the subclass constructors (e.g. ``tz``, ``names``).
      """
      if name is None and hasattr(data, 'name'):
          name = data.name

      if fastpath:
          return cls._simple_new(data, name)

      from .range import RangeIndex

      # range
      if isinstance(data, RangeIndex):
          return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
      elif isinstance(data, range):
          return RangeIndex.from_range(data, copy=copy, dtype=dtype,
                                       name=name)

      # categorical
      if is_categorical_dtype(data) or is_categorical_dtype(dtype):
          from .category import CategoricalIndex
          return CategoricalIndex(data, copy=copy, name=name, **kwargs)

      # index-like
      elif isinstance(data, (np.ndarray, Index, ABCSeries)):

          if (is_datetime64_any_dtype(data) or
              (dtype is not None and is_datetime64_any_dtype(dtype)) or
                  'tz' in kwargs):
              from pandas.tseries.index import DatetimeIndex
              result = DatetimeIndex(data, copy=copy, name=name,
                                     dtype=dtype, **kwargs)
              if dtype is not None and is_dtype_equal(_o_dtype, dtype):
                  return Index(result.to_pydatetime(), dtype=_o_dtype)
              else:
                  return result

          elif (is_timedelta64_dtype(data) or
                (dtype is not None and is_timedelta64_dtype(dtype))):
              from pandas.tseries.tdi import TimedeltaIndex
              result = TimedeltaIndex(data, copy=copy, name=name, **kwargs)
              # same object-dtype test as the datetime branch above
              if dtype is not None and is_dtype_equal(_o_dtype, dtype):
                  return Index(result.to_pytimedelta(), dtype=_o_dtype)
              else:
                  return result

          if dtype is not None:
              try:

                  # we need to avoid having numpy coerce
                  # things that look like ints/floats to ints unless
                  # they are actually ints, e.g. '0' and 0.0
                  # should not be coerced
                  # GH 11836
                  if is_integer_dtype(dtype):
                      inferred = lib.infer_dtype(data)
                      if inferred == 'integer':
                          data = np.array(data, copy=copy, dtype=dtype)
                      elif inferred in ['floating', 'mixed-integer-float']:

                          # if we are actually all equal to integers
                          # then coerce to integer
                          from .numeric import Int64Index, Float64Index
                          try:
                              res = data.astype('i8')
                              if (res == data).all():
                                  return Int64Index(res, copy=copy,
                                                    name=name)
                          except (TypeError, ValueError):
                              pass

                          # return an actual float index
                          return Float64Index(data, copy=copy, dtype=dtype,
                                              name=name)

                      elif inferred == 'string':
                          pass
                      else:
                          data = data.astype(dtype)
                  elif is_float_dtype(dtype):
                      inferred = lib.infer_dtype(data)
                      if inferred == 'string':
                          pass
                      else:
                          data = data.astype(dtype)
                  else:
                      data = np.array(data, dtype=dtype, copy=copy)

              except (TypeError, ValueError):
                  # best effort: fall through with the data unconverted
                  pass

          # maybe coerce to a sub-class
          from pandas.tseries.period import (PeriodIndex,
                                             IncompatibleFrequency)
          if isinstance(data, PeriodIndex):
              return PeriodIndex(data, copy=copy, name=name, **kwargs)
          if issubclass(data.dtype.type, np.integer):
              from .numeric import Int64Index
              return Int64Index(data, copy=copy, dtype=dtype, name=name)
          elif issubclass(data.dtype.type, np.floating):
              from .numeric import Float64Index
              return Float64Index(data, copy=copy, dtype=dtype, name=name)
          # np.bool_ is the NumPy boolean scalar type; the plain ``np.bool``
          # alias is not available on every NumPy release
          elif (issubclass(data.dtype.type, np.bool_) or
                is_bool_dtype(data)):
              subarr = data.astype('object')
          else:
              subarr = _asarray_tuplesafe(data, dtype=object)

          # _asarray_tuplesafe does not always copy underlying data,
          # so need to make sure that this happens
          if copy:
              subarr = subarr.copy()

          if dtype is None:
              inferred = lib.infer_dtype(subarr)
              if inferred == 'integer':
                  from .numeric import Int64Index
                  return Int64Index(subarr.astype('i8'), copy=copy,
                                    name=name)
              elif inferred in ['floating', 'mixed-integer-float']:
                  from .numeric import Float64Index
                  return Float64Index(subarr, copy=copy, name=name)
              elif inferred == 'boolean':
                  # don't support boolean explicitly ATM
                  pass
              elif inferred != 'string':
                  if inferred.startswith('datetime'):
                      if (lib.is_datetime_with_singletz_array(subarr) or
                              'tz' in kwargs):
                          # only when subarr has the same tz
                          from pandas.tseries.index import DatetimeIndex
                          try:
                              return DatetimeIndex(subarr, copy=copy,
                                                   name=name, **kwargs)
                          except tslib.OutOfBoundsDatetime:
                              pass

                  elif inferred.startswith('timedelta'):
                      from pandas.tseries.tdi import TimedeltaIndex
                      return TimedeltaIndex(subarr, copy=copy, name=name,
                                            **kwargs)
                  elif inferred == 'period':
                      try:
                          return PeriodIndex(subarr, name=name, **kwargs)
                      except IncompatibleFrequency:
                          pass
          return cls._simple_new(subarr, name)

      elif hasattr(data, '__array__'):
          return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
                       **kwargs)
      elif data is None or is_scalar(data):
          cls._scalar_data_error(data)
      else:
          if (tupleize_cols and isinstance(data, list) and data and
                  isinstance(data[0], tuple)):

              # we must be all tuples, otherwise don't construct
              # 10697
              if all(isinstance(e, tuple) for e in data):
                  try:
                      # must be orderable in py3
                      if compat.PY3:
                          sorted(data)
                      from .multi import MultiIndex
                      return MultiIndex.from_tuples(
                          data, names=name or kwargs.get('names'))
                  except (TypeError, KeyError):
                      # python2 - MultiIndex fails on mixed types
                      pass
          # other iterable of some kind
          subarr = _asarray_tuplesafe(data, dtype=object)
          return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
  """
  NOTE for new Index creation:

  - _simple_new: Returns a new Index of the same type as the caller.
    The caller is responsible for providing all metadata (such as name).
    Using _shallow_copy is recommended because it fills in this metadata
    unless otherwise specified.

  - _shallow_copy: Returns a new Index of the same type (using
    _simple_new), filling in the caller's metadata unless otherwise
    specified. Passed kwargs overwrite the corresponding metadata.

  - _shallow_copy_with_infer: Returns a new Index whose type is inferred
    from the passed values. Like _shallow_copy, it fills in the caller's
    metadata unless otherwise specified.

  See each method's docstring.
  """
  @classmethod
  def _simple_new(cls, values, name=None, dtype=None, **kwargs):
      """
      Build an instance of ``cls`` directly around ``values``.

      ``values`` is expected to already be dtype-compatible. Input without
      a ``dtype`` attribute is coerced: ``None`` combined with a ``dtype``
      becomes an empty array of that dtype; anything else is converted to
      an ndarray and, if that array is object dtype, routed once through
      the regular constructor. Must be careful not to recurse.

      Parameters
      ----------
      values : array-like
      name : object, optional
      dtype : NumPy dtype, optional
          Only consulted while coercing input that has no ``dtype``.
      **kwargs
          Each item is set as an attribute on the new object.

      Returns
      -------
      The new instance, after ``_reset_identity()`` has been called on it.
      """
      if not hasattr(values, 'dtype'):
          if values is None and dtype is not None:
              values = np.empty(0, dtype=dtype)
          else:
              # asarray copies only when it has to, whereas
              # np.array(..., copy=False) raises on NumPy 2 if a copy
              # is required
              values = np.asarray(values)
              if is_object_dtype(values):
                  values = cls(values, name=name, dtype=dtype,
                               **kwargs)._values

      result = object.__new__(cls)
      result._data = values
      result.name = name
      for k, v in compat.iteritems(kwargs):
          setattr(result, k, v)
      return result._reset_identity()
  300. _index_shared_docs['_shallow_copy'] = """
  301. create a new Index with the same class as the caller, don't copy the
  302. data, use the same object attributes with passed in attributes taking
  303. precedence
  304. *this is an internal non-public method*
  305. Parameters
  306. ----------
  307. values : the values to create the new Index, optional
  308. kwargs : updates the default attributes for this Index
  309. """
  @Appender(_index_shared_docs['_shallow_copy'])
  def _shallow_copy(self, values=None, **kwargs):
      # The docstring is supplied by the Appender decorator above.
      new_values = self.values if values is None else values
      # start from this object's attributes; passed kwargs take precedence
      merged = self._get_attributes_dict()
      merged.update(kwargs)
      return self._simple_new(new_values, **merged)
  def _shallow_copy_with_infer(self, values=None, **kwargs):
      """
      Create a new Index whose class is inferred from ``values``.

      The data is not copied; this object's attributes are reused, with
      passed-in attributes taking precedence.

      *this is an internal non-public method*

      Parameters
      ----------
      values : the values to create the new Index, optional
          Defaults to ``self.values``.
      kwargs : updates the default attributes for this Index
      """
      new_values = self.values if values is None else values
      ctor_kwargs = self._get_attributes_dict()
      ctor_kwargs.update(kwargs)
      ctor_kwargs['copy'] = False
      if self._infer_as_myclass:
          # give our own constructor the first shot; if it rejects the
          # values fall back to generic inference
          try:
              return self._constructor(new_values, **ctor_kwargs)
          except (TypeError, ValueError):
              pass
      return Index(new_values, **ctor_kwargs)
  def _deepcopy_if_needed(self, orig, copy=False):
      """
      .. versionadded:: 0.19.0

      Return a deep copy of self when its data shares memory with ``orig``.

      Subclasses should override this if self._base is not an ndarray.

      Parameters
      ----------
      orig : ndarray
          other ndarray to compare self._data against
      copy : boolean, default False
          when False, do not run any check, just return self

      Returns
      -------
      A copy of self if needed, otherwise self : Index
      """
      if not copy:
          return self

      # Compare the "base objects", i.e. the original memory allocations
      orig_base = orig.base if orig.base is not None else orig
      own = self._data
      own_base = own.base if own.base is not None else own
      if orig_base is own_base:
          return self.copy(deep=True)
      return self
  def _update_inplace(self, result, **kwargs):
      """Always raise TypeError; guards calls coming from IndexOpsMixin."""
      message = "Index can't be updated inplace"
      raise TypeError(message)
  def is_(self, other):
      """
      Identity-like check that also works through views.

      Note: this is *not* the same as ``Index.identical()``, which checks
      that metadata is also the same.

      Parameters
      ----------
      other : object
          other object to compare against.

      Returns
      -------
      True if both have same underlying data, False otherwise : bool
      """
      own_id = self._id
      if own_id is None:
          return False
      # Ellipsis (rather than None) is the sentinel for "other has no _id"
      return own_id is getattr(other, '_id', Ellipsis)
  def _reset_identity(self):
      """Give ``_id`` a brand-new identity token and return self."""
      fresh_token = _Identity()
      self._id = fresh_token
      return self
  384. # ndarray compat
  def __len__(self):
      """Return the number of entries held in ``_data``."""
      data = self._data
      return len(data)
  def __array__(self, dtype=None):
      """
      Array interface: return the underlying data viewed as a plain ndarray.

      ``dtype`` is accepted for protocol compatibility but is not used here.
      """
      as_ndarray = self._data.view(np.ndarray)
      return as_ndarray
  def __array_wrap__(self, result, context=None):
      """
      Called after a ufunc: boolean results are returned unchanged, anything
      else is wrapped in an Index carrying this object's attributes.
      """
      if not is_bool_dtype(result):
          attrs = self._maybe_update_attributes(self._get_attributes_dict())
          return Index(result, **attrs)
      return result
  @cache_readonly
  def dtype(self):
      """ dtype object of the underlying data """
      data = self._data
      return data.dtype
  @cache_readonly
  def dtype_str(self):
      """ ``str()`` of the dtype of the underlying data """
      dtype = self.dtype
      return str(dtype)
  @property
  def values(self):
      """ the underlying data viewed as a plain ndarray """
      as_ndarray = self._data.view(np.ndarray)
      return as_ndarray
  def get_values(self):
      """ method form of the ``values`` attribute """
      underlying = self.values
      return underlying
  417. # ops compat
  def tolist(self):
      """
      Return the Index values as a Python list (``list(self.values)``).
      """
      items = self.values
      return list(items)
  def repeat(self, n, *args, **kwargs):
      """
      Repeat elements of an Index. Refer to `numpy.ndarray.repeat`
      for more information about the `n` argument.

      Extra positional / keyword arguments are only passed to
      ``nv.validate_repeat``.

      See also
      --------
      numpy.ndarray.repeat
      """
      nv.validate_repeat(args, kwargs)
      repeated = self._values.repeat(n)
      return self._shallow_copy(repeated)
  def where(self, cond, other=None):
      """
      .. versionadded:: 0.19.0

      Return an Index of same shape as self and whose corresponding
      entries are from self where cond is True and otherwise are from
      other.

      Parameters
      ----------
      cond : boolean same length as self
      other : scalar, or array-like
          Defaults to ``self._na_value`` when None.
      """
      replacement = self._na_value if other is None else other
      selected = np.where(cond, self.values, replacement)
      return self._shallow_copy_with_infer(selected, dtype=self.dtype)
  def ravel(self, order='C'):
      """
      Return the flattened values of the underlying data, as produced by
      ``self._values.ravel(order=order)``.

      See also
      --------
      numpy.ndarray.ravel
      """
      underlying = self._values
      return underlying.ravel(order=order)
  456. # construction helpers
  @classmethod
  def _scalar_data_error(cls, data):
      """Raise the TypeError reported when ``data`` is not a collection."""
      template = ('{0}(...) must be called with a collection of some '
                  'kind, {1} was passed')
      raise TypeError(template.format(cls.__name__, repr(data)))
  @classmethod
  def _string_data_error(cls, data):
      """Raise the TypeError reported for unsupported string data."""
      message = ('String dtype not supported, you may need '
                 'to explicitly cast to a numeric type')
      raise TypeError(message)
  @classmethod
  def _coerce_to_ndarray(cls, data):
      """
      Coerce ``data`` to an ndarray.

      ndarrays and Index objects are returned untouched. ``None`` and
      scalars are reported through ``_scalar_data_error``. Iterables other
      than Series, list and tuple are first materialised as a list.
      """
      if isinstance(data, (np.ndarray, Index)):
          return data

      if data is None or is_scalar(data):
          cls._scalar_data_error(data)

      # other iterable of some kind
      if not isinstance(data, (ABCSeries, list, tuple)):
          data = list(data)
      return np.asarray(data)
  def _get_attributes_dict(self):
      """
      Map each name listed in ``self._attributes`` to its current value on
      this object (``None`` when the attribute is missing).
      """
      collected = {}
      for attr_name in self._attributes:
          collected[attr_name] = getattr(self, attr_name, None)
      return collected
  def view(self, cls=None):
      """
      Return a view of this index.

      When ``cls`` is given and has no ``_typ`` attribute (i.e. it is not
      one of the index types) the raw data is viewed as ``cls``; otherwise
      a shallow copy is returned. An Index result keeps this object's
      ``_id``.
      """
      wants_foreign_type = cls is not None and not hasattr(cls, '_typ')
      if wants_foreign_type:
          result = self._data.view(cls)
      else:
          result = self._shallow_copy()

      if isinstance(result, Index):
          result._id = self._id
      return result
  def _coerce_scalar_to_index(self, item):
      """
      Wrap a scalar in a one-element Index built with this index's dtype
      and attributes.

      Parameters
      ----------
      item : scalar item to coerce
      """
      attrs = self._get_attributes_dict()
      return Index([item], dtype=self.dtype, **attrs)
  500. _index_shared_docs['copy'] = """
  501. Make a copy of this object. Name and dtype sets those attributes on
  502. the new object.
  503. Parameters
  504. ----------
  505. name : string, optional
  506. deep : boolean, default False
  507. dtype : numpy dtype or pandas type
  508. Returns
  509. -------
  510. copy : Index
  511. Notes
  512. -----
  513. In most cases, there should be no functional difference from using
  514. ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
  515. """
  @Appender(_index_shared_docs['copy'])
  def copy(self, name=None, deep=False, dtype=None, **kwargs):
      # The docstring is supplied by the Appender decorator above.
      #
      # Name resolution: an explicit ``name`` or ``names`` (keyword) always
      # wins; only when neither is given does the copy fall back to this
      # index's own name. Passing both raises TypeError.
      names = kwargs.get('names')
      if names is not None and name is not None:
          raise TypeError("Can only provide one of `names` and `name`")

      if deep:
          new_index = self._shallow_copy(self._data.copy())
      else:
          new_index = self._shallow_copy()

      if names is None:
          if name is None:
              if deep:
                  # deep copies must not share a (possibly mutable) name
                  from copy import deepcopy
                  name = deepcopy(self.name)
              else:
                  name = self.name
          if name is not None:
              names = [name]

      if names:
          new_index = new_index.set_names(names)
      if dtype:
          new_index = new_index.astype(dtype)
      return new_index
  535. __copy__ = copy
  def __unicode__(self):
      """
      Return a string representation for this object.

      Invoked by unicode(df) in py2 only. Yields a Unicode String in both
      py2/py3.
      """
      klass = self.__class__.__name__
      data = self._format_data()
      attrs = self._format_attrs()
      space = self._format_space()

      separator = u(",%s") % space
      rendered_attrs = [u("%s=%s") % (key, val) for key, val in attrs]
      prepr = separator.join(rendered_attrs)

      # no data provided, just attributes
      data_part = '' if data is None else data

      return u("%s(%s%s)") % (klass, data_part, prepr)
  def _format_space(self):
      """
      Return the whitespace placed after each comma between repr attributes.

      A single space keeps the attributes on one line (the default); a
      newline-based value would put each attribute on its own line.
      """
      single_space = " "
      return single_space
  @property
  def _formatter_func(self):
      """
      Return the callable used to format a single value of this index
      (``default_pprint`` here).
      """
      formatter = default_pprint
      return formatter
  def _format_data(self):
      """
      Return the formatted data as a unicode string

      The result is the bracketed value list of the repr, e.g. ``'[1, 2], '``
      for short indexes. Longer indexes are wrapped to the display width and,
      when they exceed ``display.max_seq_items``, shortened to a head and a
      tail separated by ``...``.
      """
      from pandas.formats.format import get_console_size, _get_adjustment
      display_width, _ = get_console_size()
      if display_width is None:
          display_width = get_option('display.width') or 80

      # continuation-line prefixes: a newline followed by padding derived
      # from the class-name length (space1: +1, space2: +2)
      space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
      space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))

      n = len(self)
      sep = ','
      # a falsy option value means "no limit"
      max_seq_items = get_option('display.max_seq_items') or n
      formatter = self._formatter_func

      # do we want to justify (only do so for non-objects)
      is_justify = not (self.inferred_type in ('string', 'unicode') or
                        (self.inferred_type == 'categorical' and
                         is_object_dtype(self.categories)))

      # are we a truncated display
      is_truncated = n > max_seq_items

      # adj can optionally handle unicode eastern asian width
      adj = _get_adjustment()

      def _extend_line(s, line, value, display_width, next_line_prefix):
          # Append ``value`` to the current ``line``; if that would reach
          # ``display_width``, first flush ``line`` into ``s`` and start a
          # new line with ``next_line_prefix``.
          if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                  display_width):
              s += line.rstrip()
              line = next_line_prefix
          line += value
          return s, line

      def best_len(values):
          # widest display length among ``values`` (0 when empty)
          if values:
              return max([adj.len(x) for x in values])
          else:
              return 0

      if n == 0:
          summary = '[], '
      elif n == 1:
          first = formatter(self[0])
          summary = '[%s], ' % first
      elif n == 2:
          first = formatter(self[0])
          last = formatter(self[-1])
          summary = '[%s, %s], ' % (first, last)
      else:

          if n > max_seq_items:
              # truncated: show at most 10 items from each end
              n = min(max_seq_items // 2, 10)
              head = [formatter(x) for x in self[:n]]
              tail = [formatter(x) for x in self[-n:]]
          else:
              head = []
              tail = [formatter(x) for x in self]

          # adjust all values to max length if needed
          if is_justify:

              # however, if we are not truncated and we are only a single
              # line, then don't justify
              if (is_truncated or
                      not (len(', '.join(head)) < display_width and
                           len(', '.join(tail)) < display_width)):
                  max_len = max(best_len(head), best_len(tail))
                  head = [x.rjust(max_len) for x in head]
                  tail = [x.rjust(max_len) for x in tail]

          summary = ""
          line = space2

          for i in range(len(head)):
              word = head[i] + sep + ' '
              summary, line = _extend_line(summary, line, word,
                                           display_width, space2)

          if is_truncated:
              # remove trailing space of last line
              summary += line.rstrip() + space2 + '...'
              line = space2

          for i in range(len(tail) - 1):
              word = tail[i] + sep + ' '
              summary, line = _extend_line(summary, line, word,
                                           display_width, space2)

          # last value: no sep added + 1 space of width used for trailing ','
          summary, line = _extend_line(summary, line, tail[-1],
                                       display_width - 2, space2)
          summary += line
          summary += '],'

          if len(summary) > (display_width):
              summary += space1
          else:  # one row
              summary += ' '

          # remove initial space
          summary = '[' + summary[len(space2):]

      return summary
  def _format_attrs(self):
      """
      Return a list of ``(attr, formatted_value)`` tuples for the repr.

      ``dtype`` is always present; ``name`` is added when it is not None and
      ``length`` when the index is longer than ``display.max_seq_items``.
      """
      attrs = [('dtype', "'%s'" % self.dtype)]
      if self.name is not None:
          attrs.append(('name', default_pprint(self.name)))
      size = len(self)
      # a falsy option value means "no limit"
      limit = get_option('display.max_seq_items') or size
      if size > limit:
          attrs.append(('length', size))
      return attrs
  def to_series(self, **kwargs):
      """
      Create a Series with both index and values equal to the index keys
      useful with map for returning an indexer based on an index

      ``kwargs`` are accepted but not used by this implementation.

      Returns
      -------
      Series : dtype will be based on the type of the Index values.
      """
      from pandas import Series

      embedded = self._to_embed()
      return Series(embedded, index=self, name=self.name)
  def _to_embed(self, keep_tz=False):
      """
      *this is an internal non-public method*

      Return a copy of ``self.values``. ``keep_tz`` is not used here.
      """
      current = self.values
      return current.copy()
  681. _index_shared_docs['astype'] = """
  682. Create an Index with values cast to dtypes. The class of a new Index
  683. is determined by dtype. When conversion is impossible, a ValueError
  684. exception is raised.
  685. Parameters
  686. ----------
  687. dtype : numpy dtype or pandas type
  688. copy : bool, default True
  689. By default, astype always returns a newly allocated object.
  690. If copy is set to False and internal requirements on dtype are
  691. satisfied, the original data is used to create a new Index
  692. or the original Index is returned.
  693. .. versionadded:: 0.19.0
  694. """
  @Appender(_index_shared_docs['astype'])
  def astype(self, dtype, copy=True):
      # The docstring is supplied by the Appender decorator above.
      converted = self.values.astype(dtype, copy=copy)
      return Index(converted, name=self.name, dtype=dtype)
  def _to_safe_for_reshape(self):
      """
      Return an index safe for reshaping; for this class that is ``self``
      (hook for categorical indexes to convert to object).
      """
      return self
  def to_datetime(self, dayfirst=False):
      """
      For an Index containing strings or datetime.datetime objects, attempt
      conversion to DatetimeIndex

      String values are parsed with ``dateutil.parser.parse``, to which
      ``dayfirst`` is forwarded.
      """
      from pandas.tseries.index import DatetimeIndex

      if self.inferred_type != 'string':
          return DatetimeIndex(self.values)

      from dateutil.parser import parse

      def parser(x):
          return parse(x, dayfirst=dayfirst)

      parsed = lib.try_parse_dates(self.values, parser=parser)
      return DatetimeIndex(parsed)
  def _assert_can_do_setop(self, other):
      """Return True when ``other`` is list-like, else raise TypeError."""
      if is_list_like(other):
          return True
      raise TypeError('Input must be Index or array-like')
  def _convert_can_do_setop(self, other):
      """
      Return ``(other, result_name)`` for a set operation.

      A non-Index ``other`` is wrapped in an Index carrying our name, which
      is also the result name. For an Index ``other`` the result name is our
      name only when both names are equal, otherwise None.
      """
      if isinstance(other, Index):
          result_name = self.name if self.name == other.name else None
      else:
          other = Index(other, name=self.name)
          result_name = self.name
      return other, result_name
  def _convert_for_op(self, value):
      """
      Convert ``value`` so it can be inserted into the underlying ndarray.

      This implementation performs no conversion and returns ``value`` as is.
      """
      return value
  def _assert_can_do_op(self, value):
      """ Raise TypeError unless ``value`` is a scalar """
      if lib.isscalar(value):
          return
      msg = "'value' must be a scalar, passed: {0}"
      raise TypeError(msg.format(type(value).__name__))
  @property
  def nlevels(self):
      """Number of levels; always 1 for this class."""
      levels = 1
      return levels
  def _get_names(self):
      """Return the name wrapped in a one-element FrozenList."""
      single = (self.name, )
      return FrozenList(single)
  def _set_names(self, values, level=None):
      """
      Set ``name`` from a one-element sequence.

      Raises ValueError when ``values`` does not hold exactly one entry.
      ``level`` is accepted but not used here.
      """
      count = len(values)
      if count != 1:
          raise ValueError('Length of new names must be 1, got %d' %
                           count)
      self.name = values[0]
  # ``names``: property view of the name, read through _get_names and
  # written through _set_names.
  names = property(fset=_set_names, fget=_get_names)
  def set_names(self, names, level=None, inplace=False):
      """
      Set new names on index. Defaults to returning new index.

      Parameters
      ----------
      names : str or sequence
          name(s) to set
      level : int, level name, or sequence of int/level names (default None)
          If the index is a MultiIndex (hierarchical), level(s) to set (None
          for all levels).  Otherwise level must be None
      inplace : bool
          if True, mutates in place

      Returns
      -------
      new index (of same type and class...etc) [if inplace, returns None]

      Raises
      ------
      ValueError
          If ``level`` is given for an index with a single level.
      TypeError
          If a scalar ``level`` is combined with list-like ``names``, or a
          scalar ``names`` is passed without ``level`` to an index with
          more than one level.

      Examples
      --------
      >>> Index([1, 2, 3, 4]).set_names('foo')
      Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
      >>> Index([1, 2, 3, 4]).set_names(['foo'])
      Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
      >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                        (2, u'one'), (2, u'two')],
                                        names=['foo', 'bar'])
      >>> idx.set_names(['baz', 'quz'])
      MultiIndex(levels=[[1, 2], [u'one', u'two']],
                 labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                 names=[u'baz', u'quz'])
      >>> idx.set_names('baz', level=0)
      MultiIndex(levels=[[1, 2], [u'one', u'two']],
                 labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                 names=[u'baz', u'bar'])
      """
      has_level = level is not None
      if has_level and self.nlevels == 1:
          raise ValueError('Level must be None for non-MultiIndex')

      names_listlike = is_list_like(names)
      scalar_level = has_level and not is_list_like(level)

      if scalar_level and names_listlike:
          raise TypeError("Names must be a string")

      if not names_listlike:
          if not has_level and self.nlevels > 1:
              raise TypeError("Must pass list-like as `names`.")
          names = [names]
      if scalar_level:
          level = [level]

      target = self if inplace else self._shallow_copy()
      target._set_names(names, level=level)
      if inplace:
          return None
      return target
  def rename(self, name, inplace=False):
      """
      Set new names on index. Defaults to returning new index.

      Thin wrapper that calls ``set_names`` with ``[name]``.

      Parameters
      ----------
      name : str or list
          name to set
      inplace : bool
          if True, mutates in place

      Returns
      -------
      new index (of same type and class...etc) [if inplace, returns None]
      """
      wrapped = [name]
      return self.set_names(wrapped, inplace=inplace)
  def reshape(self, *args, **kwargs):
      """
      NOT IMPLEMENTED: do not call this method, as reshaping is not
      supported for Index objects and will raise an error.

      Always raises NotImplementedError, whatever the arguments.
      """
      message = ("reshaping is not supported "
                 "for Index objects")
      raise NotImplementedError(message)
  @property
  def _has_complex_internals(self):
      """Always False here; used to disable groupby tricks in MultiIndex."""
      return False
  def summary(self, name=None):
      """
      Return a one-line description: ``'<name>: <n> entries'`` followed, for
      a non-empty index, by ``', <first> to <last>'``.

      ``name`` defaults to the class name of this object.
      """
      def _render(value):
          # non-string values exposing ``format`` are formatted first
          if (hasattr(value, 'format') and
                  not isinstance(value, compat.string_types)):
              value = value.format()
          return pprint_thing(value)

      if len(self) > 0:
          index_summary = ', %s to %s' % (_render(self[0]),
                                          _render(self[-1]))
      else:
          index_summary = ''

      label = type(self).__name__ if name is None else name
      return '%s: %s entries%s' % (label, len(self), index_summary)
  def _mpl_repr(self):
      """Representation handed to matplotlib: the plain ``values``."""
      plotted = self.values
      return plotted
# Missing-value marker for this index type (NaN here); ``take`` forwards it
# as ``na_value`` when filling.
_na_value = np.nan
"""The expected NA value to use with this index."""
  844. # introspection
  845. @property
  846. def is_monotonic(self):
  847. """ alias for is_monotonic_increasing (deprecated) """
  848. return self._engine.is_monotonic_increasing
  849. @property
  850. def is_monotonic_increasing(self):
  851. """
  852. return if the index is monotonic increasing (only equal or
  853. increasing) values.
  854. """
  855. return self._engine.is_monotonic_increasing
  856. @property
  857. def is_monotonic_decreasing(self):
  858. """
  859. return if the index is monotonic decreasing (only equal or
  860. decreasing) values.
  861. """
  862. return self._engine.is_monotonic_decreasing
  863. def is_lexsorted_for_tuple(self, tup):
  864. return True
  865. @cache_readonly(allow_setting=True)
  866. def is_unique(self):
  867. """ return if the index has unique values """
  868. return self._engine.is_unique
  869. @property
  870. def has_duplicates(self):
  871. return not self.is_unique
  872. def is_boolean(self):
  873. return self.inferred_type in ['boolean']
  874. def is_integer(self):
  875. return self.inferred_type in ['integer']
  876. def is_floating(self):
  877. return self.inferred_type in ['floating', 'mixed-integer-float']
  878. def is_numeric(self):
  879. return self.inferred_type in ['integer', 'floating']
  880. def is_object(self):
  881. return is_object_dtype(self.dtype)
  882. def is_categorical(self):
  883. return self.inferred_type in ['categorical']
  884. def is_mixed(self):
  885. return self.inferred_type in ['mixed']
  886. def holds_integer(self):
  887. return self.inferred_type in ['integer', 'mixed-integer']
  888. # validate / convert indexers
  889. def _convert_scalar_indexer(self, key, kind=None):
  890. """
  891. convert a scalar indexer
  892. Parameters
  893. ----------
  894. key : label of the slice bound
  895. kind : {'ix', 'loc', 'getitem', 'iloc'} or None
  896. """
  897. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  898. if kind == 'iloc':
  899. return self._validate_indexer('positional', key, kind)
  900. if len(self) and not isinstance(self, ABCMultiIndex,):
  901. # we can raise here if we are definitive that this
  902. # is positional indexing (eg. .ix on with a float)
  903. # or label indexing if we are using a type able
  904. # to be represented in the index
  905. if kind in ['getitem', 'ix'] and is_float(key):
  906. if not self.is_floating():
  907. return self._invalid_indexer('label', key)
  908. elif kind in ['loc'] and is_float(key):
  909. # we want to raise KeyError on string/mixed here
  910. # technically we *could* raise a TypeError
  911. # on anything but mixed though
  912. if self.inferred_type not in ['floating',
  913. 'mixed-integer-float',
  914. 'string',
  915. 'unicode',
  916. 'mixed']:
  917. return self._invalid_indexer('label', key)
  918. elif kind in ['loc'] and is_integer(key):
  919. if not self.holds_integer():
  920. return self._invalid_indexer('label', key)
  921. return key
def _convert_slice_indexer(self, key, kind=None):
    """
    convert a slice indexer. disallow floats in the start/stop/step

    Non-slice keys are returned untouched.  For ``kind='iloc'`` (and for
    ``kind='getitem'`` when the index is integer or both bounds are
    integers/None) every slice component is passed through
    ``self._validate_indexer``.  Otherwise the slice is either returned
    as-is (treated as positional) or translated by ``self.slice_indexer``.

    Parameters
    ----------
    key : slice (anything else is returned unchanged)
    kind : {'ix', 'loc', 'getitem', 'iloc'} or None

    Returns
    -------
    The original key, a validated slice, or the result of
    ``self.slice_indexer``.
    """
    assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

    # if we are not a slice, then we are done
    if not isinstance(key, slice):
        return key

    # validate iloc
    if kind == 'iloc':
        return slice(self._validate_indexer('slice', key.start, kind),
                     self._validate_indexer('slice', key.stop, kind),
                     self._validate_indexer('slice', key.step, kind))

    # potentially cast the bounds to integers
    start, stop, step = key.start, key.stop, key.step

    # figure out if this is a positional indexer
    def is_int(v):
        # None counts as an integer bound (open-ended slice)
        return v is None or is_integer(v)

    is_null_slicer = start is None and stop is None
    is_index_slice = is_int(start) and is_int(stop)
    # integer bounds on a non-integer index are read as positions
    is_positional = is_index_slice and not self.is_integer()

    if kind == 'getitem':
        """
        called from the getitem slicers, validate that we are in fact
        integers
        """
        if self.is_integer() or is_index_slice:
            return slice(self._validate_indexer('slice', key.start, kind),
                         self._validate_indexer('slice', key.stop, kind),
                         self._validate_indexer('slice', key.step, kind))

    # convert the slice to an indexer here

    # if we are mixed and have integers
    try:
        if is_positional and self.is_mixed():
            # TODO: i, j are not used anywhere
            # The lookups are kept for their KeyError side effect only:
            # if both bounds exist as labels the slice is label-based.
            if start is not None:
                i = self.get_loc(start)  # noqa
            if stop is not None:
                j = self.get_loc(stop)  # noqa
            is_positional = False
    except KeyError:
        # a missing label leaves the slice positional, except for
        # 'mixed-integer-float' indexes where the error propagates
        if self.inferred_type == 'mixed-integer-float':
            raise

    if is_null_slicer:
        indexer = key
    elif is_positional:
        indexer = key
    else:
        try:
            indexer = self.slice_indexer(start, stop, step, kind=kind)
        except Exception:
            # fall back to the raw slice only for integer bounds on a
            # non-integer index; every other failure is re-raised
            if is_index_slice:
                if self.is_integer():
                    raise
                else:
                    indexer = key
            else:
                raise

    return indexer
  985. def _convert_list_indexer(self, keyarr, kind=None):
  986. """
  987. passed a key that is tuplesafe that is integer based
  988. and we have a mixed index (e.g. number/labels). figure out
  989. the indexer. return None if we can't help
  990. """
  991. if (kind in [None, 'iloc', 'ix'] and
  992. is_integer_dtype(keyarr) and not self.is_floating() and
  993. not isinstance(keyarr, ABCPeriodIndex)):
  994. if self.inferred_type == 'mixed-integer':
  995. indexer = self.get_indexer(keyarr)
  996. if (indexer >= 0).all():
  997. return indexer
  998. # missing values are flagged as -1 by get_indexer and negative
  999. # indices are already converted to positive indices in the
  1000. # above if-statement, so the negative flags are changed to
  1001. # values outside the range of indices so as to trigger an
  1002. # IndexError in maybe_convert_indices
  1003. indexer[indexer < 0] = len(self)
  1004. from pandas.core.indexing import maybe_convert_indices
  1005. return maybe_convert_indices(indexer, len(self))
  1006. elif not self.inferred_type == 'integer':
  1007. keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
  1008. return keyarr
  1009. return None
  1010. def _invalid_indexer(self, form, key):
  1011. """ consistent invalid indexer message """
  1012. raise TypeError("cannot do {form} indexing on {klass} with these "
  1013. "indexers [{key}] of {kind}".format(
  1014. form=form, klass=type(self), key=key,
  1015. kind=type(key)))
  1016. def get_duplicates(self):
  1017. from collections import defaultdict
  1018. counter = defaultdict(lambda: 0)
  1019. for k in self.values:
  1020. counter[k] += 1
  1021. return sorted(k for k, v in compat.iteritems(counter) if v > 1)
  1022. _get_duplicates = get_duplicates
  1023. def _cleanup(self):
  1024. self._engine.clear_mapping()
  1025. @cache_readonly
  1026. def _constructor(self):
  1027. return type(self)
  1028. @cache_readonly
  1029. def _engine(self):
  1030. # property, for now, slow to look up
  1031. return self._engine_type(lambda: self.values, len(self))
  1032. def _validate_index_level(self, level):
  1033. """
  1034. Validate index level.
  1035. For single-level Index getting level number is a no-op, but some
  1036. verification must be done like in MultiIndex.
  1037. """
  1038. if isinstance(level, int):
  1039. if level < 0 and level != -1:
  1040. raise IndexError("Too many levels: Index has only 1 level,"
  1041. " %d is not a valid level number" % (level, ))
  1042. elif level > 0:
  1043. raise IndexError("Too many levels:"
  1044. " Index has only 1 level, not %d" %
  1045. (level + 1))
  1046. elif level != self.name:
  1047. raise KeyError('Level %s must be same as name (%s)' %
  1048. (level, self.name))
  1049. def _get_level_number(self, level):
  1050. self._validate_index_level(level)
  1051. return 0
  1052. @cache_readonly
  1053. def inferred_type(self):
  1054. """ return a string of the type inferred from the values """
  1055. return lib.infer_dtype(self)
  1056. def is_type_compatible(self, kind):
  1057. return kind == self.inferred_type
  1058. @cache_readonly
  1059. def is_all_dates(self):
  1060. if self._data is None:
  1061. return False
  1062. return is_datetime_array(_ensure_object(self.values))
  1063. def __iter__(self):
  1064. return iter(self.values)
  1065. def __reduce__(self):
  1066. d = dict(data=self._data)
  1067. d.update(self._get_attributes_dict())
  1068. return _new_Index, (self.__class__, d), None
  1069. def __setstate__(self, state):
  1070. """Necessary for making this object picklable"""
  1071. if isinstance(state, dict):
  1072. self._data = state.pop('data')
  1073. for k, v in compat.iteritems(state):
  1074. setattr(self, k, v)
  1075. elif isinstance(state, tuple):
  1076. if len(state) == 2:
  1077. nd_state, own_state = state
  1078. data = np.empty(nd_state[1], dtype=nd_state[2])
  1079. np.ndarray.__setstate__(data, nd_state)
  1080. self.name = own_state[0]
  1081. else: # pragma: no cover
  1082. data = np.empty(state)
  1083. np.ndarray.__setstate__(data, state)
  1084. self._data = data
  1085. self._reset_identity()
  1086. else:
  1087. raise Exception("invalid pickle state")
  1088. _unpickle_compat = __setstate__
  1089. def __deepcopy__(self, memo=None):
  1090. if memo is None:
  1091. memo = {}
  1092. return self.copy(deep=True)
  1093. def __nonzero__(self):
  1094. raise ValueError("The truth value of a {0} is ambiguous. "
  1095. "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
  1096. .format(self.__class__.__name__))
  1097. __bool__ = __nonzero__
  1098. def __contains__(self, key):
  1099. hash(key)
  1100. # work around some kind of odd cython bug
  1101. try:
  1102. return key in self._engine
  1103. except TypeError:
  1104. return False
  1105. def __hash__(self):
  1106. raise TypeError("unhashable type: %r" % type(self).__name__)
  1107. def __setitem__(self, key, value):
  1108. raise TypeError("Index does not support mutable operations")
  1109. def __getitem__(self, key):
  1110. """
  1111. Override numpy.ndarray's __getitem__ method to work as desired.
  1112. This function adds lists and Series as valid boolean indexers
  1113. (ndarrays only supports ndarray with dtype=bool).
  1114. If resulting ndim != 1, plain ndarray is returned instead of
  1115. corresponding `Index` subclass.
  1116. """
  1117. # There's no custom logic to be implemented in __getslice__, so it's
  1118. # not overloaded intentionally.
  1119. getitem = self._data.__getitem__
  1120. promote = self._shallow_copy
  1121. if is_scalar(key):
  1122. return getitem(key)
  1123. if isinstance(key, slice):
  1124. # This case is separated from the conditional above to avoid
  1125. # pessimization of basic indexing.
  1126. return promote(getitem(key))
  1127. if is_bool_indexer(key):
  1128. key = np.asarray(key)
  1129. key = _values_from_object(key)
  1130. result = getitem(key)
  1131. if not is_scalar(result):
  1132. return promote(result)
  1133. else:
  1134. return result
  1135. def _ensure_compat_append(self, other):
  1136. """
  1137. prepare the append
  1138. Returns
  1139. -------
  1140. list of to_concat, name of result Index
  1141. """
  1142. name = self.name
  1143. to_concat = [self]
  1144. if isinstance(other, (list, tuple)):
  1145. to_concat = to_concat + list(other)
  1146. else:
  1147. to_concat.append(other)
  1148. for obj in to_concat:
  1149. if (isinstance(obj, Index) and obj.name != name and
  1150. obj.name is not None):
  1151. name = None
  1152. break
  1153. to_concat = self._ensure_compat_concat(to_concat)
  1154. to_concat = [x._values if isinstance(x, Index) else x
  1155. for x in to_concat]
  1156. return to_concat, name
  1157. def append(self, other):
  1158. """
  1159. Append a collection of Index options together
  1160. Parameters
  1161. ----------
  1162. other : Index or list/tuple of indices
  1163. Returns
  1164. -------
  1165. appended : Index
  1166. """
  1167. to_concat, name = self._ensure_compat_append(other)
  1168. attribs = self._get_attributes_dict()
  1169. attribs['name'] = name
  1170. return self._shallow_copy_with_infer(
  1171. np.concatenate(to_concat), **attribs)
  1172. @staticmethod
  1173. def _ensure_compat_concat(indexes):
  1174. from pandas.tseries.api import (DatetimeIndex, PeriodIndex,
  1175. TimedeltaIndex)
  1176. klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex
  1177. is_ts = [isinstance(idx, klasses) for idx in indexes]
  1178. if any(is_ts) and not all(is_ts):
  1179. return [_maybe_box(idx) for idx in indexes]
  1180. return indexes
# Shared docstring template for ``take``; it carries a ``%(klass)s``
# placeholder and is attached to the method below through ``Appender``.
_index_shared_docs['take'] = """
return a new %(klass)s of the values selected by the indices
For internal compatibility with numpy arrays.
Parameters
----------
indices : list
Indices to be taken
axis : int, optional
The axis over which to select values, always 0.
allow_fill : bool, default True
fill_value : bool, default None
If allow_fill=True and fill_value is not None, indices specified by
-1 is regarded as NA. If Index doesn't hold NA, raise ValueError
See also
--------
numpy.ndarray.take
"""
  1198. @Appender(_index_shared_docs['take'])
  1199. def take(self, indices, axis=0, allow_fill=True,
  1200. fill_value=None, **kwargs):
  1201. nv.validate_take(tuple(), kwargs)
  1202. indices = _ensure_platform_int(indices)
  1203. if self._can_hold_na:
  1204. taken = self._assert_take_fillable(self.values, indices,
  1205. allow_fill=allow_fill,
  1206. fill_value=fill_value,
  1207. na_value=self._na_value)
  1208. else:
  1209. if allow_fill and fill_value is not None:
  1210. msg = 'Unable to fill values because {0} cannot contain NA'
  1211. raise ValueError(msg.format(self.__class__.__name__))
  1212. taken = self.values.take(indices)
  1213. return self._shallow_copy(taken)
  1214. def _assert_take_fillable(self, values, indices, allow_fill=True,
  1215. fill_value=None, na_value=np.nan):
  1216. """ Internal method to handle NA filling of take """
  1217. indices = _ensure_platform_int(indices)
  1218. # only fill if we are passing a non-None fill_value
  1219. if allow_fill and fill_value is not None:
  1220. if (indices < -1).any():
  1221. msg = ('When allow_fill=True and fill_value is not None, '
  1222. 'all indices must be >= -1')
  1223. raise ValueError(msg)
  1224. taken = values.take(indices)
  1225. mask = indices == -1
  1226. if mask.any():
  1227. taken[mask] = na_value
  1228. else:
  1229. taken = values.take(indices)
  1230. return taken
  1231. @cache_readonly
  1232. def _isnan(self):
  1233. """ return if each value is nan"""
  1234. if self._can_hold_na:
  1235. return isnull(self)
  1236. else:
  1237. # shouldn't reach to this condition by checking hasnans beforehand
  1238. values = np.empty(len(self), dtype=np.bool_)
  1239. values.fill(False)
  1240. return values
  1241. @cache_readonly
  1242. def _nan_idxs(self):
  1243. if self._can_hold_na:
  1244. w, = self._isnan.nonzero()
  1245. return w
  1246. else:
  1247. return np.array([], dtype=np.int64)
  1248. @cache_readonly
  1249. def hasnans(self):
  1250. """ return if I have any nans; enables various perf speedups """
  1251. if self._can_hold_na:
  1252. return self._isnan.any()
  1253. else:
  1254. return False
  1255. def putmask(self, mask, value):
  1256. """
  1257. return a new Index of the values set with the mask
  1258. See also
  1259. --------
  1260. numpy.ndarray.putmask
  1261. """
  1262. values = self.values.copy()
  1263. try:
  1264. np.putmask(values, mask, self._convert_for_op(value))
  1265. return self._shallow_copy(values)
  1266. except (ValueError, TypeError):
  1267. # coerces to object
  1268. return self.astype(object).putmask(mask, value)
  1269. def format(self, name=False, formatter=None, **kwargs):
  1270. """
  1271. Render a string representation of the Index
  1272. """
  1273. header = []
  1274. if name:
  1275. header.append(pprint_thing(self.name,
  1276. escape_chars=('\t', '\r', '\n')) if
  1277. self.name is not None else '')
  1278. if formatter is not None:
  1279. return header + list(self.map(formatter))
  1280. return self._format_with_header(header, **kwargs)
  1281. def _format_with_header(self, header, na_rep='NaN', **kwargs):
  1282. values = self.values
  1283. from pandas.formats.format import format_array
  1284. if is_categorical_dtype(values.dtype):
  1285. values = np.array(values)
  1286. elif is_object_dtype(values.dtype):
  1287. values = lib.maybe_convert_objects(values, safe=1)
  1288. if is_object_dtype(values.dtype):
  1289. result = [pprint_thing(x, escape_chars=('\t', '\r', '\n'))
  1290. for x in values]
  1291. # could have nans
  1292. mask = isnull(values)
  1293. if mask.any():
  1294. result = np.array(result)
  1295. result[mask] = na_rep
  1296. result = result.tolist()
  1297. else:
  1298. result = _trim_front(format_array(values, None, justify='left'))
  1299. return header + result
  1300. def to_native_types(self, slicer=None, **kwargs):
  1301. """ slice and dice then format """
  1302. values = self
  1303. if slicer is not None:
  1304. values = values[slicer]
  1305. return values._format_native_types(**kwargs)
  1306. def _format_native_types(self, na_rep='', quoting=None, **kwargs):
  1307. """ actually format my specific types """
  1308. mask = isnull(self)
  1309. if not self.is_object() and not quoting:
  1310. values = np.asarray(self).astype(str)
  1311. else:
  1312. values = np.array(self, dtype=object, copy=True)
  1313. values[mask] = na_rep
  1314. return values
  1315. def equals(self, other):
  1316. """
  1317. Determines if two Index objects contain the same elements.
  1318. """
  1319. if self.is_(other):
  1320. return True
  1321. if not isinstance(other, Index):
  1322. return False
  1323. return array_equivalent(_values_from_object(self),
  1324. _values_from_object(other))
  1325. def identical(self, other):
  1326. """Similar to equals, but check that other comparable attributes are
  1327. also equal
  1328. """
  1329. return (self.equals(other) and
  1330. all((getattr(self, c, None) == getattr(other, c, None)
  1331. for c in self._comparables)) and
  1332. type(self) == type(other))
  1333. def asof(self, label):
  1334. """
  1335. For a sorted index, return the most recent label up to and including
  1336. the passed label. Return NaN if not found.
  1337. See also
  1338. --------
  1339. get_loc : asof is a thin wrapper around get_loc with method='pad'
  1340. """
  1341. try:
  1342. loc = self.get_loc(label, method='pad')
  1343. except KeyError:
  1344. return _get_na_value(self.dtype)
  1345. else:
  1346. if isinstance(loc, slice):
  1347. loc = loc.indices(len(self))[-1]
  1348. return self[loc]
  1349. def asof_locs(self, where, mask):
  1350. """
  1351. where : array of timestamps
  1352. mask : array of booleans where data is not NA
  1353. """
  1354. locs = self.values[mask].searchsorted(where.values, side='right')
  1355. locs = np.where(locs > 0, locs - 1, 0)
  1356. result = np.arange(len(self))[mask].take(locs)
  1357. first = mask.argmax()
  1358. result[(locs == 0) & (where < self.values[first])] = -1
  1359. return result
  1360. def sort_values(self, return_indexer=False, ascending=True):
  1361. """
  1362. Return sorted copy of Index
  1363. """
  1364. _as = self.argsort()
  1365. if not ascending:
  1366. _as = _as[::-1]
  1367. sorted_index = self.take(_as)
  1368. if return_indexer:
  1369. return sorted_index, _as
  1370. else:
  1371. return sorted_index
  1372. def order(self, return_indexer=False, ascending=True):
  1373. """
  1374. Return sorted copy of Index
  1375. DEPRECATED: use :meth:`Index.sort_values`
  1376. """
  1377. warnings.warn("order is deprecated, use sort_values(...)",
  1378. FutureWarning, stacklevel=2)
  1379. return self.sort_values(return_indexer=return_indexer,
  1380. ascending=ascending)
  1381. def sort(self, *args, **kwargs):
  1382. raise TypeError("cannot sort an Index object in-place, use "
  1383. "sort_values instead")
  1384. def sortlevel(self, level=None, ascending=True, sort_remaining=None):
  1385. """
  1386. For internal compatibility with with the Index API
  1387. Sort the Index. This is for compat with MultiIndex
  1388. Parameters
  1389. ----------
  1390. ascending : boolean, default True
  1391. False to sort in descending order
  1392. level, sort_remaining are compat parameters
  1393. Returns
  1394. -------
  1395. sorted_index : Index
  1396. """
  1397. return self.sort_values(return_indexer=True, ascending=ascending)
  1398. def shift(self, periods=1, freq=None):
  1399. """
  1400. Shift Index containing datetime objects by input number of periods and
  1401. DateOffset
  1402. Returns
  1403. -------
  1404. shifted : Index
  1405. """
  1406. raise NotImplementedError("Not supported for type %s" %
  1407. type(self).__name__)
  1408. def argsort(self, *args, **kwargs):
  1409. """
  1410. Returns the indices that would sort the index and its
  1411. underlying data.
  1412. Returns
  1413. -------
  1414. argsorted : numpy array
  1415. See also
  1416. --------
  1417. numpy.ndarray.argsort
  1418. """
  1419. result = self.asi8
  1420. if result is None:
  1421. result = np.array(self)
  1422. return result.argsort(*args, **kwargs)
  1423. def __add__(self, other):
  1424. if is_list_like(other):
  1425. warnings.warn("using '+' to provide set union with Indexes is "
  1426. "deprecated, use '|' or .union()", FutureWarning,
  1427. stacklevel=2)
  1428. if isinstance(other, Index):
  1429. return self.union(other)
  1430. return Index(np.array(self) + other)
  1431. def __radd__(self, other):
  1432. if is_list_like(other):
  1433. warnings.warn("using '+' to provide set union with Indexes is "
  1434. "deprecated, use '|' or .union()", FutureWarning,
  1435. stacklevel=2)
  1436. return Index(other + np.array(self))
  1437. __iadd__ = __add__
  1438. def __sub__(self, other):
  1439. warnings.warn("using '-' to provide set differences with Indexes is "
  1440. "deprecated, use .difference()", FutureWarning,
  1441. stacklevel=2)
  1442. return self.difference(other)
  1443. def __and__(self, other):
  1444. return self.intersection(other)
  1445. def __or__(self, other):
  1446. return self.union(other)
  1447. def __xor__(self, other):
  1448. return self.symmetric_difference(other)
  1449. def _get_consensus_name(self, other):
  1450. """
  1451. Given 2 indexes, give a consensus name meaning
  1452. we take the not None one, or None if the names differ.
  1453. Return a new object if we are resetting the name
  1454. """
  1455. if self.name != other.name:
  1456. if self.name is None or other.name is None:
  1457. name = self.name or other.name
  1458. else:
  1459. name = None
  1460. if self.name != name:
  1461. return self._shallow_copy(name=name)
  1462. return self
def union(self, other):
    """
    Form the union of two Index objects and sorts if possible.

    Shortcuts are taken when either side is empty or both are equal.
    Operands with different dtypes are both cast to object and the union
    is retried.  When the values cannot be compared, a RuntimeWarning is
    emitted and the result is left unsorted.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    union : Index

    Examples
    --------

    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.union(idx2)
    Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

    """
    self._assert_can_do_setop(other)
    other = _ensure_index(other)

    # nothing to add, or same content: only the name may need adjusting
    if len(other) == 0 or self.equals(other):
        return self._get_consensus_name(other)

    if len(self) == 0:
        return other._get_consensus_name(self)

    # mismatched dtypes: redo the operation on object-dtype copies
    if not is_dtype_equal(self.dtype, other.dtype):
        this = self.astype('O')
        other = other.astype('O')
        return this.union(other)

    if self.is_monotonic and other.is_monotonic:
        try:
            result = self._outer_indexer(self.values, other._values)[0]
        except TypeError:
            # incomparable objects
            result = list(self.values)

            # worth making this faster? a very unusual case
            value_set = set(self.values)
            result.extend([x for x in other._values
                           if x not in value_set])
    else:
        # positions in ``other`` whose labels are missing from ``self``
        indexer = self.get_indexer(other)
        indexer, = (indexer == -1).nonzero()

        if len(indexer) > 0:
            other_diff = algos.take_nd(other._values, indexer,
                                       allow_fill=False)
            result = _concat._concat_compat((self.values, other_diff))

            # probe comparability on one pair before trying to sort
            try:
                self.values[0] < other_diff[0]
            except TypeError as e:
                warnings.warn("%s, sort order is undefined for "
                              "incomparable objects" % e, RuntimeWarning,
                              stacklevel=3)
            else:
                types = frozenset((self.inferred_type,
                                   other.inferred_type))
                if not types & _unsortable_types:
                    result.sort()

        else:
            # ``other`` adds nothing new: the result is our own values
            result = self.values

            try:
                result = np.sort(result)
            except TypeError as e:
                warnings.warn("%s, sort order is undefined for "
                              "incomparable objects" % e, RuntimeWarning,
                              stacklevel=3)

    # for subclasses
    return self._wrap_union_result(other, result)
  1526. def _wrap_union_result(self, other, result):
  1527. name = self.name if self.name == other.name else None
  1528. return self.__class__(result, name=name)
  1529. def intersection(self, other):
  1530. """
  1531. Form the intersection of two Index objects.
  1532. This returns a new Index with elements common to the index and `other`.
  1533. Sortedness of the result is not guaranteed.
  1534. Parameters
  1535. ----------
  1536. other : Index or array-like
  1537. Returns
  1538. -------
  1539. intersection : Index
  1540. Examples
  1541. --------
  1542. >>> idx1 = pd.Index([1, 2, 3, 4])
  1543. >>> idx2 = pd.Index([3, 4, 5, 6])
  1544. >>> idx1.intersection(idx2)
  1545. Int64Index([3, 4], dtype='int64')
  1546. """
  1547. self._assert_can_do_setop(other)
  1548. other = _ensure_index(other)
  1549. if self.equals(other):
  1550. return self._get_consensus_name(other)
  1551. if not is_dtype_equal(self.dtype, other.dtype):
  1552. this = self.astype('O')
  1553. other = other.astype('O')
  1554. return this.intersection(other)
  1555. if self.is_monotonic and other.is_monotonic:
  1556. try:
  1557. result = self._inner_indexer(self.values, other._values)[0]
  1558. return self._wrap_union_result(other, result)
  1559. except TypeError:
  1560. pass
  1561. try:
  1562. indexer = Index(self.values).get_indexer(other._values)
  1563. indexer = indexer.take((indexer != -1).nonzero()[0])
  1564. except:
  1565. # duplicates
  1566. indexer = Index(self.values).get_indexer_non_unique(
  1567. other._values)[0].unique()
  1568. indexer = indexer[indexer != -1]
  1569. taken = self.take(indexer)
  1570. if self.name != other.name:
  1571. taken.name = None
  1572. return taken
  1573. def difference(self, other):
  1574. """
  1575. Return a new Index with elements from the index that are not in
  1576. `other`.
  1577. This is the set difference of two Index objects.
  1578. It's sorted if sorting is possible.
  1579. Parameters
  1580. ----------
  1581. other : Index or array-like
  1582. Returns
  1583. -------
  1584. difference : Index
  1585. Examples
  1586. --------
  1587. >>> idx1 = pd.Index([1, 2, 3, 4])
  1588. >>> idx2 = pd.Index([3, 4, 5, 6])
  1589. >>> idx1.difference(idx2)
  1590. Int64Index([1, 2], dtype='int64')
  1591. """
  1592. self._assert_can_do_setop(other)
  1593. if self.equals(other):
  1594. return Index([], name=self.name)
  1595. other, result_name = self._convert_can_do_setop(other)
  1596. this = self._get_unique_index()
  1597. indexer = this.get_indexer(other)
  1598. indexer = indexer.take((indexer != -1).nonzero()[0])
  1599. label_diff = np.setdiff1d(np.arange(this.size), indexer,
  1600. assume_unique=True)
  1601. the_diff = this.values.take(label_diff)
  1602. try:
  1603. the_diff = algos.safe_sort(the_diff)
  1604. except TypeError:
  1605. pass
  1606. return this._shallow_copy(the_diff, name=result_name)
  1607. def symmetric_difference(self, other, result_name=None):
  1608. """
  1609. Compute the symmetric difference of two Index objects.
  1610. It's sorted if sorting is possible.
  1611. Parameters
  1612. ----------
  1613. other : Index or array-like
  1614. result_name : str
  1615. Returns
  1616. -------
  1617. symmetric_difference : Index
  1618. Notes
  1619. -----
  1620. ``symmetric_difference`` contains elements that appear in either
  1621. ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
  1622. ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped.
  1623. Examples
  1624. --------
  1625. >>> idx1 = Index([1, 2, 3, 4])
  1626. >>> idx2 = Index([2, 3, 4, 5])
  1627. >>> idx1.symmetric_difference(idx2)
  1628. Int64Index([1, 5], dtype='int64')
  1629. You can also use the ``^`` operator:
  1630. >>> idx1 ^ idx2
  1631. Int64Index([1, 5], dtype='int64')
  1632. """
  1633. self._assert_can_do_setop(other)
  1634. other, result_name_update = self._convert_can_do_setop(other)
  1635. if result_name is None:
  1636. result_name = result_name_update
  1637. this = self._get_unique_index()
  1638. other = other._get_unique_index()
  1639. indexer = this.get_indexer(other)
  1640. # {this} minus {other}
  1641. common_indexer = indexer.take((indexer != -1).nonzero()[0])
  1642. left_indexer = np.setdiff1d(np.arange(this.size), common_indexer,
  1643. assume_unique=True)
  1644. left_diff = this.values.take(left_indexer)
  1645. # {other} minus {this}
  1646. right_indexer = (indexer == -1).nonzero()[0]
  1647. right_diff = other.values.take(right_indexer)
  1648. the_diff = _concat._concat_compat([left_diff, right_diff])
  1649. try:
  1650. the_diff = algos.safe_sort(the_diff)
  1651. except TypeError:
  1652. pass
  1653. attribs = self._get_attributes_dict()
  1654. attribs['name'] = result_name
  1655. if 'freq' in attribs:
  1656. attribs['freq'] = None
  1657. return self._shallow_copy_with_infer(the_diff, **attribs)
# Old spelling kept as an alias: ``symmetric_difference`` wrapped by
# ``deprecate`` under the name 'sym_diff'.
sym_diff = deprecate('sym_diff', symmetric_difference)
  1659. def _get_unique_index(self, dropna=False):
  1660. """
  1661. Returns an index containing unique values.
  1662. Parameters
  1663. ----------
  1664. dropna : bool
  1665. If True, NaN values are dropped.
  1666. Returns
  1667. -------
  1668. uniques : index
  1669. """
  1670. if self.is_unique and not dropna:
  1671. return self
  1672. values = self.values
  1673. if not self.is_unique:
  1674. values = self.unique()
  1675. if dropna:
  1676. try:
  1677. if self.hasnans:
  1678. values = values[~isnull(values)]
  1679. except NotImplementedError:
  1680. pass
  1681. return self._shallow_copy(values)
  def get_loc(self, key, method=None, tolerance=None):
      """
      Get integer location for requested label

      Parameters
      ----------
      key : label
      method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
          * default: exact matches only.
          * pad / ffill: find the PREVIOUS index value if no exact match.
          * backfill / bfill: use NEXT index value if no exact match
          * nearest: use the NEAREST index value if no exact match. Tied
            distances are broken by preferring the larger index value.
      tolerance : optional
          Maximum distance from index value for inexact matches. The value
          of the index at the matching location must satisfy the equation
          ``abs(index[loc] - key) <= tolerance``.

          .. versionadded:: 0.17.0

      Returns
      -------
      loc : int if unique index, possibly slice or mask if not

      Raises
      ------
      ValueError
          If ``tolerance`` is passed while ``method`` is None.
      TypeError
          If an inexact lookup does not resolve to a single position.
      KeyError
          If an inexact lookup finds no match (indexer value of -1).
      """
      if method is not None:
          # inexact lookup: delegate to get_indexer with a one-element
          # target and unwrap the single resulting position
          positions = self.get_indexer([key], method=method,
                                       tolerance=tolerance)
          if positions.ndim > 1 or positions.size > 1:
              raise TypeError('get_loc requires scalar valued input')
          position = positions.item()
          if position == -1:
              raise KeyError(key)
          return position

      if tolerance is not None:
          raise ValueError('tolerance argument only valid if using pad, '
                           'backfill or nearest lookups')

      key = _values_from_object(key)
      try:
          return self._engine.get_loc(key)
      except KeyError:
          # retry once with the key passed through _maybe_cast_indexer
          return self._engine.get_loc(self._maybe_cast_indexer(key))
  def get_value(self, series, key):
      """
      Fast lookup of value from 1-dimensional ndarray. Only use this if you
      know what you're doing

      The lookup first tries ``series._values[key]`` when those values are
      themselves an Index and ``key`` is a scalar, then falls back to the
      index engine, and finally to ``tslib.get_value_box``.
      """
      # if the series values are Index-like (e.g. DatetimeIndex), index
      # into them directly
      index_like = getattr(series, '_values', None)
      if isinstance(index_like, Index) and is_scalar(key):
          try:
              return index_like[key]
          except (IndexError, ValueError):
              # invalid type as an indexer
              pass

      values = _values_from_object(series)
      lookup_key = _values_from_object(key)
      lookup_key = self._convert_scalar_indexer(lookup_key, kind='getitem')

      try:
          return self._engine.get_value(values, lookup_key,
                                        tz=getattr(series.dtype, 'tz', None))
      except KeyError as e1:
          if len(self) > 0 and self.inferred_type in ('integer', 'boolean'):
              raise

          try:
              return tslib.get_value_box(values, key)
          except IndexError:
              raise
          except TypeError:
              # generator/iterator-like
              if is_iterator(key):
                  raise InvalidIndexError(key)
              raise e1
          except Exception:  # pragma: no cover
              raise e1
      except TypeError:
          # python 3
          if is_scalar(key):  # pragma: no cover
              raise IndexError(key)
          raise InvalidIndexError(key)
  def set_value(self, arr, key, value):
      """
      Fast assignment of a value into a 1-dimensional ndarray. Only use this
      if you know what you're doing

      ``arr`` and ``key`` are unwrapped with ``_values_from_object`` and the
      assignment itself is delegated to the index engine.
      """
      engine_set = self._engine.set_value
      raw_values = _values_from_object(arr)
      raw_key = _values_from_object(key)
      engine_set(raw_values, raw_key, value)
  def get_level_values(self, level):
      """
      Return vector of label values for requested level, equal to the length
      of the index

      Parameters
      ----------
      level : int

      Returns
      -------
      values : Index
          This index itself; ``level`` is only validated.
      """
      # the only thing to do here is validate the requested level
      self._validate_index_level(level)
      return self
  def get_indexer(self, target, method=None, limit=None, tolerance=None):
      """
      Compute indexer and mask for new index given the current index. The
      indexer should be then used as an input to ndarray.take to align the
      current data to the new index.

      Parameters
      ----------
      target : Index
      method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
          * default: exact matches only.
          * pad / ffill: find the PREVIOUS index value if no exact match.
          * backfill / bfill: use NEXT index value if no exact match
          * nearest: use the NEAREST index value if no exact match. Tied
            distances are broken by preferring the larger index value.
      limit : int, optional
          Maximum number of consecutive labels in ``target`` to match for
          inexact matches.
      tolerance : optional
          Maximum distance between original and new labels for inexact
          matches. The values of the index at the matching locations must
          satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

          .. versionadded:: 0.17.0

      Examples
      --------
      >>> indexer = index.get_indexer(new_index)
      >>> new_values = cur_values.take(indexer)

      Returns
      -------
      indexer : ndarray of int
          Integers from 0 to n - 1 indicating that the index at these
          positions matches the corresponding target values. Missing values
          in the target are marked by -1.
      """
      method = missing.clean_reindex_fill_method(method)
      target = _ensure_index(target)
      if tolerance is not None:
          tolerance = self._convert_tolerance(tolerance)

      # if either side got promoted, redo the lookup on the promoted pair
      promoted_self, promoted_target = self._possibly_promote(target)
      if promoted_self is not self or promoted_target is not target:
          return promoted_self.get_indexer(promoted_target, method=method,
                                           limit=limit, tolerance=tolerance)

      # differing dtypes: compare both sides as object
      if not is_dtype_equal(self.dtype, target.dtype):
          return self.astype(object).get_indexer(target.astype(object),
                                                 method=method, limit=limit,
                                                 tolerance=tolerance)

      if not self.is_unique:
          raise InvalidIndexError('Reindexing only valid with uniquely'
                                  ' valued Index objects')

      if method in ('pad', 'backfill'):
          indexer = self._get_fill_indexer(target, method, limit, tolerance)
      elif method == 'nearest':
          indexer = self._get_nearest_indexer(target, limit, tolerance)
      else:
          if tolerance is not None:
              raise ValueError('tolerance argument only valid if doing pad, '
                               'backfill or nearest reindexing')
          if limit is not None:
              raise ValueError('limit argument only valid if doing pad, '
                               'backfill or nearest reindexing')
          indexer = self._engine.get_indexer(target._values)

      return _ensure_platform_int(indexer)
  1842. def _convert_tolerance(self, tolerance):
  1843. # override this method on subclasses
  1844. return tolerance
  def _get_fill_indexer(self, target, method, limit=None, tolerance=None):
      """
      Compute the pad/backfill indexer for ``target``.

      Uses the engine's pad/backfill indexer when both this index and
      ``target`` are monotonic increasing, and the searchsorted fallback
      otherwise. When ``tolerance`` is given the result is filtered through
      ``_filter_indexer_tolerance``.
      """
      both_increasing = (self.is_monotonic_increasing and
                         target.is_monotonic_increasing)
      if both_increasing:
          if method == 'pad':
              engine_fill = self._engine.get_pad_indexer
          else:
              engine_fill = self._engine.get_backfill_indexer
          indexer = engine_fill(target._values, limit)
      else:
          indexer = self._get_fill_indexer_searchsorted(target, method,
                                                        limit)

      if tolerance is None:
          return indexer
      return self._filter_indexer_tolerance(target._values, indexer,
                                            tolerance)
  1857. def _get_fill_indexer_searchsorted(self, target, method, limit=None):
  1858. """
  1859. Fallback pad/backfill get_indexer that works for monotonic decreasing
  1860. indexes and non-monotonic targets
  1861. """
  1862. if limit is not None:
  1863. raise ValueError('limit argument for %r method only well-defined '
  1864. 'if index and target are monotonic' % method)
  1865. side = 'left' if method == 'pad' else 'right'
  1866. target = np.asarray(target)
  1867. # find exact matches first (this simplifies the algorithm)
  1868. indexer = self.get_indexer(target)
  1869. nonexact = (indexer == -1)
  1870. indexer[nonexact] = self._searchsorted_monotonic(target[nonexact],
  1871. side)
  1872. if side == 'left':
  1873. # searchsorted returns "indices into a sorted array such that,
  1874. # if the corresponding elements in v were inserted before the
  1875. # indices, the order of a would be preserved".
  1876. # Thus, we need to subtract 1 to find values to the left.
  1877. indexer[nonexact] -= 1
  1878. # This also mapped not found values (values of 0 from
  1879. # np.searchsorted) to -1, which conveniently is also our
  1880. # sentinel for missing values
  1881. else:
  1882. # Mark indices to the right of the largest value as not found
  1883. indexer[indexer == len(self)] = -1
  1884. return indexer
  1885. def _get_nearest_indexer(self, target, limit, tolerance):
  1886. """
  1887. Get the indexer for the nearest index labels; requires an index with
  1888. values that can be subtracted from each other (e.g., not strings or
  1889. tuples).
  1890. """
  1891. left_indexer = self.get_indexer(target, 'pad', limit=limit)
  1892. right_indexer = self.get_indexer(target, 'backfill', limit=limit)
  1893. target = np.asarray(target)
  1894. left_distances = abs(self.values[left_indexer] - target)
  1895. right_distances = abs(self.values[right_indexer] - target)
  1896. op = operator.lt if self.is_monotonic_increasing else operator.le
  1897. indexer = np.where(op(left_distances, right_distances) |
  1898. (right_indexer == -1), left_indexer, right_indexer)
  1899. if tolerance is not None:
  1900. indexer = self._filter_indexer_tolerance(target, indexer,
  1901. tolerance)
  1902. return indexer
  1903. def _filter_indexer_tolerance(self, target, indexer, tolerance):
  1904. distance = abs(self.values[indexer] - target)
  1905. indexer = np.where(distance <= tolerance, indexer, -1)
  1906. return indexer
  def get_indexer_non_unique(self, target):
      """ return an indexer suitable for taking from a non unique index
      return the labels in the same order as the target, and
      return a missing indexer into the target (missing are marked as -1
      in the indexer); target must be an iterable """
      target = _ensure_index(target)

      # if either side got promoted, redo the lookup on the promoted pair
      promoted_self, promoted_target = self._possibly_promote(target)
      if promoted_self is not self or promoted_target is not target:
          return promoted_self.get_indexer_non_unique(promoted_target)

      if self.is_all_dates:
          # all-dates index: look up on the asi8 values of both sides
          lookup_index = Index(self.asi8)
          target_values = target.asi8
      else:
          lookup_index = self
          target_values = target._values

      engine = lookup_index._engine
      indexer, missing = engine.get_indexer_non_unique(target_values)
      return Index(indexer), missing
  def get_indexer_for(self, target, **kwargs):
      """ guaranteed return of an indexer even when non-unique """
      if not self.is_unique:
          # the non-unique lookup also returns the missing positions,
          # which are discarded here
          return self.get_indexer_non_unique(target, **kwargs)[0]
      return self.get_indexer(target, **kwargs)
  def _possibly_promote(self, other):
      """
      Return a ``(self, other)`` pair, promoted where needed.

      An index inferred as 'date' is converted to a DatetimeIndex when
      ``other`` is one; an index inferred as 'boolean' that is not object
      dtype has both sides cast to object. Otherwise both are returned
      unchanged.
      """
      # A hack, but it works
      from pandas.tseries.index import DatetimeIndex

      inferred = self.inferred_type
      if inferred == 'date' and isinstance(other, DatetimeIndex):
          return DatetimeIndex(self), other
      if inferred == 'boolean' and not is_object_dtype(self.dtype):
          return self.astype('object'), other.astype('object')
      return self, other
  def groupby(self, to_groupby):
      """
      Group the index labels by a given array of values.

      Parameters
      ----------
      to_groupby : array
          Values used to determine the groups.

      Returns
      -------
      groups : dict
          {group name -> group labels}
      """
      grouper = self._groupby
      labels = self.values
      group_keys = _values_from_object(to_groupby)
      return grouper(labels, group_keys)
  def map(self, mapper):
      """
      Apply mapper function to its values.

      Parameters
      ----------
      mapper : callable
          Function to be applied.

      Returns
      -------
      applied : array
      """
      apply_elementwise = self._arrmap
      return apply_elementwise(self.values, mapper)
  def isin(self, values, level=None):
      """
      Compute boolean array of whether each index value is found in the
      passed set of values.

      Parameters
      ----------
      values : set or list-like
          Sought values.

          .. versionadded:: 0.18.1

          Support for values as a set

      level : str or int, optional
          Name or position of the index level to use (if the index is a
          MultiIndex).

      Notes
      -----
      If `level` is specified:

      - if it is the name of one *and only one* index level, use that level;
      - otherwise it should be a number indicating level position.

      Returns
      -------
      is_contained : ndarray (boolean dtype)
      """
      if level is not None:
          # a level is only validated here; it does not change the lookup
          self._validate_index_level(level)
      membership_test = algos.isin
      return membership_test(np.array(self), values)
  1988. def _can_reindex(self, indexer):
  1989. """
  1990. *this is an internal non-public method*
  1991. Check if we are allowing reindexing with this particular indexer
  1992. Parameters
  1993. ----------
  1994. indexer : an integer indexer
  1995. Raises
  1996. ------
  1997. ValueError if its a duplicate axis
  1998. """
  1999. # trying to reindex on an axis with duplicates
  2000. if not self.is_unique and len(indexer):
  2001. raise ValueError("cannot reindex from a duplicate axis")
  def reindex(self, target, method=None, level=None, limit=None,
              tolerance=None):
      """
      Create index with target's values (move/add/delete values as necessary)

      Parameters
      ----------
      target : an iterable

      Returns
      -------
      new_index : pd.Index
          Resulting index
      indexer : np.ndarray or None
          Indices of output values in original index
      """
      # GH6552: preserve names when reindexing to non-named target
      # (i.e. neither Index nor Series).
      preserve_names = not hasattr(target, 'name')

      # GH7774: preserve dtype/tz if target is empty and not an Index.
      target = _ensure_has_len(target)  # target may be an iterator
      if isinstance(target, Index) or len(target) != 0:
          target = _ensure_index(target)
      else:
          attrs = self._get_attributes_dict()
          attrs.pop('freq', None)  # don't preserve freq
          target = self._simple_new(None, dtype=self.dtype, **attrs)

      if level is not None:
          if method is not None:
              raise TypeError('Fill method not supported if level passed')
          _, indexer, _ = self._join_level(target, level, how='right',
                                           return_indexers=True)
      elif self.equals(target):
          indexer = None
      elif self.is_unique:
          indexer = self.get_indexer(target, method=method, limit=limit,
                                     tolerance=tolerance)
      else:
          if method is not None or limit is not None:
              raise ValueError("cannot reindex a non-unique index "
                               "with a method or limit")
          # the missing positions are not needed here
          indexer, _ = self.get_indexer_non_unique(target)

      if preserve_names and target.nlevels == 1 and target.name != self.name:
          target = target.copy()
          target.name = self.name

      return target, indexer
  def _reindex_non_unique(self, target):
      """
      *this is an internal non-public method*

      Create a new index with target's values (move/add/delete values as
      necessary) use with non-unique Index and a possibly non-unique target

      Parameters
      ----------
      target : an iterable

      Returns
      -------
      new_index : pd.Index
          Resulting index
      indexer : np.ndarray or None
          Indices of output values in original index
      new_indexer : np.ndarray or None
          Only computed when some target labels are missing from this
          index; positions of missing labels are marked with -1.
      """
      target = _ensure_index(target)
      indexer, missing = self.get_indexer_non_unique(target)
      found = indexer != -1
      new_labels = self.take(indexer[found])
      new_indexer = None

      if len(missing):
          positions = np.arange(len(indexer))

          missing = _ensure_platform_int(missing)
          missing_labels = target.take(missing)
          missing_positions = _ensure_int64(positions[~found])
          found_labels = self.take(indexer[found])._values
          found_positions = _ensure_int64(positions[found])

          # assemble found and missing labels in indexer order
          new_labels = np.empty((len(indexer),), dtype=object)
          new_labels[found_positions] = found_labels
          new_labels[missing_positions] = missing_labels

          if target.is_unique:
              # a unique indexer
              # see GH5553, make sure we use the right indexer
              new_indexer = np.arange(len(indexer))
              new_indexer[found_positions] = np.arange(len(found_labels))
              new_indexer[missing_positions] = -1
          else:
              # we have a non_unique selector, need to use the original
              # indexer here

              # need to retake to have the same size as the indexer
              indexer = indexer._values
              indexer[~found] = 0

              # reset the new indexer to account for the new size
              new_indexer = np.arange(len(self.take(indexer)))
              new_indexer[~found] = -1

      new_index = self._shallow_copy_with_infer(new_labels, freq=None)
      return new_index, indexer, new_indexer
  def join(self, other, how='left', level=None, return_indexers=False):
      """
      *this is an internal non-public method*

      Compute join_index and indexers to conform data
      structures to the new index.

      Parameters
      ----------
      other : Index
      how : {'left', 'right', 'inner', 'outer'}
      level : int or level name, default None
      return_indexers : boolean, default False

      Returns
      -------
      join_index, (left_indexer, right_indexer)
      """
      from .multi import MultiIndex

      self_is_mi = isinstance(self, MultiIndex)
      other_is_mi = isinstance(other, MultiIndex)

      if self_is_mi or other_is_mi:
          if level is not None:
              # join on the level
              return self._join_level(other, level, how=how,
                                      return_indexers=return_indexers)
          # try to figure out the join level
          # GH3662
          # identical levels/names fall through to a simple join
          if not self.names == other.names:
              return self._join_multi(other, how=how,
                                      return_indexers=return_indexers)

      other = _ensure_index(other)

      # an empty side needs no alignment for the joins that keep the
      # other side whole
      if len(other) == 0 and how in ('left', 'outer'):
          join_index = self._shallow_copy()
          if not return_indexers:
              return join_index
          rindexer = np.repeat(-1, len(join_index))
          return join_index, None, rindexer

      if len(self) == 0 and how in ('right', 'outer'):
          join_index = other._shallow_copy()
          if not return_indexers:
              return join_index
          lindexer = np.repeat(-1, len(join_index))
          return join_index, lindexer, None

      if self._join_precedence < other._join_precedence:
          # let the higher-precedence index drive the join, then swap
          # the indexers back into (left, right) order
          how = {'right': 'left', 'left': 'right'}.get(how, how)
          result = other.join(self, how=how, level=level,
                              return_indexers=return_indexers)
          if return_indexers:
              joined, other_side, self_side = result
              result = joined, self_side, other_side
          return result

      if not is_dtype_equal(self.dtype, other.dtype):
          this = self.astype('O')
          other = other.astype('O')
          return this.join(other, how=how, return_indexers=return_indexers)

      _validate_join_method(how)

      if not (self.is_unique and other.is_unique):
          # at least one side holds duplicates; the monotonic join is
          # only used when exactly one side does and both are monotonic
          if ((self.is_unique or other.is_unique) and
                  self.is_monotonic and other.is_monotonic):
              return self._join_monotonic(other, how=how,
                                          return_indexers=return_indexers)
          return self._join_non_unique(other, how=how,
                                       return_indexers=return_indexers)

      if self.is_monotonic and other.is_monotonic:
          try:
              return self._join_monotonic(other, how=how,
                                          return_indexers=return_indexers)
          except TypeError:
              # fall through to the generic join below
              pass

      if how == 'left':
          join_index = self
      elif how == 'right':
          join_index = other
      elif how == 'inner':
          join_index = self.intersection(other)
      elif how == 'outer':
          join_index = self.union(other)

      if not return_indexers:
          return join_index

      # an indexer of None marks the side the result is identical to
      lindexer = None if join_index is self else self.get_indexer(join_index)
      rindexer = (None if join_index is other
                  else other.get_indexer(join_index))
      return join_index, lindexer, rindexer
  def _join_multi(self, other, how, return_indexers=True):
      """
      Join two indexes, at most one of them a MultiIndex, on the single
      level name they have in common.

      Raises
      ------
      ValueError
          If the two indexes share no level name.
      NotImplementedError
          If more than one name overlaps, or both sides are MultiIndexes.
      """
      from .multi import MultiIndex

      self_is_mi = isinstance(self, MultiIndex)
      other_is_mi = isinstance(other, MultiIndex)

      # figure out join names
      self_names = set(n for n in self.names if n is not None)
      other_names = set(n for n in other.names if n is not None)
      overlap = list(self_names & other_names)

      # need at least 1 in common, but not more than 1
      if not len(overlap):
          raise ValueError("cannot join with no level specified and no "
                           "overlapping names")
      if len(overlap) > 1:
          raise NotImplementedError("merging with more than one level "
                                    "overlap on a multi-index is not "
                                    "implemented")
      join_name = overlap[0]

      if self_is_mi and other_is_mi:
          # 2 multi-indexes
          raise NotImplementedError("merging with both multi-indexes is not "
                                    "implemented")

      # make the indices into mi's that match
      flip_order = self_is_mi
      if flip_order:
          self, other = other, self
          # flip if join method is right or left
          how = {'right': 'left', 'left': 'right'}.get(how, how)

      level = other.names.index(join_name)
      result = self._join_level(other, level, how=how,
                                return_indexers=return_indexers)

      if flip_order and isinstance(result, tuple):
          # swap the two indexers back to the caller's order
          return result[0], result[2], result[1]
      return result
  def _join_non_unique(self, other, how='left', return_indexers=False):
      """
      Join with ``other`` through ``pandas.tools.merge._get_join_indexers``,
      the path ``join`` uses when duplicates are present.
      """
      from pandas.tools.merge import _get_join_indexers

      left_idx, right_idx = _get_join_indexers([self.values],
                                               [other._values], how=how,
                                               sort=True)
      left_idx = _ensure_platform_int(left_idx)
      right_idx = _ensure_platform_int(right_idx)

      # take values from self, then overwrite the positions marked -1 in
      # the left indexer with values taken from other
      joined_values = self.values.take(left_idx)
      absent_on_left = left_idx == -1
      np.putmask(joined_values, absent_on_left,
                 other._values.take(right_idx))

      join_index = self._wrap_joined_index(joined_values, other)

      if not return_indexers:
          return join_index
      return join_index, left_idx, right_idx
  def _join_level(self, other, level, how='left', return_indexers=False,
                  keep_order=True):
      """
      The join method *only* affects the level of the resulting
      MultiIndex. Otherwise it just exactly aligns the Index data to the
      labels of the level in the MultiIndex. If `keep_order` == True, the
      order of the data indexed by the MultiIndex will not be changed;
      otherwise, it will tie out with `other`.
      """
      from pandas.algos import groupsort_indexer
      from .multi import MultiIndex

      def _get_leaf_sorter(labels):
          """
          returns sorter for the inner most level while preserving the
          order of higher levels
          """
          if labels[0].size == 0:
              return np.empty(0, dtype='int64')

          if len(labels) == 1:
              only = _ensure_int64(labels[0])
              sorter, _ = groupsort_indexer(only, 1 + only.max())
              return sorter

          # find indexers of beginning of each set of
          # same-key labels w.r.t all but last level
          tic = labels[0][:-1] != labels[0][1:]
          for lab in labels[1:-1]:
              tic |= lab[:-1] != lab[1:]

          starts = np.hstack(([True], tic, [True])).nonzero()[0]
          innermost = _ensure_int64(labels[-1])
          return lib.get_level_sorter(innermost, _ensure_int64(starts))

      if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
          raise TypeError('Join on level between two MultiIndex objects '
                          'is ambiguous')

      left, right = self, other

      # make sure the MultiIndex ends up on the left
      flip_order = not isinstance(self, MultiIndex)
      if flip_order:
          left, right = right, left
          how = {'right': 'left', 'left': 'right'}.get(how, how)

      level = left._get_level_number(level)
      old_level = left.levels[level]

      if not right.is_unique:
          raise NotImplementedError('Index._join_level on non-unique index '
                                    'is not implemented')

      new_level, left_lev_indexer, right_lev_indexer = old_level.join(
          right, how=how, return_indexers=True)

      if left_lev_indexer is None:
          if keep_order or len(left) == 0:
              left_indexer = None
              join_index = left
          else:  # sort the leaves
              left_indexer = _get_leaf_sorter(left.labels[:level + 1])
              join_index = left[left_indexer]
      else:
          left_lev_indexer = _ensure_int64(left_lev_indexer)
          rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
                                                len(old_level))

          new_lev_labels = algos.take_nd(rev_indexer, left.labels[level],
                                         allow_fill=False)

          new_labels = list(left.labels)
          new_labels[level] = new_lev_labels

          new_levels = list(left.levels)
          new_levels[level] = new_level

          if keep_order:  # just drop missing values. o.w. keep order
              left_indexer = np.arange(len(left))
              mask = new_lev_labels != -1
              if not mask.all():
                  new_labels = [lab[mask] for lab in new_labels]
                  left_indexer = left_indexer[mask]
          elif level == 0:
              # tie out the order with other
              # outer most level, take the fast route
              ngroups = 1 + new_lev_labels.max()
              left_indexer, counts = groupsort_indexer(new_lev_labels,
                                                       ngroups)
              # missing values are placed first; drop them!
              left_indexer = left_indexer[counts[0]:]
              new_labels = [lab[left_indexer] for lab in new_labels]
          else:
              # tie out the order with other: sort the leaves
              mask = new_lev_labels != -1
              mask_all = mask.all()
              if not mask_all:
                  new_labels = [lab[mask] for lab in new_labels]

              left_indexer = _get_leaf_sorter(new_labels[:level + 1])
              new_labels = [lab[left_indexer] for lab in new_labels]

              # left_indexers are w.r.t masked frame.
              # reverse to original frame!
              if not mask_all:
                  left_indexer = mask.nonzero()[0][left_indexer]

          join_index = MultiIndex(levels=new_levels, labels=new_labels,
                                  names=left.names, verify_integrity=False)

      if right_lev_indexer is None:
          right_indexer = join_index.labels[level]
      else:
          right_indexer = algos.take_nd(right_lev_indexer,
                                        join_index.labels[level],
                                        allow_fill=False)

      if flip_order:
          # report the indexers in the caller's (self, other) order
          left_indexer, right_indexer = right_indexer, left_indexer

      if not return_indexers:
          return join_index
      return join_index, left_indexer, right_indexer
  2342. def _join_monotonic(self, other, how='left', return_indexers=False):
  2343. if self.equals(other):
  2344. ret_index = other if how == 'right' else self
  2345. if return_indexers:
  2346. return ret_index, None, None
  2347. else:
  2348. return ret_index
  2349. sv = self.values
  2350. ov = other._values
  2351. if self.is_unique and other.is_unique:
  2352. # We can perform much better than the general case
  2353. if how == 'left':
  2354. join_index = self
  2355. lidx = None
  2356. ridx = self._left_indexer_unique(sv, ov)
  2357. elif how == 'right':
  2358. join_index = other
  2359. lidx = self._left_indexer_unique(ov, sv)
  2360. ridx = None
  2361. elif how == 'inner':
  2362. join_index, lidx, ridx = self._inner_indexer(sv, ov)
  2363. join_index = self._wrap_joined_index(join_index, other)
  2364. elif how == 'outer':
  2365. join_index, lidx, ridx = self._outer_indexer(sv, ov)
  2366. join_index = self._wrap_joined_index(join_index, other)
  2367. else:
  2368. if how == 'left':
  2369. join_index, lidx, ridx = self._left_indexer(sv, ov)
  2370. elif how == 'right':
  2371. join_index, ridx, lidx = self._left_indexer(ov, sv)
  2372. elif how == 'inner':
  2373. join_index, lidx, ridx = self._inner_indexer(sv, ov)
  2374. elif how == 'outer':
  2375. join_index, lidx, ridx = self._outer_indexer(sv, ov)
  2376. join_index = self._wrap_joined_index(join_index, other)
  2377. if return_indexers:
  2378. return join_index, lidx, ridx
  2379. else:
  2380. return join_index
  2381. def _wrap_joined_index(self, joined, other):
  2382. name = self.name if self.name == other.name else None
  2383. return Index(joined, name=name)
  def slice_indexer(self, start=None, end=None, step=None, kind=None):
      """
      For an ordered Index, compute the slice indexer for input labels and
      step

      Parameters
      ----------
      start : label, default None
          If None, defaults to the beginning
      end : label, default None
          If None, defaults to the end
      step : int, default None
      kind : string, default None

      Returns
      -------
      indexer : ndarray or slice

      Notes
      -----
      This function assumes that the data is sorted, so use at your own peril
      """
      lower, upper = self.slice_locs(start, end, step=step, kind=kind)

      # both bounds must be scalars before a slice can be built
      if not is_scalar(lower):
          raise AssertionError("Start slice bound is non-scalar")
      if not is_scalar(upper):
          raise AssertionError("End slice bound is non-scalar")

      return slice(lower, upper, step)
  2411. def _maybe_cast_indexer(self, key):
  2412. """
  2413. If we have a float key and are not a floating index
  2414. then try to cast to an int if equivalent
  2415. """
  2416. if is_float(key) and not self.is_floating():
  2417. try:
  2418. ckey = int(key)
  2419. if ckey == key:
  2420. key = ckey
  2421. except (ValueError, TypeError):
  2422. pass
  2423. return key
  2424. def _validate_indexer(self, form, key, kind):
  2425. """
  2426. if we are positional indexer
  2427. validate that we have appropriate typed bounds
  2428. must be an integer
  2429. """
  2430. assert kind in ['ix', 'loc', 'getitem', 'iloc']
  2431. if key is None:
  2432. pass
  2433. elif is_integer(key):
  2434. pass
  2435. elif kind in ['iloc', 'getitem']:
  2436. self._invalid_indexer(form, key)
  2437. return key
  2438. def _maybe_cast_slice_bound(self, label, side, kind):
  2439. """
  2440. This function should be overloaded in subclasses that allow non-trivial
  2441. casting on label-slice bounds, e.g. datetime-like indices allowing
  2442. strings containing formatted datetimes.
  2443. Parameters
  2444. ----------
  2445. label : object
  2446. side : {'left', 'right'}
  2447. kind : {'ix', 'loc', 'getitem'}
  2448. Returns
  2449. -------
  2450. label : object
  2451. Notes
  2452. -----
  2453. Value of `side` parameter should be validated in caller.
  2454. """
  2455. assert kind in ['ix', 'loc', 'getitem', None]
  2456. # We are a plain index here (sub-class override this method if they
  2457. # wish to have special treatment for floats/ints, e.g. Float64Index and
  2458. # datetimelike Indexes
  2459. # reject them
  2460. if is_float(label):
  2461. if not (kind in ['ix'] and (self.holds_integer() or
  2462. self.is_floating())):
  2463. self._invalid_indexer('slice', label)
  2464. # we are trying to find integer bounds on a non-integer based index
  2465. # this is rejected (generally .loc gets you here)
  2466. elif is_integer(label):
  2467. self._invalid_indexer('slice', label)
  2468. return label
  2469. def _searchsorted_monotonic(self, label, side='left'):
  2470. if self.is_monotonic_increasing:
  2471. return self.searchsorted(label, side=side)
  2472. elif self.is_monotonic_decreasing:
  2473. # np.searchsorted expects ascending sort order, have to reverse
  2474. # everything for it to work (element ordering, search side and
  2475. # resulting value).
  2476. pos = self[::-1].searchsorted(label, side='right' if side == 'left'
  2477. else 'right')
  2478. return len(self) - pos
  2479. raise ValueError('index must be monotonic increasing or decreasing')
  def get_slice_bound(self, label, side, kind):
      """
      Calculate slice bound that corresponds to given label.

      Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
      of given label.

      Parameters
      ----------
      label : object
      side : {'left', 'right'}
      kind : {'ix', 'loc', 'getitem'}

      Raises
      ------
      ValueError
          If ``side`` is neither 'left' nor 'right'.
      KeyError
          If the label is absent and the index is not monotonic, or if
          the positions of a non-unique label cannot be expressed as a
          slice.
      """
      assert kind in ['ix', 'loc', 'getitem', None]

      if side not in ('left', 'right'):
          raise ValueError("Invalid value for side kwarg,"
                           " must be either 'left' or 'right': %s" %
                           (side, ))

      original_label = label

      # For datetime indices label may be a string that has to be converted
      # to datetime boundary according to its resolution.
      label = self._maybe_cast_slice_bound(label, side, kind)

      # we need to look up the label
      try:
          location = self.get_loc(label)
      except KeyError as err:
          try:
              return self._searchsorted_monotonic(label, side)
          except ValueError:
              # raise the original KeyError
              raise err

      if isinstance(location, np.ndarray):
          # get_loc may return a boolean array or an array of indices, which
          # is OK as long as they are representable by a slice.
          if is_bool_dtype(location):
              location = lib.maybe_booleans_to_slice(location.view('u1'))
          else:
              location = lib.maybe_indices_to_slice(location.astype('i8'),
                                                    len(self))
          if isinstance(location, np.ndarray):
              raise KeyError("Cannot get %s slice bound for non-unique "
                             "label: %r" % (side, original_label))

      if isinstance(location, slice):
          return location.start if side == 'left' else location.stop

      # scalar position: the right bound is one past the match
      return location + 1 if side == 'right' else location
  def slice_locs(self, start=None, end=None, step=None, kind=None):
      """
      Compute slice locations for input labels.

      Parameters
      ----------
      start : label, default None
          If None, defaults to the beginning
      end : label, default None
          If None, defaults to the end
      step : int, defaults None
          If None, defaults to 1
      kind : {'ix', 'loc', 'getitem'} or None

      Returns
      -------
      start, end : int
      """
      forward = (step is None or step >= 0)

      if not forward:
          # a reverse slice is resolved like a forward one with the two
          # labels exchanged; the positions are mapped back further down
          start, end = end, start

      lower = None
      if start is not None:
          lower = self.get_slice_bound(start, 'left', kind)
      if lower is None:
          lower = 0

      upper = None
      if end is not None:
          upper = self.get_slice_bound(end, 'right', kind)
      if upper is None:
          upper = len(self)

      if forward:
          return lower, upper

      # Undo the exchange and shift both positions down by one:
      # slice_locs('B', 'A', step=-1) was resolved as 'A'..'B', so the
      # reverse slice starts at (right bound of 'B') - 1 and stops at
      # (left bound of 'A') - 1.
      start_slice, end_slice = upper - 1, lower - 1

      # -1 would be read as "last element" instead of "before the first";
      # subtracting len(self) compensates for that.
      if end_slice == -1:
          end_slice -= len(self)
      if start_slice == -1:
          start_slice -= len(self)

      return start_slice, end_slice
  def delete(self, loc):
      """
      Make new Index with passed location(-s) deleted

      Parameters
      ----------
      loc : object
          Passed to ``np.delete`` as the position(s) to remove.

      Returns
      -------
      new_index : Index
      """
      remaining = np.delete(self._data, loc)
      return self._shallow_copy(remaining)
  def insert(self, loc, item):
      """
      Make new Index inserting new item at location.

      A negative ``loc`` is interpreted through ordinary slicing: the
      item goes between ``values[:loc]`` and ``values[loc:]``.

      Parameters
      ----------
      loc : int
      item : object

      Returns
      -------
      new_index : Index
      """
      current = np.asarray(self)
      new_values = self._coerce_scalar_to_index(item)._values
      head, tail = current[:loc], current[loc:]
      combined = np.concatenate((head, new_values, tail))
      return self._shallow_copy_with_infer(combined)
  def drop(self, labels, errors='raise'):
      """
      Make new Index with passed list of labels deleted

      Parameters
      ----------
      labels : array-like
      errors : {'ignore', 'raise'}, default 'raise'
          If 'ignore', suppress error and existing labels are dropped.

      Returns
      -------
      dropped : Index

      Raises
      ------
      ValueError
          If some labels are not found and ``errors`` is not 'ignore'.
      """
      labels = com._index_labels_to_array(labels)
      positions = self.get_indexer(labels)

      # get_indexer marks labels it could not find with -1
      absent = positions == -1
      if absent.any():
          if errors != 'ignore':
              raise ValueError('labels %s not contained in axis' %
                               labels[absent])
          positions = positions[~absent]

      return self.delete(positions)
  # The decorator arguments pair the legacy ``take_last`` keyword with
  # ``keep`` (True -> 'last', False -> 'first'); the docstring is supplied
  # by Appender from the shared 'drop_duplicates' template, so none is
  # written inline.
  @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                 False: 'first'})
  @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
  def drop_duplicates(self, keep='first'):
      # all of the work is delegated to the parent-class implementation
      return super(Index, self).drop_duplicates(keep=keep)
  # The decorator arguments pair the legacy ``take_last`` keyword with
  # ``keep`` (True -> 'last', False -> 'first'); the docstring is supplied
  # by Appender from the shared 'duplicated' template, so none is written
  # inline.
  @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                 False: 'first'})
  @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
  def duplicated(self, keep='first'):
      # all of the work is delegated to the parent-class implementation
      return super(Index, self).duplicated(keep=keep)
  _index_shared_docs['fillna'] = """
      Fill NA/NaN values with the specified value

      Parameters
      ----------
      value : scalar
          Scalar value to use to fill holes (e.g. 0).
          This value cannot be a list-likes.
      downcast : dict, default is None
          a dict of item->dtype of what to downcast if possible,
          or the string 'infer' which will try to downcast to an appropriate
          equal type (e.g. float64 to int64 if possible)

      Returns
      -------
      filled : %(klass)s
      """

  @Appender(_index_shared_docs['fillna'])
  def fillna(self, value=None, downcast=None):
      # docstring comes from the shared 'fillna' template via Appender
      self._assert_can_do_op(value)

      if not self.hasnans:
          # nothing to fill
          return self._shallow_copy()

      filled = self.putmask(self._isnan, value)
      if downcast is None:
          # only the name is carried over to the rebuilt index
          return Index(filled, name=self.name)

      # NOTE(review): with a non-None ``downcast`` the filled values are
      # discarded and an unfilled copy is returned - confirm intended.
      return self._shallow_copy()
  _index_shared_docs['dropna'] = """
      Return Index without NA/NaN values

      Parameters
      ----------
      how :  {'any', 'all'}, default 'any'
          If the Index is a MultiIndex, drop the value when any or all levels
          are NaN.

      Returns
      -------
      valid : Index
      """

  @Appender(_index_shared_docs['dropna'])
  def dropna(self, how='any'):
      # docstring comes from the shared 'dropna' template via Appender;
      # ``how`` is only validated here, it does not alter the result
      if how not in ('any', 'all'):
          raise ValueError("invalid how option: {0}".format(how))

      if not self.hasnans:
          return self._shallow_copy()

      values = self.values
      keep = ~self._isnan
      return self._shallow_copy(values[keep])
  def _evaluate_with_timedelta_like(self, other, op, opstr):
      """
      Default handler for timedelta-like operands: always raises.

      The numeric binary ops installed by ``_add_numeric_methods_binary``
      route DateOffset / timedelta operands here.

      Raises
      ------
      TypeError
          Unconditionally.
      """
      raise TypeError("can only perform ops with timedelta like values")
  def _evaluate_with_datetime_like(self, other, op, opstr):
      """
      Default handler for datetime-like operands: always raises.

      The numeric binary ops installed by ``_add_numeric_methods_binary``
      route Timestamp / np.datetime64 operands here.

      Raises
      ------
      TypeError
          Unconditionally.
      """
      raise TypeError("can only perform ops with datetime like values")
  def _evalute_compare(self, op):
      """
      Abstract placeholder: always raises ``base.AbstractMethodError``.

      NOTE(review): the comparison methods installed by
      ``_add_comparison_methods`` call ``self._evaluate_compare(other, op)``;
      this stub is spelled ``_evalute_compare`` and takes only ``op`` -
      confirm whether it was meant to be that hook.
      """
      raise base.AbstractMethodError(self)
  2687. @classmethod
  2688. def _add_comparison_methods(cls):
  2689. """ add in comparison methods """
  2690. def _make_compare(op):
  2691. def _evaluate_compare(self, other):
  2692. if isinstance(other, (np.ndarray, Index, ABCSeries)):
  2693. if other.ndim > 0 and len(self) != len(other):
  2694. raise ValueError('Lengths must match to compare')
  2695. # we may need to directly compare underlying
  2696. # representations
  2697. if needs_i8_conversion(self) and needs_i8_conversion(other):
  2698. return self._evaluate_compare(other, op)
  2699. if is_object_dtype(self) and self.nlevels == 1:
  2700. # don't pass MultiIndex
  2701. result = _comp_method_OBJECT_ARRAY(op, self.values, other)
  2702. else:
  2703. result = op(self.values, np.asarray(other))
  2704. # technically we could support bool dtyped Index
  2705. # for now just return the indexing array directly
  2706. if is_bool_dtype(result):
  2707. return result
  2708. try:
  2709. return Index(result)
  2710. except TypeError:
  2711. return result
  2712. return _evaluate_compare
  2713. cls.__eq__ = _make_compare(operator.eq)
  2714. cls.__ne__ = _make_compare(operator.ne)
  2715. cls.__lt__ = _make_compare(operator.lt)
  2716. cls.__gt__ = _make_compare(operator.gt)
  2717. cls.__le__ = _make_compare(operator.le)
  2718. cls.__ge__ = _make_compare(operator.ge)
  2719. @classmethod
  2720. def _add_numericlike_set_methods_disabled(cls):
  2721. """ add in the numeric set-like methods to disable """
  2722. def _make_invalid_op(name):
  2723. def invalid_op(self, other=None):
  2724. raise TypeError("cannot perform {name} with this index type: "
  2725. "{typ}".format(name=name, typ=type(self)))
  2726. invalid_op.__name__ = name
  2727. return invalid_op
  2728. cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa
  2729. cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa
  @classmethod
  def _add_numeric_methods_disabled(cls):
      """ replace the numeric methods of ``cls`` with ops that raise """

      def _make_invalid_op(name):
          def invalid_op(self, other=None):
              raise TypeError("cannot perform {name} with this index type: "
                              "{typ}".format(name=name, typ=type(self)))

          invalid_op.__name__ = name
          return invalid_op

      # each reflected method shares the function built for the forward one
      binary_pairs = [('__pow__', '__rpow__'),
                      ('__mul__', '__rmul__'),
                      ('__floordiv__', '__rfloordiv__'),
                      ('__truediv__', '__rtruediv__')]
      if not compat.PY3:
          binary_pairs.append(('__div__', '__rdiv__'))

      for forward, reflected in binary_pairs:
          blocked = _make_invalid_op(forward)
          setattr(cls, forward, blocked)
          setattr(cls, reflected, blocked)

      # NOTE(review): Python's unary ``~`` looks up ``__invert__``, not
      # ``__inv__`` - confirm that ``__inv__`` is the intended name.
      for unary in ('__neg__', '__pos__', '__abs__', '__inv__'):
          setattr(cls, unary, _make_invalid_op(unary))
  def _maybe_update_attributes(self, attrs):
      """
      Update Index attributes (e.g. freq) depending on op

      The base implementation returns ``attrs`` unchanged; the numeric op
      wrappers in this file call it right before rebuilding an Index.
      """
      return attrs
  2752. def _validate_for_numeric_unaryop(self, op, opstr):
  2753. """ validate if we can perform a numeric unary operation """
  2754. if not self._is_numeric_dtype:
  2755. raise TypeError("cannot evaluate a numeric op "
  2756. "{opstr} for type: {typ}".format(
  2757. opstr=opstr,
  2758. typ=type(self))
  2759. )
  2760. def _validate_for_numeric_binop(self, other, op, opstr):
  2761. """
  2762. return valid other, evaluate or raise TypeError
  2763. if we are not of the appropriate type
  2764. internal method called by ops
  2765. """
  2766. from pandas.tseries.offsets import DateOffset
  2767. # if we are an inheritor of numeric,
  2768. # but not actually numeric (e.g. DatetimeIndex/PeriodInde)
  2769. if not self._is_numeric_dtype:
  2770. raise TypeError("cannot evaluate a numeric op {opstr} "
  2771. "for type: {typ}".format(
  2772. opstr=opstr,
  2773. typ=type(self))
  2774. )
  2775. if isinstance(other, Index):
  2776. if not other._is_numeric_dtype:
  2777. raise TypeError("cannot evaluate a numeric op "
  2778. "{opstr} with type: {typ}".format(
  2779. opstr=type(self),
  2780. typ=type(other))
  2781. )
  2782. elif isinstance(other, np.ndarray) and not other.ndim:
  2783. other = other.item()
  2784. if isinstance(other, (Index, ABCSeries, np.ndarray)):
  2785. if len(self) != len(other):
  2786. raise ValueError("cannot evaluate a numeric op with "
  2787. "unequal lengths")
  2788. other = _values_from_object(other)
  2789. if other.dtype.kind not in ['f', 'i']:
  2790. raise TypeError("cannot evaluate a numeric op "
  2791. "with a non-numeric dtype")
  2792. elif isinstance(other, (DateOffset, np.timedelta64,
  2793. Timedelta, datetime.timedelta)):
  2794. # higher up to handle
  2795. pass
  2796. elif isinstance(other, (Timestamp, np.datetime64)):
  2797. # higher up to handle
  2798. pass
  2799. else:
  2800. if not (is_float(other) or is_integer(other)):
  2801. raise TypeError("can only perform ops with scalar values")
  2802. return other
  2803. @classmethod
  2804. def _add_numeric_methods_binary(cls):
  2805. """ add in numeric methods """
  2806. def _make_evaluate_binop(op, opstr, reversed=False):
  2807. def _evaluate_numeric_binop(self, other):
  2808. from pandas.tseries.offsets import DateOffset
  2809. other = self._validate_for_numeric_binop(other, op, opstr)
  2810. # handle time-based others
  2811. if isinstance(other, (DateOffset, np.timedelta64,
  2812. Timedelta, datetime.timedelta)):
  2813. return self._evaluate_with_timedelta_like(other, op, opstr)
  2814. elif isinstance(other, (Timestamp, np.datetime64)):
  2815. return self._evaluate_with_datetime_like(other, op, opstr)
  2816. # if we are a reversed non-communative op
  2817. values = self.values
  2818. if reversed:
  2819. values, other = other, values
  2820. attrs = self._get_attributes_dict()
  2821. attrs = self._maybe_update_attributes(attrs)
  2822. return Index(op(values, other), **attrs)
  2823. return _evaluate_numeric_binop
  2824. cls.__add__ = cls.__radd__ = _make_evaluate_binop(
  2825. operator.add, '__add__')
  2826. cls.__sub__ = _make_evaluate_binop(
  2827. operator.sub, '__sub__')
  2828. cls.__rsub__ = _make_evaluate_binop(
  2829. operator.sub, '__sub__', reversed=True)
  2830. cls.__mul__ = cls.__rmul__ = _make_evaluate_binop(
  2831. operator.mul, '__mul__')
  2832. cls.__pow__ = cls.__rpow__ = _make_evaluate_binop(
  2833. operator.pow, '__pow__')
  2834. cls.__mod__ = _make_evaluate_binop(
  2835. operator.mod, '__mod__')
  2836. cls.__floordiv__ = _make_evaluate_binop(
  2837. operator.floordiv, '__floordiv__')
  2838. cls.__rfloordiv__ = _make_evaluate_binop(
  2839. operator.floordiv, '__floordiv__', reversed=True)
  2840. cls.__truediv__ = _make_evaluate_binop(
  2841. operator.truediv, '__truediv__')
  2842. cls.__rtruediv__ = _make_evaluate_binop(
  2843. operator.truediv, '__truediv__', reversed=True)
  2844. if not compat.PY3:
  2845. cls.__div__ = _make_evaluate_binop(
  2846. operator.div, '__div__')
  2847. cls.__rdiv__ = _make_evaluate_binop(
  2848. operator.div, '__div__', reversed=True)
  2849. @classmethod
  2850. def _add_numeric_methods_unary(cls):
  2851. """ add in numeric unary methods """
  2852. def _make_evaluate_unary(op, opstr):
  2853. def _evaluate_numeric_unary(self):
  2854. self._validate_for_numeric_unaryop(op, opstr)
  2855. attrs = self._get_attributes_dict()
  2856. attrs = self._maybe_update_attributes(attrs)
  2857. return Index(op(self.values), **attrs)
  2858. return _evaluate_numeric_unary
  2859. cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__')
  2860. cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__')
  2861. cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__')
  2862. cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__')
  @classmethod
  def _add_numeric_methods(cls):
      """ add in both the unary and the binary numeric methods """
      cls._add_numeric_methods_unary()
      cls._add_numeric_methods_binary()
  @classmethod
  def _add_logical_methods(cls):
      """ install the ``all`` / ``any`` reductions on ``cls`` """

      _doc = """
      %(desc)s

      Parameters
      ----------
      All arguments to numpy.%(outname)s are accepted.

      Returns
      -------
      %(outname)s : bool or array_like (if axis is specified)
          A single element array_like may be converted to bool."""

      def _make_logical_function(name, desc, f):
          @Substitution(outname=name, desc=desc)
          @Appender(_doc)
          def logical_func(self, *args, **kwargs):
              # extra positional / keyword arguments are accepted but are
              # not forwarded to ``f``
              result = f(self.values)
              zero_dim = (isinstance(result, (np.ndarray, ABCSeries, Index))
                          and result.ndim == 0)
              if not zero_dim:  # pragma: no cover
                  return result
              # return NumPy type
              return result.dtype.type(result.item())

          logical_func.__name__ = name
          return logical_func

      reductions = (('all', 'Return whether all elements '
                            'are True', np.all),
                    ('any', 'Return whether any element is True', np.any))
      for name, desc, func in reductions:
          setattr(cls, name, _make_logical_function(name, desc, func))
  2898. @classmethod
  2899. def _add_logical_methods_disabled(cls):
  2900. """ add in logical methods to disable """
  2901. def _make_invalid_op(name):
  2902. def invalid_op(self, other=None):
  2903. raise TypeError("cannot perform {name} with this index type: "
  2904. "{typ}".format(name=name, typ=type(self)))
  2905. invalid_op.__name__ = name
  2906. return invalid_op
  2907. cls.all = _make_invalid_op('all')
  2908. cls.any = _make_invalid_op('any')
  # Operator set-up for the base Index class: numeric methods are replaced
  # with ops that raise TypeError, while the all/any reductions and the
  # rich-comparison methods are installed.
  Index._add_numeric_methods_disabled()
  Index._add_logical_methods()
  Index._add_comparison_methods()
  2912. def _ensure_index(index_like, copy=False):
  2913. if isinstance(index_like, Index):
  2914. if copy:
  2915. index_like = index_like.copy()
  2916. return index_like
  2917. if hasattr(index_like, 'name'):
  2918. return Index(index_like, name=index_like.name, copy=copy)
  2919. # must check for exactly list here because of strict type
  2920. # check in clean_index_list
  2921. if isinstance(index_like, list):
  2922. if type(index_like) != list:
  2923. index_like = list(index_like)
  2924. # 2200 ?
  2925. converted, all_arrays = lib.clean_index_list(index_like)
  2926. if len(converted) > 0 and all_arrays:
  2927. from .multi import MultiIndex
  2928. return MultiIndex.from_arrays(converted)
  2929. else:
  2930. index_like = converted
  2931. else:
  2932. # clean_index_list does the equivalent of copying
  2933. # so only need to do this if not list instance
  2934. if copy:
  2935. from copy import copy
  2936. index_like = copy(index_like)
  2937. return Index(index_like)
  2938. def _get_na_value(dtype):
  2939. return {np.datetime64: tslib.NaT,
  2940. np.timedelta64: tslib.NaT}.get(dtype, np.nan)
  def _ensure_frozen(array_like, categories, copy=False):
      """
      Return ``array_like`` viewed as a ``FrozenNDArray``.

      The input is first passed through ``_coerce_indexer_dtype`` together
      with ``categories``; the view is copied when ``copy`` is true.
      """
      coerced = _coerce_indexer_dtype(array_like, categories)
      frozen = coerced.view(FrozenNDArray)
      return frozen.copy() if copy else frozen
  2947. def _ensure_has_len(seq):
  2948. """If seq is an iterator, put its values into a list."""
  2949. try:
  2950. len(seq)
  2951. except TypeError:
  2952. return list(seq)
  2953. else:
  2954. return seq
  def _maybe_box(idx):
      """
      Return ``idx.asobject`` for a DatetimeIndex, PeriodIndex or
      TimedeltaIndex; any other input is returned unchanged.
      """
      from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex

      datetimelike = (DatetimeIndex, PeriodIndex, TimedeltaIndex)
      return idx.asobject if isinstance(idx, datetimelike) else idx
  2961. def _trim_front(strings):
  2962. """
  2963. Trims zeros and decimal points
  2964. """
  2965. trimmed = strings
  2966. while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]):
  2967. trimmed = [x[1:] for x in trimmed]
  2968. return trimmed
  2969. def _validate_join_method(method):
  2970. if method not in ['left', 'right', 'inner', 'outer']:
  2971. raise ValueError('do not recognize join method %s' % method)