/pandas/indexes/base.py

import datetime
import warnings
import operator

import numpy as np

import pandas.tslib as tslib
import pandas.lib as lib
import pandas._join as _join
import pandas.algos as _algos
import pandas.index as _index

from pandas.lib import Timestamp, Timedelta, is_datetime_array
from pandas.compat import range, u
from pandas.compat.numpy import function as nv
from pandas import compat
from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
from pandas.types.missing import isnull, array_equivalent
from pandas.types.common import (_ensure_int64, _ensure_object,
                                 _ensure_platform_int,
                                 is_integer,
                                 is_float,
                                 is_dtype_equal,
                                 is_object_dtype,
                                 is_categorical_dtype,
                                 is_bool_dtype,
                                 is_integer_dtype, is_float_dtype,
                                 is_datetime64_any_dtype,
                                 is_timedelta64_dtype,
                                 needs_i8_conversion,
                                 is_iterator, is_list_like,
                                 is_scalar)
from pandas.types.cast import _coerce_indexer_dtype
from pandas.core.common import (is_bool_indexer,
                                _values_from_object,
                                _asarray_tuplesafe)
from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray,
                              IndexOpsMixin)
import pandas.core.base as base
from pandas.util.decorators import (Appender, Substitution, cache_readonly,
                                    deprecate, deprecate_kwarg)
import pandas.core.common as com
import pandas.types.concat as _concat
import pandas.core.missing as missing
import pandas.core.algorithms as algos
from pandas.formats.printing import pprint_thing
from pandas.core.ops import _comp_method_OBJECT_ARRAY
from pandas.core.strings import StringAccessorMixin
from pandas.core.config import get_option

# simplify
default_pprint = lambda x, max_seq_items=None: \
    pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
                 max_seq_items=max_seq_items)

__all__ = ['Index']

_unsortable_types = frozenset(('mixed', 'mixed-integer'))

_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array')
_index_shared_docs = dict()


def _try_get_item(x):
    try:
        return x.item()
    except AttributeError:
        return x


class InvalidIndexError(Exception):
    pass


_o_dtype = np.dtype(object)
_Identity = object


def _new_Index(cls, d):
    """ This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__
    """
    return cls.__new__(cls, **d)


class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
    """
    Immutable ndarray implementing an ordered, sliceable set. The basic object
    storing axis labels for all pandas objects.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
    copy : bool
        Make a copy of input ndarray
    name : object
        Name to be stored in the index
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible

    Notes
    -----
    An Index instance can **only** contain hashable objects
    """
    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods
    _groupby = _algos.groupby_object
    _arrmap = _algos.arrmap_object
    _left_indexer_unique = _join.left_join_indexer_unique_object
    _left_indexer = _join.left_join_indexer_object
    _inner_indexer = _join.inner_join_indexer_object
    _outer_indexer = _join.outer_join_indexer_object
    _box_scalars = False

    _typ = 'index'
    _data = None
    _id = None
    name = None
    asi8 = None
    _comparables = ['name']
    _attributes = ['name']
    _allow_index_ops = True
    _allow_datetime_index_ops = False
    _allow_period_index_ops = False
    _is_numeric_dtype = False
    _can_hold_na = True

    # prioritize current class for _shallow_copy_with_infer,
    # used to infer integers as datetime-likes
    _infer_as_myclass = False

    _engine_type = _index.ObjectEngine

    def __new__(cls, data=None, dtype=None, copy=False, name=None,
                fastpath=False, tupleize_cols=True, **kwargs):

        if name is None and hasattr(data, 'name'):
            name = data.name

        if fastpath:
            return cls._simple_new(data, name)

        from .range import RangeIndex

        # range
        if isinstance(data, RangeIndex):
            return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
        elif isinstance(data, range):
            return RangeIndex.from_range(data, copy=copy, dtype=dtype,
                                         name=name)

        # categorical
        if is_categorical_dtype(data) or is_categorical_dtype(dtype):
            from .category import CategoricalIndex
            return CategoricalIndex(data, copy=copy, name=name, **kwargs)

        # index-like
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):

            if (is_datetime64_any_dtype(data) or
                    (dtype is not None and is_datetime64_any_dtype(dtype)) or
                    'tz' in kwargs):
                from pandas.tseries.index import DatetimeIndex
                result = DatetimeIndex(data, copy=copy, name=name,
                                       dtype=dtype, **kwargs)
                if dtype is not None and is_dtype_equal(_o_dtype, dtype):
                    return Index(result.to_pydatetime(), dtype=_o_dtype)
                else:
                    return result

            elif (is_timedelta64_dtype(data) or
                  (dtype is not None and is_timedelta64_dtype(dtype))):
                from pandas.tseries.tdi import TimedeltaIndex
                result = TimedeltaIndex(data, copy=copy, name=name, **kwargs)
                if dtype is not None and _o_dtype == dtype:
                    return Index(result.to_pytimedelta(), dtype=_o_dtype)
                else:
                    return result

            if dtype is not None:
                try:

                    # we need to avoid having numpy coerce
                    # things that look like ints/floats to ints unless
                    # they are actually ints, e.g. '0' and 0.0
                    # should not be coerced
                    # GH 11836
                    if is_integer_dtype(dtype):
                        inferred = lib.infer_dtype(data)
                        if inferred == 'integer':
                            data = np.array(data, copy=copy, dtype=dtype)
                        elif inferred in ['floating', 'mixed-integer-float']:

                            # if we are actually all equal to integers
                            # then coerce to integer
                            from .numeric import Int64Index, Float64Index
                            try:
                                res = data.astype('i8')
                                if (res == data).all():
                                    return Int64Index(res, copy=copy,
                                                      name=name)
                            except (TypeError, ValueError):
                                pass

                            # return an actual float index
                            return Float64Index(data, copy=copy, dtype=dtype,
                                                name=name)

                        elif inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    elif is_float_dtype(dtype):
                        inferred = lib.infer_dtype(data)
                        if inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    else:
                        data = np.array(data, dtype=dtype, copy=copy)

                except (TypeError, ValueError):
                    pass

            # maybe coerce to a sub-class
            from pandas.tseries.period import (PeriodIndex,
                                               IncompatibleFrequency)
            if isinstance(data, PeriodIndex):
                return PeriodIndex(data, copy=copy, name=name, **kwargs)

            if issubclass(data.dtype.type, np.integer):
                from .numeric import Int64Index
                return Int64Index(data, copy=copy, dtype=dtype, name=name)
            elif issubclass(data.dtype.type, np.floating):
                from .numeric import Float64Index
                return Float64Index(data, copy=copy, dtype=dtype, name=name)
            # np.bool_ is the NumPy boolean scalar type; the builtin-bool
            # alias np.bool would make this check vacuously False
            elif issubclass(data.dtype.type, np.bool_) or is_bool_dtype(data):
                subarr = data.astype('object')
            else:
                subarr = _asarray_tuplesafe(data, dtype=object)

            # _asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            if copy:
                subarr = subarr.copy()

            if dtype is None:
                inferred = lib.infer_dtype(subarr)
                if inferred == 'integer':
                    from .numeric import Int64Index
                    return Int64Index(subarr.astype('i8'), copy=copy,
                                      name=name)
                elif inferred in ['floating', 'mixed-integer-float']:
                    from .numeric import Float64Index
                    return Float64Index(subarr, copy=copy, name=name)
                elif inferred == 'boolean':
                    # don't support boolean explicitly ATM
                    pass
                elif inferred != 'string':
                    if inferred.startswith('datetime'):
                        if (lib.is_datetime_with_singletz_array(subarr) or
                                'tz' in kwargs):
                            # only when subarr has the same tz
                            from pandas.tseries.index import DatetimeIndex
                            try:
                                return DatetimeIndex(subarr, copy=copy,
                                                     name=name, **kwargs)
                            except tslib.OutOfBoundsDatetime:
                                pass

                    elif inferred.startswith('timedelta'):
                        from pandas.tseries.tdi import TimedeltaIndex
                        return TimedeltaIndex(subarr, copy=copy, name=name,
                                              **kwargs)
                    elif inferred == 'period':
                        try:
                            return PeriodIndex(subarr, name=name, **kwargs)
                        except IncompatibleFrequency:
                            pass
            return cls._simple_new(subarr, name)

        elif hasattr(data, '__array__'):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
                         **kwargs)
        elif data is None or is_scalar(data):
            cls._scalar_data_error(data)
        else:
            if (tupleize_cols and isinstance(data, list) and data and
                    isinstance(data[0], tuple)):

                # we must be all tuples, otherwise don't construct
                # 10697
                if all(isinstance(e, tuple) for e in data):
                    try:
                        # must be orderable in py3
                        if compat.PY3:
                            sorted(data)
                        from .multi import MultiIndex
                        return MultiIndex.from_tuples(
                            data, names=name or kwargs.get('names'))
                    except (TypeError, KeyError):
                        # python2 - MultiIndex fails on mixed types
                        pass
            # other iterable of some kind
            subarr = _asarray_tuplesafe(data, dtype=object)
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
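
    # Illustrative sketch (editor's addition, not in the original pandas
    # source), assuming 0.19-era reprs; the dtype-based dispatch above
    # resolves roughly as follows:
    #
    #   >>> Index([1, 2, 3])
    #   Int64Index([1, 2, 3], dtype='int64')
    #   >>> Index([1.0, 2.5])
    #   Float64Index([1.0, 2.5], dtype='float64')
    #   >>> Index(['a', 'b'])
    #   Index(['a', 'b'], dtype='object')
    #   >>> Index([('a', 1), ('b', 2)])   # list of tuples, tupleize_cols=True
    #   MultiIndex(...)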
- """
- NOTE for new Index creation:
- - _simple_new: It returns new Index with the same type as the caller.
- All metadata (such as name) must be provided by caller's responsibility.
- Using _shallow_copy is recommended because it fills these metadata
- otherwise specified.
- - _shallow_copy: It returns new Index with the same type (using
- _simple_new), but fills caller's metadata otherwise specified. Passed
- kwargs will overwrite corresponding metadata.
- - _shallow_copy_with_infer: It returns new Index inferring its type
- from passed values. It fills caller's metadata otherwise specified as the
- same as _shallow_copy.
- See each method's docstring.
- """

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None, **kwargs):
        """
        we require that we have a dtype compatible with the values;
        if we are passed a non-compatible dtype, then coerce using the
        constructor.

        Must be careful not to recurse.
        """
        if not hasattr(values, 'dtype'):
            if values is None and dtype is not None:
                values = np.empty(0, dtype=dtype)
            else:
                values = np.array(values, copy=False)
                if is_object_dtype(values):
                    values = cls(values, name=name, dtype=dtype,
                                 **kwargs)._values

        result = object.__new__(cls)
        result._data = values
        result.name = name
        for k, v in compat.iteritems(kwargs):
            setattr(result, k, v)
        return result._reset_identity()

    _index_shared_docs['_shallow_copy'] = """
        create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """

    @Appender(_index_shared_docs['_shallow_copy'])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None:
            values = self.values
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        return self._simple_new(values, **attributes)

    def _shallow_copy_with_infer(self, values=None, **kwargs):
        """
        create a new Index inferring the class with passed value, don't copy
        the data, use the same object attributes with passed in attributes
        taking precedence

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
        if values is None:
            values = self.values
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        attributes['copy'] = False
        if self._infer_as_myclass:
            try:
                return self._constructor(values, **attributes)
            except (TypeError, ValueError):
                pass
        return Index(values, **attributes)

    def _deepcopy_if_needed(self, orig, copy=False):
        """
        .. versionadded:: 0.19.0

        Make a copy of self if data coincides (in memory) with orig.
        Subclasses should override this if self._base is not an ndarray.

        Parameters
        ----------
        orig : ndarray
            other ndarray to compare self._data against
        copy : boolean, default False
            when False, do not run any check, just return self

        Returns
        -------
        A copy of self if needed, otherwise self : Index
        """
        if copy:
            # Retrieve the "base objects", i.e. the original memory
            # allocations
            orig = orig if orig.base is None else orig.base
            new = self._data if self._data.base is None else self._data.base
            if orig is new:
                return self.copy(deep=True)

        return self
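
    # Illustrative sketch (editor's addition, not in the original source)
    # of the aliasing check above, under 0.19-era semantics:
    #
    #   arr = np.arange(3)
    #   idx = Index(arr, copy=False)    # idx._data may alias arr's memory
    #   idx._deepcopy_if_needed(arr, copy=True)           # deep copy if the
    #                                                     # base is shared
    #   idx._deepcopy_if_needed(np.arange(3), copy=True)  # -> self (no share)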

    def _update_inplace(self, result, **kwargs):
        # guard when called from IndexOpsMixin
        raise TypeError("Index can't be updated inplace")

    def is_(self, other):
        """
        More flexible, faster check like ``is`` but that works through views

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            other object to compare against.

        Returns
        -------
        True if both have same underlying data, False otherwise : bool
        """
        # use something other than None to be clearer
        return self._id is getattr(
            other, '_id', Ellipsis) and self._id is not None

    def _reset_identity(self):
        """Initializes or resets ``_id`` attribute with new object"""
        self._id = _Identity()
        return self
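
    # Small usage sketch (editor's addition, not in the original source):
    #
    #   idx = Index([1, 2, 3])
    #   idx.is_(idx.view())   # True  -- views share the same ``_id``
    #   idx.is_(idx.copy())   # False -- copy() resets identity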

    # ndarray compat
    def __len__(self):
        """
        return the length of the Index
        """
        return len(self._data)

    def __array__(self, dtype=None):
        """ the array interface, return my values """
        return self._data.view(np.ndarray)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc
        """
        if is_bool_dtype(result):
            return result

        attrs = self._get_attributes_dict()
        attrs = self._maybe_update_attributes(attrs)
        return Index(result, **attrs)

    @cache_readonly
    def dtype(self):
        """ return the dtype object of the underlying data """
        return self._data.dtype

    @cache_readonly
    def dtype_str(self):
        """ return the dtype str of the underlying data """
        return str(self.dtype)

    @property
    def values(self):
        """ return the underlying data as an ndarray """
        return self._data.view(np.ndarray)

    def get_values(self):
        """ return the underlying data as an ndarray """
        return self.values

    # ops compat
    def tolist(self):
        """
        return a list of the Index values
        """
        return list(self.values)

    def repeat(self, n, *args, **kwargs):
        """
        Repeat elements of an Index. Refer to `numpy.ndarray.repeat`
        for more information about the `n` argument.

        See also
        --------
        numpy.ndarray.repeat
        """
        nv.validate_repeat(args, kwargs)
        return self._shallow_copy(self._values.repeat(n))
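
    # Illustrative doctest (editor's addition, not in the original source),
    # assuming 0.19-era reprs:
    #
    #   >>> Index([1, 2]).repeat(2)
    #   Int64Index([1, 1, 2, 2], dtype='int64')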

    def where(self, cond, other=None):
        """
        .. versionadded:: 0.19.0

        Return an Index of same shape as self and whose corresponding
        entries are from self where cond is True and otherwise are from
        other.

        Parameters
        ----------
        cond : boolean array-like with the same length as self
        other : scalar, or array-like
        """
        if other is None:
            other = self._na_value
        values = np.where(cond, self.values, other)
        return self._shallow_copy_with_infer(values, dtype=self.dtype)
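
    # Hedged example (editor's addition, not in the original source). With
    # ``other=None`` the masked-out positions are filled with ``_na_value``
    # (np.nan here), so an integer index is upcast to float:
    #
    #   >>> Index([1, 2, 3]).where([True, False, True])
    #   Float64Index([1.0, nan, 3.0], dtype='float64')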

    def ravel(self, order='C'):
        """
        return an ndarray of the flattened values of the underlying data

        See also
        --------
        numpy.ndarray.ravel
        """
        return self._values.ravel(order=order)

    # construction helpers
    @classmethod
    def _scalar_data_error(cls, data):
        raise TypeError('{0}(...) must be called with a collection of some '
                        'kind, {1} was passed'.format(cls.__name__,
                                                      repr(data)))

    @classmethod
    def _string_data_error(cls, data):
        raise TypeError('String dtype not supported, you may need '
                        'to explicitly cast to a numeric type')

    @classmethod
    def _coerce_to_ndarray(cls, data):
        """coerces data to ndarray, raises on scalar data. Converts other
        iterables to list first and then to array. Does not touch ndarrays.
        """
        if not isinstance(data, (np.ndarray, Index)):
            if data is None or is_scalar(data):
                cls._scalar_data_error(data)

            # other iterable of some kind
            if not isinstance(data, (ABCSeries, list, tuple)):
                data = list(data)
            data = np.asarray(data)
        return data

    def _get_attributes_dict(self):
        """ return an attributes dict for my class """
        return dict([(k, getattr(self, k, None)) for k in self._attributes])

    def view(self, cls=None):

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, '_typ'):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            result._id = self._id
        return result

    def _coerce_scalar_to_index(self, item):
        """
        we need to coerce a scalar to a compat for our index type

        Parameters
        ----------
        item : scalar item to coerce
        """
        return Index([item], dtype=self.dtype, **self._get_attributes_dict())

    _index_shared_docs['copy'] = """
        Make a copy of this object. Name and dtype set those attributes on
        the new object.

        Parameters
        ----------
        name : string, optional
        deep : boolean, default False
        dtype : numpy dtype or pandas type

        Returns
        -------
        copy : Index

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """

    @Appender(_index_shared_docs['copy'])
    def copy(self, name=None, deep=False, dtype=None, **kwargs):
        names = kwargs.get('names')
        if names is not None and name is not None:
            raise TypeError("Can only provide one of `names` and `name`")
        if deep:
            from copy import deepcopy
            new_index = self._shallow_copy(self._data.copy())
            name = name or deepcopy(self.name)
        else:
            new_index = self._shallow_copy()
            name = self.name
        if name is not None:
            names = [name]
        if names:
            new_index = new_index.set_names(names)
        if dtype:
            new_index = new_index.astype(dtype)
        return new_index
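
    # Small sketch (editor's addition, not in the original source):
    #
    #   idx = Index([1, 2], name='a')
    #   idx.copy(name='b')           # new object, name replaced
    #   idx.copy(dtype='float64')    # casts the copy via astype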

    __copy__ = copy

    def __unicode__(self):
        """
        Return a string representation for this object.

        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
        py2/py3.
        """
        klass = self.__class__.__name__
        data = self._format_data()
        attrs = self._format_attrs()
        space = self._format_space()

        prepr = (u(",%s") %
                 space).join([u("%s=%s") % (k, v) for k, v in attrs])

        # no data provided, just attributes
        if data is None:
            data = ''

        res = u("%s(%s%s)") % (klass, data, prepr)

        return res

    def _format_space(self):

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatter function
        """
        return default_pprint

    def _format_data(self):
        """
        Return the formatted data as a unicode string
        """
        from pandas.formats.format import get_console_size, _get_adjustment
        display_width, _ = get_console_size()
        if display_width is None:
            display_width = get_option('display.width') or 80

        space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
        space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))

        n = len(self)
        sep = ','
        max_seq_items = get_option('display.max_seq_items') or n
        formatter = self._formatter_func

        # do we want to justify (only do so for non-objects)
        is_justify = not (self.inferred_type in ('string', 'unicode') or
                          (self.inferred_type == 'categorical' and
                           is_object_dtype(self.categories)))

        # are we a truncated display
        is_truncated = n > max_seq_items

        # adj can optionally handle unicode East Asian width
        adj = _get_adjustment()

        def _extend_line(s, line, value, display_width, next_line_prefix):

            if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                    display_width):
                s += line.rstrip()
                line = next_line_prefix
            line += value
            return s, line

        def best_len(values):
            if values:
                return max([adj.len(x) for x in values])
            else:
                return 0

        if n == 0:
            summary = '[], '
        elif n == 1:
            first = formatter(self[0])
            summary = '[%s], ' % first
        elif n == 2:
            first = formatter(self[0])
            last = formatter(self[-1])
            summary = '[%s, %s], ' % (first, last)
        else:

            if n > max_seq_items:
                n = min(max_seq_items // 2, 10)
                head = [formatter(x) for x in self[:n]]
                tail = [formatter(x) for x in self[-n:]]
            else:
                head = []
                tail = [formatter(x) for x in self]

            # adjust all values to max length if needed
            if is_justify:

                # however, if we are not truncated and we are only a single
                # line, then don't justify
                if (is_truncated or
                        not (len(', '.join(head)) < display_width and
                             len(', '.join(tail)) < display_width)):
                    max_len = max(best_len(head), best_len(tail))
                    head = [x.rjust(max_len) for x in head]
                    tail = [x.rjust(max_len) for x in tail]

            summary = ""
            line = space2

            for i in range(len(head)):
                word = head[i] + sep + ' '
                summary, line = _extend_line(summary, line, word,
                                             display_width, space2)

            if is_truncated:
                # remove trailing space of last line
                summary += line.rstrip() + space2 + '...'
                line = space2

            for i in range(len(tail) - 1):
                word = tail[i] + sep + ' '
                summary, line = _extend_line(summary, line, word,
                                             display_width, space2)

            # last value: no sep added + 1 space of width used for trailing ','
            summary, line = _extend_line(summary, line, tail[-1],
                                         display_width - 2, space2)
            summary += line
            summary += '],'

            if len(summary) > display_width:
                summary += space1
            else:  # one row
                summary += ' '

            # remove initial space
            summary = '[' + summary[len(space2):]

        return summary

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value)
        """
        attrs = []
        attrs.append(('dtype', "'%s'" % self.dtype))
        if self.name is not None:
            attrs.append(('name', default_pprint(self.name)))
        max_seq_items = get_option('display.max_seq_items') or len(self)
        if len(self) > max_seq_items:
            attrs.append(('length', len(self)))
        return attrs

    def to_series(self, **kwargs):
        """
        Create a Series with both index and values equal to the index keys;
        useful with map for returning an indexer based on an index.

        Returns
        -------
        Series : dtype will be based on the type of the Index values.
        """
        from pandas import Series
        return Series(self._to_embed(), index=self, name=self.name)
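
    # Illustrative doctest (editor's addition, not in the original source):
    #
    #   >>> Index(['a', 'b']).to_series()
    #   a    a
    #   b    b
    #   dtype: object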

    def _to_embed(self, keep_tz=False):
        """
        *this is an internal non-public method*

        return an array repr of this object, potentially casting to object
        """
        return self.values.copy()

    _index_shared_docs['astype'] = """
        Create an Index with values cast to dtypes. The class of a new Index
        is determined by dtype. When conversion is impossible, a ValueError
        exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

            .. versionadded:: 0.19.0
        """

    @Appender(_index_shared_docs['astype'])
    def astype(self, dtype, copy=True):
        return Index(self.values.astype(dtype, copy=copy), name=self.name,
                     dtype=dtype)
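
    # Illustrative doctest (editor's addition, not in the original source);
    # the Index constructor re-dispatches on the new dtype:
    #
    #   >>> Index([1, 2, 3]).astype('float64')
    #   Float64Index([1.0, 2.0, 3.0], dtype='float64')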

    def _to_safe_for_reshape(self):
        """ convert to object if we are a categorical """
        return self

    def to_datetime(self, dayfirst=False):
        """
        For an Index containing strings or datetime.datetime objects, attempt
        conversion to DatetimeIndex
        """
        from pandas.tseries.index import DatetimeIndex
        if self.inferred_type == 'string':
            from dateutil.parser import parse
            parser = lambda x: parse(x, dayfirst=dayfirst)
            parsed = lib.try_parse_dates(self.values, parser=parser)
            return DatetimeIndex(parsed)
        else:
            return DatetimeIndex(self.values)

    def _assert_can_do_setop(self, other):
        if not is_list_like(other):
            raise TypeError('Input must be Index or array-like')
        return True

    def _convert_can_do_setop(self, other):
        if not isinstance(other, Index):
            other = Index(other, name=self.name)
            result_name = self.name
        else:
            result_name = self.name if self.name == other.name else None
        return other, result_name

    def _convert_for_op(self, value):
        """ Convert value to be insertable to ndarray """
        return value

    def _assert_can_do_op(self, value):
        """ Check value is valid for scalar op """
        if not lib.isscalar(value):
            msg = "'value' must be a scalar, passed: {0}"
            raise TypeError(msg.format(type(value).__name__))

    @property
    def nlevels(self):
        return 1

    def _get_names(self):
        return FrozenList((self.name, ))

    def _set_names(self, values, level=None):
        if len(values) != 1:
            raise ValueError('Length of new names must be 1, got %d' %
                             len(values))
        self.name = values[0]

    names = property(fset=_set_names, fget=_get_names)

    def set_names(self, names, level=None, inplace=False):
        """
        Set new names on index. Defaults to returning new index.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        inplace : bool
            if True, mutates in place

        Returns
        -------
        new index (of same type and class...etc) [if inplace, returns None]

        Examples
        --------
        >>> Index([1, 2, 3, 4]).set_names('foo')
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> Index([1, 2, 3, 4]).set_names(['foo'])
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                          (2, u'one'), (2, u'two')],
                                         names=['foo', 'bar'])
        >>> idx.set_names(['baz', 'quz'])
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'baz', u'quz'])
        >>> idx.set_names('baz', level=0)
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'baz', u'bar'])
        """
        if level is not None and self.nlevels == 1:
            raise ValueError('Level must be None for non-MultiIndex')

        if level is not None and not is_list_like(level) and is_list_like(
                names):
            raise TypeError("Names must be a string")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._set_names(names, level=level)
        if not inplace:
            return idx

    def rename(self, name, inplace=False):
        """
        Set new names on index. Defaults to returning new index.

        Parameters
        ----------
        name : str or list
            name to set
        inplace : bool
            if True, mutates in place

        Returns
        -------
        new index (of same type and class...etc) [if inplace, returns None]
        """
        return self.set_names([name], inplace=inplace)

    def reshape(self, *args, **kwargs):
        """
        NOT IMPLEMENTED: do not call this method, as reshaping is not
        supported for Index objects and will raise an error.

        Reshape an Index.
        """
        raise NotImplementedError("reshaping is not supported "
                                  "for Index objects")

    @property
    def _has_complex_internals(self):
        # to disable groupby tricks in MultiIndex
        return False

    def summary(self, name=None):
        if len(self) > 0:
            head = self[0]
            if (hasattr(head, 'format') and
                    not isinstance(head, compat.string_types)):
                head = head.format()
            tail = self[-1]
            if (hasattr(tail, 'format') and
                    not isinstance(tail, compat.string_types)):
                tail = tail.format()

            index_summary = ', %s to %s' % (pprint_thing(head),
                                            pprint_thing(tail))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        return '%s: %s entries%s' % (name, len(self), index_summary)
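
    # Illustrative doctest (editor's addition, not in the original source):
    #
    #   >>> Index(['a', 'b', 'c']).summary()
    #   'Index: 3 entries, a to c'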

    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return self.values

    _na_value = np.nan
    """The expected NA value to use with this index."""

    # introspection
    @property
    def is_monotonic(self):
        """ alias for is_monotonic_increasing (deprecated) """
        return self._engine.is_monotonic_increasing

    @property
    def is_monotonic_increasing(self):
        """
        return True if the index is monotonic increasing (values are equal
        or increasing).
        """
        return self._engine.is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        """
        return True if the index is monotonic decreasing (values are equal
        or decreasing).
        """
        return self._engine.is_monotonic_decreasing

    def is_lexsorted_for_tuple(self, tup):
        return True

    @cache_readonly(allow_setting=True)
    def is_unique(self):
        """ return True if the index has unique values """
        return self._engine.is_unique

    @property
    def has_duplicates(self):
        return not self.is_unique
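
    # Quick doctest sketch (editor's addition, not in the original source):
    #
    #   >>> Index([1, 2, 2]).is_unique
    #   False
    #   >>> Index([1, 2, 2]).is_monotonic_increasing
    #   True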

    def is_boolean(self):
        return self.inferred_type in ['boolean']

    def is_integer(self):
        return self.inferred_type in ['integer']

    def is_floating(self):
        return self.inferred_type in ['floating', 'mixed-integer-float']

    def is_numeric(self):
        return self.inferred_type in ['integer', 'floating']

    def is_object(self):
        return is_object_dtype(self.dtype)

    def is_categorical(self):
        return self.inferred_type in ['categorical']

    def is_mixed(self):
        return self.inferred_type in ['mixed']

    def holds_integer(self):
        return self.inferred_type in ['integer', 'mixed-integer']

    # validate / convert indexers
    def _convert_scalar_indexer(self, key, kind=None):
        """
        convert a scalar indexer

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        if kind == 'iloc':
            return self._validate_indexer('positional', key, kind)

        if len(self) and not isinstance(self, ABCMultiIndex):

            # we can raise here if we are definitive that this
            # is positional indexing (e.g. .ix with a float)
            # or label indexing if we are using a type able
            # to be represented in the index

            if kind in ['getitem', 'ix'] and is_float(key):
                if not self.is_floating():
                    return self._invalid_indexer('label', key)

            elif kind in ['loc'] and is_float(key):

                # we want to raise KeyError on string/mixed here
                # technically we *could* raise a TypeError
                # on anything but mixed though
                if self.inferred_type not in ['floating',
                                              'mixed-integer-float',
                                              'string',
                                              'unicode',
                                              'mixed']:
                    return self._invalid_indexer('label', key)

            elif kind in ['loc'] and is_integer(key):
                if not self.holds_integer():
                    return self._invalid_indexer('label', key)

        return key

    def _convert_slice_indexer(self, key, kind=None):
        """
        convert a slice indexer. disallow floats in the start/stop/step

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # if we are not a slice, then we are done
        if not isinstance(key, slice):
            return key

        # validate iloc
        if kind == 'iloc':
            return slice(self._validate_indexer('slice', key.start, kind),
                         self._validate_indexer('slice', key.stop, kind),
                         self._validate_indexer('slice', key.step, kind))

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            return v is None or is_integer(v)

        is_null_slicer = start is None and stop is None
        is_index_slice = is_int(start) and is_int(stop)
        is_positional = is_index_slice and not self.is_integer()

        if kind == 'getitem':
            # called from the getitem slicers, validate that we are in fact
            # integers
            if self.is_integer() or is_index_slice:
                return slice(self._validate_indexer('slice', key.start, kind),
                             self._validate_indexer('slice', key.stop, kind),
                             self._validate_indexer('slice', key.step, kind))

        # convert the slice to an indexer here

        # if we are mixed and have integers
        try:
            if is_positional and self.is_mixed():
                # TODO: i, j are not used anywhere
                if start is not None:
                    i = self.get_loc(start)  # noqa
                if stop is not None:
                    j = self.get_loc(stop)  # noqa
                is_positional = False
        except KeyError:
            if self.inferred_type == 'mixed-integer-float':
                raise

        if is_null_slicer:
            indexer = key
        elif is_positional:
            indexer = key
        else:
            try:
                indexer = self.slice_indexer(start, stop, step, kind=kind)
            except Exception:
                if is_index_slice:
                    if self.is_integer():
                        raise
                    else:
                        indexer = key
                else:
                    raise

        return indexer
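
    # Behavioral sketch (editor's addition, not in the original source).
    # On a non-integer index, integer slice bounds are treated positionally,
    # while label bounds go through slice_indexer:
    #
    #   s = Series(range(4), index=list('abcd'))
    #   s[1:3]       # positional -> rows 1 and 2
    #   s['b':'c']   # label-based -> 'b' and 'c' (inclusive of 'c')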

    def _convert_list_indexer(self, keyarr, kind=None):
        """
        passed a key that is tuple-safe and integer-based, where we have a
        mixed index (e.g. numbers/labels); figure out the indexer.
        return None if we can't help
        """
        if (kind in [None, 'iloc', 'ix'] and
                is_integer_dtype(keyarr) and not self.is_floating() and
                not isinstance(keyarr, ABCPeriodIndex)):

            if self.inferred_type == 'mixed-integer':
                indexer = self.get_indexer(keyarr)
                if (indexer >= 0).all():
                    return indexer
                # missing values are flagged as -1 by get_indexer and negative
                # indices are already converted to positive indices in the
                # above if-statement, so the negative flags are changed to
                # values outside the range of indices so as to trigger an
                # IndexError in maybe_convert_indices
                indexer[indexer < 0] = len(self)
                from pandas.core.indexing import maybe_convert_indices
                return maybe_convert_indices(indexer, len(self))

            elif not self.inferred_type == 'integer':
                keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
                return keyarr

        return None

    def _invalid_indexer(self, form, key):
        """ consistent invalid indexer message """
        raise TypeError("cannot do {form} indexing on {klass} with these "
                        "indexers [{key}] of {kind}".format(
                            form=form, klass=type(self), key=key,
                            kind=type(key)))

    def get_duplicates(self):
        from collections import defaultdict
        counter = defaultdict(lambda: 0)
        for k in self.values:
            counter[k] += 1
        return sorted(k for k, v in compat.iteritems(counter) if v > 1)

    _get_duplicates = get_duplicates
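
    # Illustrative doctest (editor's addition, not in the original source):
    #
    #   >>> Index([1, 2, 2, 3, 3, 3]).get_duplicates()
    #   [2, 3]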

    def _cleanup(self):
        self._engine.clear_mapping()

    @cache_readonly
    def _constructor(self):
        return type(self)

    @cache_readonly
    def _engine(self):
        # property, for now, slow to look up
        return self._engine_type(lambda: self.values, len(self))

    def _validate_index_level(self, level):
        """
        Validate index level.

        For single-level Index getting level number is a no-op, but some
        verification must be done like in MultiIndex.
        """
        if isinstance(level, int):
            if level < 0 and level != -1:
                raise IndexError("Too many levels: Index has only 1 level,"
                                 " %d is not a valid level number" % (level, ))
            elif level > 0:
                raise IndexError("Too many levels:"
                                 " Index has only 1 level, not %d" %
                                 (level + 1))
        elif level != self.name:
            raise KeyError('Level %s must be same as name (%s)' %
                           (level, self.name))

    def _get_level_number(self, level):
        self._validate_index_level(level)
        return 0

    @cache_readonly
    def inferred_type(self):
        """ return a string of the type inferred from the values """
        return lib.infer_dtype(self)

    def is_type_compatible(self, kind):
        return kind == self.inferred_type

    @cache_readonly
    def is_all_dates(self):
        if self._data is None:
            return False
        return is_datetime_array(_ensure_object(self.values))

    def __iter__(self):
        return iter(self.values)

    def __reduce__(self):
        d = dict(data=self._data)
        d.update(self._get_attributes_dict())
        return _new_Index, (self.__class__, d), None

    def __setstate__(self, state):
        """Necessary for making this object picklable"""
        if isinstance(state, dict):
            self._data = state.pop('data')
            for k, v in compat.iteritems(state):
                setattr(self, k, v)

        elif isinstance(state, tuple):

            if len(state) == 2:
                nd_state, own_state = state
                data = np.empty(nd_state[1], dtype=nd_state[2])
                np.ndarray.__setstate__(data, nd_state)
                self.name = own_state[0]

            else:  # pragma: no cover
                data = np.empty(state)
                np.ndarray.__setstate__(data, state)

            self._data = data
            self._reset_identity()
        else:
            raise Exception("invalid pickle state")

    _unpickle_compat = __setstate__
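
    # Round-trip sketch (editor's addition, not in the original source).
    # __reduce__ routes unpickling through _new_Index so that __new__
    # receives keyword arguments:
    #
    #   import pickle
    #   idx = Index(['a', 'b'], name='x')
    #   idx2 = pickle.loads(pickle.dumps(idx))
    #   idx.equals(idx2)   # True (equals is defined later in this file)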

    def __deepcopy__(self, memo=None):
        if memo is None:
            memo = {}
        return self.copy(deep=True)

    def __nonzero__(self):
        raise ValueError("The truth value of a {0} is ambiguous. "
                         "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
                         .format(self.__class__.__name__))

    __bool__ = __nonzero__

    def __contains__(self, key):
        hash(key)
        # work around some kind of odd cython bug
        try:
            return key in self._engine
        except TypeError:
            return False

    def __hash__(self):
        raise TypeError("unhashable type: %r" % type(self).__name__)

    def __setitem__(self, key, value):
        raise TypeError("Index does not support mutable operations")

    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only support boolean ndarrays as indexers).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.
        """
        # There's no custom logic to be implemented in __getslice__, so it's
        # not overloaded intentionally.
        getitem = self._data.__getitem__
        promote = self._shallow_copy

        if is_scalar(key):
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization of basic indexing.
            return promote(getitem(key))

        if is_bool_indexer(key):
            key = np.asarray(key)

        key = _values_from_object(key)
        result = getitem(key)
        if not is_scalar(result):
            return promote(result)
        else:
            return result
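
    # Indexing sketch (editor's addition, not in the original source):
    #
    #   idx = Index(['a', 'b', 'c'])
    #   idx[0]                    # scalar -> 'a'
    #   idx[1:]                   # slice -> Index(['b', 'c'], dtype='object')
    #   idx[[True, False, True]]  # bool list -> Index(['a', 'c'], ...)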

    def _ensure_compat_append(self, other):
        """
        prepare the append

        Returns
        -------
        list of to_concat, name of result Index
        """
        name = self.name
        to_concat = [self]

        if isinstance(other, (list, tuple)):
            to_concat = to_concat + list(other)
        else:
            to_concat.append(other)

        for obj in to_concat:
            if (isinstance(obj, Index) and obj.name != name and
                    obj.name is not None):
                name = None
                break

        to_concat = self._ensure_compat_concat(to_concat)
        to_concat = [x._values if isinstance(x, Index) else x
                     for x in to_concat]
        return to_concat, name

    def append(self, other):
        """
        Append a collection of Index options together

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        to_concat, name = self._ensure_compat_append(other)
        attribs = self._get_attributes_dict()
        attribs['name'] = name
        return self._shallow_copy_with_infer(
            np.concatenate(to_concat), **attribs)
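
    # Illustrative doctest (editor's addition, not in the original source);
    # names are kept only when they agree across the pieces:
    #
    #   >>> Index([1, 2]).append(Index([3, 4]))
    #   Int64Index([1, 2, 3, 4], dtype='int64')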

    @staticmethod
    def _ensure_compat_concat(indexes):
        from pandas.tseries.api import (DatetimeIndex, PeriodIndex,
                                        TimedeltaIndex)
        klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex

        is_ts = [isinstance(idx, klasses) for idx in indexes]

        if any(is_ts) and not all(is_ts):
            return [_maybe_box(idx) for idx in indexes]

        return indexes

    _index_shared_docs['take'] = """
        return a new %(klass)s of the values selected by the indices

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : list
            Indices to be taken
        axis : int, optional
        …