PageRenderTime 43ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/indexes/numeric.py

http://github.com/wesm/pandas
Python | 380 lines | 288 code | 32 blank | 60 comment | 37 complexity | 6a1ef2645c9857afcf7877109b525522 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import numpy as np
  2. import pandas.lib as lib
  3. import pandas._join as _join
  4. import pandas.algos as _algos
  5. import pandas.index as _index
  6. from pandas.types.common import (is_dtype_equal, pandas_dtype,
  7. is_float_dtype, is_object_dtype,
  8. is_integer_dtype, is_scalar)
  9. from pandas.types.missing import array_equivalent, isnull
  10. from pandas.core.common import _values_from_object
  11. from pandas import compat
  12. from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs
  13. from pandas.util.decorators import Appender, cache_readonly
  14. import pandas.indexes.base as ibase
  class NumericIndex(Index):
      """
      Abstract base class for indexes holding values of one numeric dtype.

      Subclasses are expected to define ``_default_dtype``; ``__new__`` reads
      it to decide whether incoming data has to be converted.
      """

      _is_numeric_dtype = True

      def __new__(cls, data=None, dtype=None, copy=False, name=None,
                  fastpath=False):
          """
          Build the index from ``data``.

          ``dtype`` is accepted but never consulted in this constructor;
          conversion always targets ``cls._default_dtype``.  With
          ``fastpath=True`` the data is passed straight to ``_simple_new``
          without coercion or checks.
          """
          if fastpath:
              return cls._simple_new(data, name=name)

          # scalars and generators are dealt with inside _coerce_to_ndarray
          data = cls._coerce_to_ndarray(data)

          if issubclass(data.dtype.type, compat.string_types):
              cls._string_data_error(data)

          if not copy and is_dtype_equal(data.dtype, cls._default_dtype):
              # already the right dtype and no copy requested: reuse as-is
              subarr = data
          else:
              subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
              # let the subclass veto conversions that changed the values
              cls._assert_safe_casting(data, subarr)

          # inherit the name of the input when none was given explicitly
          if name is None and hasattr(data, 'name'):
              name = data.name

          return cls._simple_new(subarr, name=name)

      def _maybe_cast_slice_bound(self, label, side, kind):
          """
          Cast a label used as a slice bound.

          The work is delegated to ``_maybe_cast_indexer``; ``side`` is not
          inspected here.

          Parameters
          ----------
          label : object
          side : {'left', 'right'}
          kind : {'ix', 'loc', 'getitem'}

          Returns
          -------
          label : object

          Notes
          -----
          Value of `side` parameter should be validated in caller.
          """
          allowed_kinds = ('ix', 'loc', 'getitem', None)
          assert kind in allowed_kinds

          # we will try to coerce to integers
          casted = self._maybe_cast_indexer(label)
          return casted

      def _convert_tolerance(self, tolerance):
          """
          Return ``tolerance`` as a float.

          A ``ValueError`` raised by ``float()`` is replaced by a
          ``ValueError`` naming this index class and the offending value.
          """
          try:
              converted = float(tolerance)
          except ValueError:
              message = ('tolerance argument for %s must be numeric: %r' %
                         (type(self).__name__, tolerance))
              raise ValueError(message)
          return converted

      @classmethod
      def _assert_safe_casting(cls, data, subarr):
          """
          Hook called by ``__new__`` after ``data`` was converted to
          ``subarr``.

          Subclasses need to override this only if the process of casting
          data from some accepted dtype to the internal dtype(s) bears the
          risk of truncation (e.g. float to int).  The base version accepts
          every conversion.
          """
          return None
  class Int64Index(NumericIndex):
      """
      Immutable ndarray implementing an ordered, sliceable set. The basic
      object storing axis labels for all pandas objects. Int64Index is a
      special case of `Index` with purely integer labels. This is the
      default index type used by the DataFrame and Series ctors when no
      explicit index is provided by the user.

      Parameters
      ----------
      data : array-like (1-dimensional)
      dtype : NumPy dtype (default: int64)
      copy : bool
          Make a copy of input ndarray
      name : object
          Name to be stored in the index

      Notes
      -----
      An Index instance can **only** contain hashable objects
      """

      _typ = 'int64index'

      # int64-specialised helpers from the compiled extension modules
      _groupby = _algos.groupby_int64
      _arrmap = _algos.arrmap_int64
      _left_indexer_unique = _join.left_join_indexer_unique_int64
      _left_indexer = _join.left_join_indexer_int64
      _inner_indexer = _join.inner_join_indexer_int64
      _outer_indexer = _join.outer_join_indexer_int64

      _can_hold_na = False
      _engine_type = _index.Int64Engine
      _default_dtype = np.int64

      @property
      def inferred_type(self):
          """Always the string ``'integer'``."""
          return 'integer'

      @property
      def asi8(self):
          """View of ``self.values`` reinterpreted as ``'i8'``."""
          # do not cache or you'll create a memory leak
          values = self.values
          return values.view('i8')

      @property
      def is_all_dates(self):
          """
          Checks that all the labels are datetime objects; always False here.
          """
          return False

      def _convert_scalar_indexer(self, key, kind=None):
          """
          Convert a scalar indexer.

          Unless ``kind`` is ``'iloc'``, the key first goes through
          ``_maybe_cast_indexer``; the parent implementation does the rest.

          Parameters
          ----------
          key : label of the slice bound
          kind : {'ix', 'loc', 'getitem'} or None
          """
          allowed_kinds = ('ix', 'loc', 'getitem', 'iloc', None)
          assert kind in allowed_kinds

          # don't coerce ilocs to integers
          if kind != 'iloc':
              key = self._maybe_cast_indexer(key)

          parent = super(Int64Index, self)
          return parent._convert_scalar_indexer(key, kind=kind)

      def equals(self, other):
          """
          Determines if two Index objects contain the same elements.
          """
          # identity shortcut before comparing the underlying values
          if self.is_(other):
              return True

          own_values = _values_from_object(self)
          other_values = _values_from_object(other)
          return array_equivalent(own_values, other_values)

      def _wrap_joined_index(self, joined, other):
          """
          Wrap ``joined`` in an Int64Index, keeping the name only when both
          operands share it.
          """
          if self.name == other.name:
              name = self.name
          else:
              name = None
          return Int64Index(joined, name=name)

      @classmethod
      def _assert_safe_casting(cls, data, subarr):
          """
          Ensure incoming data can be represented as ints.

          Raises ``TypeError`` when non-integer ``data`` does not compare
          equal to its converted form ``subarr``.
          """
          # integer input is accepted without comparing values
          if issubclass(data.dtype.type, np.integer):
              return
          if np.array_equal(data, subarr):
              return
          raise TypeError('Unsafe NumPy casting, you must '
                          'explicitly cast')


  # class-level registration hooks; both are defined outside this file
  Int64Index._add_numeric_methods()
  Int64Index._add_logical_methods()
  class Float64Index(NumericIndex):
      """
      Immutable ndarray implementing an ordered, sliceable set. The basic
      object storing axis labels for all pandas objects. Float64Index is a
      special case of `Index` with purely floating point labels.

      Parameters
      ----------
      data : array-like (1-dimensional)
      dtype : NumPy dtype (default: float64)
      copy : bool
          Make a copy of input ndarray
      name : object
          Name to be stored in the index

      Notes
      -----
      A Float64Index instance can **only** contain hashable objects
      """

      _typ = 'float64index'
      _engine_type = _index.Float64Engine

      # float64-specialised helpers from the compiled extension modules
      _groupby = _algos.groupby_float64
      _arrmap = _algos.arrmap_float64
      _left_indexer_unique = _join.left_join_indexer_unique_float64
      _left_indexer = _join.left_join_indexer_float64
      _inner_indexer = _join.inner_join_indexer_float64
      _outer_indexer = _join.outer_join_indexer_float64

      _default_dtype = np.float64

      @property
      def inferred_type(self):
          """Always the string ``'floating'``."""
          return 'floating'

      # No docstring here on purpose: Appender supplies the shared one.
      @Appender(_index_shared_docs['astype'])
      def astype(self, dtype, copy=True):
          dtype = pandas_dtype(dtype)
          if is_float_dtype(dtype):
              values = self._values.astype(dtype, copy=copy)
          elif is_integer_dtype(dtype):
              # refuse up front when NaN labels are present
              if self.hasnans:
                  raise ValueError('cannot convert float NaN to integer')
              values = self._values.astype(dtype, copy=copy)
          elif is_object_dtype(dtype):
              values = self._values.astype('object', copy=copy)
          else:
              # NOTE(review): the message omits integer dtypes, which are
              # accepted above; text kept unchanged for compatibility.
              raise TypeError('Setting %s dtype to anything other than '
                              'float64 or object is not supported' %
                              self.__class__)
          return Index(values, name=self.name, dtype=dtype)

      def _convert_scalar_indexer(self, key, kind=None):
          """
          Convert a scalar indexer.

          For ``kind='iloc'`` the key is checked with ``_validate_indexer``;
          for every other kind it is returned unchanged.

          Parameters
          ----------
          key : label of the slice bound
          kind : {'ix', 'loc', 'getitem'} or None
          """
          assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

          if kind == 'iloc':
              return self._validate_indexer('positional', key, kind)

          return key

      def _convert_slice_indexer(self, key, kind=None):
          """
          Convert a slice indexer, by definition these are labels
          unless we are iloc.

          Non-slice keys are returned unchanged.

          Parameters
          ----------
          key : label of the slice bound
          kind : optional, type of the indexing operation (loc/ix/iloc/None)
          """
          # if we are not a slice, then we are done
          if not isinstance(key, slice):
              return key

          if kind == 'iloc':
              return super(Float64Index, self)._convert_slice_indexer(
                  key, kind=kind)

          # translate to locations
          return self.slice_indexer(key.start, key.stop, key.step, kind=kind)

      def _format_native_types(self, na_rep='', float_format=None,
                               decimal='.', quoting=None, **kwargs):
          """
          Format the values with ``FloatArrayFormatter`` (``fixed_width``
          off) and return its array result.  Extra ``kwargs`` are accepted
          but not used.
          """
          # imported here rather than at module level, as in the original
          from pandas.formats.format import FloatArrayFormatter
          formatter = FloatArrayFormatter(self.values, na_rep=na_rep,
                                          float_format=float_format,
                                          decimal=decimal, quoting=quoting,
                                          fixed_width=False)
          return formatter.get_result_as_array()

      def get_value(self, series, key):
          """
          Look ``key`` up as an index label (never as a position) and return
          the matching entry or entries of ``series``.

          Raises ``InvalidIndexError`` when ``key`` is not a scalar.  A
          scalar (or None) hit is returned as is; anything else is wrapped
          in a Series that keeps the name of ``series``.
          """
          if not is_scalar(key):
              raise InvalidIndexError

          from pandas.core.indexing import maybe_droplevels
          from pandas.core.series import Series

          k = _values_from_object(key)
          loc = self.get_loc(k)
          new_values = _values_from_object(series)[loc]

          if is_scalar(new_values) or new_values is None:
              return new_values

          new_index = self[loc]
          new_index = maybe_droplevels(new_index, k)
          return Series(new_values, index=new_index, name=series.name)

      def equals(self, other):
          """
          Determines if two Index objects contain the same elements.

          NaNs in the same positions count as equal.  Returns False when
          ``other`` cannot be compared (TypeError / ValueError).
          """
          if self is other:
              return True

          # need to compare nans locations and make sure that they are the
          # same since nans don't compare equal this is a bit tricky
          try:
              if not isinstance(other, Float64Index):
                  other = self._constructor(other)
              if (not is_dtype_equal(self.dtype, other.dtype) or
                      self.shape != other.shape):
                  return False
              left, right = self._values, other._values
              return ((left == right) | (self._isnan & other._isnan)).all()
          except (TypeError, ValueError):
              return False

      def __contains__(self, other):
          """
          Membership test that also lets a NaN match NaN labels.

          Returns False, instead of raising, for objects that cannot be
          tested with ``np.isnan``.
          """
          if super(Float64Index, self).__contains__(other):
              return True

          try:
              # if other is a sequence this throws a ValueError
              return np.isnan(other) and self.hasnans
          except ValueError:
              try:
                  return (len(other) <= 1 and
                          ibase._try_get_item(other) in self)
              except TypeError:
                  return False
          except Exception:
              # Was a bare ``except:``, which also swallowed
              # KeyboardInterrupt and SystemExit.
              return False

      def get_loc(self, key, method=None, tolerance=None):
          """
          Locate ``key``, handling an all-NaN key before deferring to the
          parent implementation.

          For a NaN key the result is taken from ``self._nan_idxs``: its
          single item when ``.item()`` succeeds, otherwise the array itself.
          """
          try:
              if np.all(np.isnan(key)):
                  nan_idxs = self._nan_idxs
                  try:
                      return nan_idxs.item()
                  except (ValueError, IndexError):
                      # should only need to catch ValueError here but on
                      # numpy 1.7 .item() can raise IndexError when NaNs
                      # are present
                      return nan_idxs
          except (TypeError, NotImplementedError):
              # key is not something np.isnan can handle; use the parent
              pass
          return super(Float64Index, self).get_loc(key, method=method,
                                                   tolerance=tolerance)

      @property
      def is_all_dates(self):
          """
          Checks that all the labels are datetime objects; always False here.
          """
          return False

      @cache_readonly
      def is_unique(self):
          """
          True when the parent reports uniqueness and ``_nan_idxs`` holds
          fewer than two entries.
          """
          return (super(Float64Index, self).is_unique and
                  self._nan_idxs.size < 2)

      # No docstring here on purpose: Appender supplies Index.isin's.
      @Appender(Index.isin.__doc__)
      def isin(self, values, level=None):
          value_set = set(values)
          if level is not None:
              self._validate_index_level(level)
          # third argument: whether the candidate set itself contains a null
          return lib.ismember_nans(np.array(self), value_set,
                                   isnull(list(value_set)).any())


  # class-level registration hooks; both are defined outside this file
  Float64Index._add_numeric_methods()
  Float64Index._add_logical_methods_disabled()