PageRenderTime 112ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/indexes/numeric.py

https://github.com/hoffstein/pandas
Python | 393 lines | 291 code | 40 blank | 62 comment | 38 complexity | 4d168420305af994295f43decd629b58 MD5 | raw file
  1. import numpy as np
  2. import pandas.lib as lib
  3. import pandas.algos as _algos
  4. import pandas.index as _index
  5. from pandas import compat
  6. from pandas.indexes.base import Index, InvalidIndexError
  7. from pandas.util.decorators import Appender, cache_readonly
  8. import pandas.core.common as com
  9. from pandas.core.common import is_dtype_equal, isnull
  10. import pandas.indexes.base as ibase
  11. class NumericIndex(Index):
  12. """
  13. Provide numeric type operations
  14. This is an abstract class
  15. """
  16. _is_numeric_dtype = True
  17. def _maybe_cast_slice_bound(self, label, side, kind):
  18. """
  19. This function should be overloaded in subclasses that allow non-trivial
  20. casting on label-slice bounds, e.g. datetime-like indices allowing
  21. strings containing formatted datetimes.
  22. Parameters
  23. ----------
  24. label : object
  25. side : {'left', 'right'}
  26. kind : {'ix', 'loc', 'getitem'}
  27. Returns
  28. -------
  29. label : object
  30. Notes
  31. -----
  32. Value of `side` parameter should be validated in caller.
  33. """
  34. assert kind in ['ix', 'loc', 'getitem', None]
  35. # we will try to coerce to integers
  36. return self._maybe_cast_indexer(label)
  37. def _convert_tolerance(self, tolerance):
  38. try:
  39. return float(tolerance)
  40. except ValueError:
  41. raise ValueError('tolerance argument for %s must be numeric: %r' %
  42. (type(self).__name__, tolerance))
  43. class Int64Index(NumericIndex):
  44. """
  45. Immutable ndarray implementing an ordered, sliceable set. The basic object
  46. storing axis labels for all pandas objects. Int64Index is a special case
  47. of `Index` with purely integer labels. This is the default index type used
  48. by the DataFrame and Series ctors when no explicit index is provided by the
  49. user.
  50. Parameters
  51. ----------
  52. data : array-like (1-dimensional)
  53. dtype : NumPy dtype (default: int64)
  54. copy : bool
  55. Make a copy of input ndarray
  56. name : object
  57. Name to be stored in the index
  58. Notes
  59. -----
  60. An Index instance can **only** contain hashable objects
  61. """
  62. _typ = 'int64index'
  63. _groupby = _algos.groupby_int64
  64. _arrmap = _algos.arrmap_int64
  65. _left_indexer_unique = _algos.left_join_indexer_unique_int64
  66. _left_indexer = _algos.left_join_indexer_int64
  67. _inner_indexer = _algos.inner_join_indexer_int64
  68. _outer_indexer = _algos.outer_join_indexer_int64
  69. _can_hold_na = False
  70. _engine_type = _index.Int64Engine
  71. def __new__(cls, data=None, dtype=None, copy=False, name=None,
  72. fastpath=False, **kwargs):
  73. if fastpath:
  74. return cls._simple_new(data, name=name)
  75. # isscalar, generators handled in coerce_to_ndarray
  76. data = cls._coerce_to_ndarray(data)
  77. if issubclass(data.dtype.type, compat.string_types):
  78. cls._string_data_error(data)
  79. elif issubclass(data.dtype.type, np.integer):
  80. # don't force the upcast as we may be dealing
  81. # with a platform int
  82. if (dtype is None or
  83. not issubclass(np.dtype(dtype).type, np.integer)):
  84. dtype = np.int64
  85. subarr = np.array(data, dtype=dtype, copy=copy)
  86. else:
  87. subarr = np.array(data, dtype=np.int64, copy=copy)
  88. if len(data) > 0:
  89. if (subarr != data).any():
  90. raise TypeError('Unsafe NumPy casting to integer, you must'
  91. ' explicitly cast')
  92. return cls._simple_new(subarr, name=name)
  93. @property
  94. def inferred_type(self):
  95. return 'integer'
  96. @property
  97. def asi8(self):
  98. # do not cache or you'll create a memory leak
  99. return self.values.view('i8')
  100. @property
  101. def is_all_dates(self):
  102. """
  103. Checks that all the labels are datetime objects
  104. """
  105. return False
  106. def _convert_scalar_indexer(self, key, kind=None):
  107. """
  108. convert a scalar indexer
  109. Parameters
  110. ----------
  111. key : label of the slice bound
  112. kind : {'ix', 'loc', 'getitem'} or None
  113. """
  114. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  115. # don't coerce ilocs to integers
  116. if kind != 'iloc':
  117. key = self._maybe_cast_indexer(key)
  118. return (super(Int64Index, self)
  119. ._convert_scalar_indexer(key, kind=kind))
  120. def equals(self, other):
  121. """
  122. Determines if two Index objects contain the same elements.
  123. """
  124. if self.is_(other):
  125. return True
  126. try:
  127. return com.array_equivalent(com._values_from_object(self),
  128. com._values_from_object(other))
  129. except TypeError:
  130. # e.g. fails in numpy 1.6 with DatetimeIndex #1681
  131. return False
  132. def _wrap_joined_index(self, joined, other):
  133. name = self.name if self.name == other.name else None
  134. return Int64Index(joined, name=name)
  135. Int64Index._add_numeric_methods()
  136. Int64Index._add_logical_methods()
  137. class Float64Index(NumericIndex):
  138. """
  139. Immutable ndarray implementing an ordered, sliceable set. The basic object
  140. storing axis labels for all pandas objects. Float64Index is a special case
  141. of `Index` with purely floating point labels.
  142. Parameters
  143. ----------
  144. data : array-like (1-dimensional)
  145. dtype : NumPy dtype (default: object)
  146. copy : bool
  147. Make a copy of input ndarray
  148. name : object
  149. Name to be stored in the index
  150. Notes
  151. -----
  152. An Float64Index instance can **only** contain hashable objects
  153. """
  154. _typ = 'float64index'
  155. _engine_type = _index.Float64Engine
  156. _groupby = _algos.groupby_float64
  157. _arrmap = _algos.arrmap_float64
  158. _left_indexer_unique = _algos.left_join_indexer_unique_float64
  159. _left_indexer = _algos.left_join_indexer_float64
  160. _inner_indexer = _algos.inner_join_indexer_float64
  161. _outer_indexer = _algos.outer_join_indexer_float64
  162. def __new__(cls, data=None, dtype=None, copy=False, name=None,
  163. fastpath=False, **kwargs):
  164. if fastpath:
  165. return cls._simple_new(data, name)
  166. data = cls._coerce_to_ndarray(data)
  167. if issubclass(data.dtype.type, compat.string_types):
  168. cls._string_data_error(data)
  169. if dtype is None:
  170. dtype = np.float64
  171. dtype = np.dtype(dtype)
  172. # allow integer / object dtypes to be passed, but coerce to float64
  173. if dtype.kind in ['i', 'O']:
  174. dtype = np.float64
  175. elif dtype.kind in ['f']:
  176. pass
  177. else:
  178. raise TypeError("cannot support {0} dtype in "
  179. "Float64Index".format(dtype))
  180. try:
  181. subarr = np.array(data, dtype=dtype, copy=copy)
  182. except:
  183. raise TypeError('Unsafe NumPy casting, you must explicitly cast')
  184. # coerce to float64 for storage
  185. if subarr.dtype != np.float64:
  186. subarr = subarr.astype(np.float64)
  187. return cls._simple_new(subarr, name)
  188. @property
  189. def inferred_type(self):
  190. return 'floating'
  191. def astype(self, dtype):
  192. if np.dtype(dtype) not in (np.object, np.float64):
  193. raise TypeError('Setting %s dtype to anything other than '
  194. 'float64 or object is not supported' %
  195. self.__class__)
  196. return Index(self._values, name=self.name, dtype=dtype)
  197. def _convert_scalar_indexer(self, key, kind=None):
  198. """
  199. convert a scalar indexer
  200. Parameters
  201. ----------
  202. key : label of the slice bound
  203. kind : {'ix', 'loc', 'getitem'} or None
  204. """
  205. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  206. if kind == 'iloc':
  207. return self._validate_indexer('positional', key, kind)
  208. return key
  209. def _convert_slice_indexer(self, key, kind=None):
  210. """
  211. convert a slice indexer, by definition these are labels
  212. unless we are iloc
  213. Parameters
  214. ----------
  215. key : label of the slice bound
  216. kind : optional, type of the indexing operation (loc/ix/iloc/None)
  217. """
  218. # if we are not a slice, then we are done
  219. if not isinstance(key, slice):
  220. return key
  221. if kind == 'iloc':
  222. return super(Float64Index, self)._convert_slice_indexer(key,
  223. kind=kind)
  224. # translate to locations
  225. return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
  226. def _format_native_types(self, na_rep='', float_format=None, decimal='.',
  227. quoting=None, **kwargs):
  228. from pandas.formats.format import FloatArrayFormatter
  229. formatter = FloatArrayFormatter(self.values, na_rep=na_rep,
  230. float_format=float_format,
  231. decimal=decimal, quoting=quoting,
  232. fixed_width=False)
  233. return formatter.get_result_as_array()
  234. def get_value(self, series, key):
  235. """ we always want to get an index value, never a value """
  236. if not lib.isscalar(key):
  237. raise InvalidIndexError
  238. from pandas.core.indexing import maybe_droplevels
  239. from pandas.core.series import Series
  240. k = com._values_from_object(key)
  241. loc = self.get_loc(k)
  242. new_values = com._values_from_object(series)[loc]
  243. if lib.isscalar(new_values) or new_values is None:
  244. return new_values
  245. new_index = self[loc]
  246. new_index = maybe_droplevels(new_index, k)
  247. return Series(new_values, index=new_index, name=series.name)
  248. def equals(self, other):
  249. """
  250. Determines if two Index objects contain the same elements.
  251. """
  252. if self is other:
  253. return True
  254. # need to compare nans locations and make sure that they are the same
  255. # since nans don't compare equal this is a bit tricky
  256. try:
  257. if not isinstance(other, Float64Index):
  258. other = self._constructor(other)
  259. if (not is_dtype_equal(self.dtype, other.dtype) or
  260. self.shape != other.shape):
  261. return False
  262. left, right = self._values, other._values
  263. return ((left == right) | (self._isnan & other._isnan)).all()
  264. except TypeError:
  265. # e.g. fails in numpy 1.6 with DatetimeIndex #1681
  266. return False
  267. def __contains__(self, other):
  268. if super(Float64Index, self).__contains__(other):
  269. return True
  270. try:
  271. # if other is a sequence this throws a ValueError
  272. return np.isnan(other) and self.hasnans
  273. except ValueError:
  274. try:
  275. return len(other) <= 1 and ibase._try_get_item(other) in self
  276. except TypeError:
  277. return False
  278. except:
  279. return False
  280. def get_loc(self, key, method=None, tolerance=None):
  281. try:
  282. if np.all(np.isnan(key)):
  283. nan_idxs = self._nan_idxs
  284. try:
  285. return nan_idxs.item()
  286. except (ValueError, IndexError):
  287. # should only need to catch ValueError here but on numpy
  288. # 1.7 .item() can raise IndexError when NaNs are present
  289. return nan_idxs
  290. except (TypeError, NotImplementedError):
  291. pass
  292. return super(Float64Index, self).get_loc(key, method=method,
  293. tolerance=tolerance)
  294. @property
  295. def is_all_dates(self):
  296. """
  297. Checks that all the labels are datetime objects
  298. """
  299. return False
  300. @cache_readonly
  301. def is_unique(self):
  302. return super(Float64Index, self).is_unique and self._nan_idxs.size < 2
  303. @Appender(Index.isin.__doc__)
  304. def isin(self, values, level=None):
  305. value_set = set(values)
  306. if level is not None:
  307. self._validate_index_level(level)
  308. return lib.ismember_nans(np.array(self), value_set,
  309. isnull(list(value_set)).any())
  310. Float64Index._add_numeric_methods()
  311. Float64Index._add_logical_methods_disabled()