PageRenderTime 134ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/generic.py

https://github.com/benracine/pandas
Python | 510 lines | 469 code | 19 blank | 22 comment | 9 complexity | ff5f3c3269f3d9c6d2020e5847c18756 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import numpy as np
  2. import cPickle
  3. from pandas.core.common import save, load
  4. from pandas.core.index import Index, MultiIndex, _ensure_index
  5. import pandas.core.datetools as datetools
  6. #-------------------------------------------------------------------------------
  7. # Picklable mixin
  class Picklable(object):
      """Mixin adding ``save`` / ``load`` convenience methods.

      Both methods delegate to the module-level ``save`` and ``load``
      helpers imported from ``pandas.core.common``.
      """

      def save(self, path):
          """Store this object at ``path`` using the module-level ``save``."""
          # Inside the method body the bare name ``save`` resolves to the
          # imported module-level helper, not to this method.
          save(self, path)

      @classmethod
      def load(cls, path):
          """Return whatever the module-level ``load`` reads from ``path``.

          NOTE(review): presumably pickle-based (the module imports
          cPickle) -- only load files from trusted sources; confirm.
          """
          loaded = load(path)
          return loaded
  class PandasError(Exception):
      """Exception subclass for this package; adds no behavior of its own."""
  class AxisProperty(object):
      """Descriptor exposing one axis of the owning object's ``_data``.

      Reading the attribute returns ``obj._data.axes[axis]``; assigning to
      it calls ``obj._data.set_axis(axis, value)``.

      Parameters
      ----------
      axis : int, default 0
          Position of the axis inside ``obj._data.axes``
      """

      def __init__(self, axis=0):
          self.axis = axis

      def __get__(self, obj, type=None):
          """Return the axis stored at ``self.axis`` for instance ``obj``.

          When accessed on the class itself (``obj`` is None) the descriptor
          is returned, so introspection of the class does not raise.
          """
          if obj is None:
              # Class-level access: there is no ``_data`` to read from.
              return self
          return obj._data.axes[self.axis]

      def __set__(self, obj, value):
          """Replace the axis at ``self.axis`` on ``obj._data`` with ``value``."""
          obj._data.set_axis(self.axis, value)
  class PandasObject(Picklable):
      """Base class with axis-name bookkeeping and label-based helpers.

      Subclasses are expected to provide ``ix`` and ``reindex``; both raise
      NotImplementedError here.
      """

      # Axis name -> axis number
      _AXIS_NUMBERS = {
          'index' : 0,
          'columns' : 1
      }

      # Alternate axis spellings, resolved before any lookup (none by default)
      _AXIS_ALIASES = {}

      # Axis number -> axis name (inverse of _AXIS_NUMBERS)
      _AXIS_NAMES = dict((number, name)
                         for name, number in _AXIS_NUMBERS.items())

      #----------------------------------------------------------------------
      # Axis name business

      @classmethod
      def _get_axis_number(cls, axis):
          """Translate an axis name, alias or number into the axis number.

          Raises Exception for an unknown integer; an unknown name fails
          with the KeyError of the ``_AXIS_NUMBERS`` lookup.
          """
          resolved = cls._AXIS_ALIASES.get(axis, axis)
          if not isinstance(resolved, int):
              return cls._AXIS_NUMBERS[resolved]
          if resolved not in cls._AXIS_NAMES:
              raise Exception('No %d axis' % resolved)
          return resolved

      @classmethod
      def _get_axis_name(cls, axis):
          """Translate an axis name, alias or number into the axis name.

          Raises Exception for an unknown string; an unknown number fails
          with the KeyError of the ``_AXIS_NAMES`` lookup.
          """
          resolved = cls._AXIS_ALIASES.get(axis, axis)
          if not isinstance(resolved, basestring):
              return cls._AXIS_NAMES[resolved]
          if resolved not in cls._AXIS_NUMBERS:
              raise Exception('No axis named %s' % resolved)
          return resolved

      def _get_axis(self, axis):
          """Return the attribute of ``self`` named after the given axis."""
          return getattr(self, self._get_axis_name(axis))

      def groupby(self, by=None, axis=0, level=None, as_index=True):
          """
          Build a GroupBy object for this object.

          Groups may be defined by a mapper (dict or key function) or by a
          series of columns.

          Parameters
          ----------
          by : mapping function / list of functions, dict, Series, or tuple /
              list of column names.
              Called on each element of the object index to determine the
              groups. If a dict or Series is passed, the Series or dict
              VALUES will be used to determine the groups
          axis : int, default 0
          level : int, level name, or sequence of such, default None
              If the axis is a MultiIndex (hierarchical), group by a
              particular level or levels
          as_index : boolean, default True
              For aggregated output, return object with group labels as the
              index. Only relevant for DataFrame input. as_index=False is
              effectively "SQL-style" grouped output

          Examples
          --------
          # DataFrame result
          >>> data.groupby(func, axis=0).mean()

          # DataFrame result
          >>> data.groupby(['col1', 'col2'])['col3'].mean()

          # DataFrame with hierarchical index
          >>> data.groupby(['col1', 'col2']).mean()

          Returns
          -------
          GroupBy object
          """
          # Imported at call time rather than at module import time.
          from pandas.core.groupby import groupby as _build_groupby
          return _build_groupby(self, by, axis=axis, level=level,
                                as_index=as_index)

      def truncate(self, before=None, after=None):
          """
          Keep only the part of a sorted DataFrame / Series lying between
          two dates.

          Parameters
          ----------
          before : date
              Truncate before date
          after : date
              Truncate after date

          Returns
          -------
          truncated : type of caller
          """
          lower = datetools.to_datetime(before)
          upper = datetools.to_datetime(after)
          window = self.ix[lower:upper]
          # the slice is a view, so hand back a copy
          return window.copy()

      def select(self, crit, axis=0):
          """
          Keep the entries whose axis label satisfies ``crit``.

          Parameters
          ----------
          crit : function
              To be called on each index (label). Should return True or False
          axis : int

          Returns
          -------
          selection : type of caller
          """
          axis_name = self._get_axis_name(axis)
          labels = self._get_axis(axis)
          if len(labels) > 0:
              keep = np.asarray([crit(label) for label in labels])
              kept_labels = labels[keep]
          else:
              # nothing to test on an empty axis; reuse it unchanged
              kept_labels = labels
          return self.reindex(**{axis_name : kept_labels})

      def drop(self, labels, axis=0):
          """
          Build a new object without the given labels on the requested axis.

          Parameters
          ----------
          labels : array-like
          axis : int

          Returns
          -------
          dropped : type of caller
          """
          axis_name = self._get_axis_name(axis)
          remaining = self._get_axis(axis).drop(labels)
          return self.reindex(**{axis_name : remaining})

      def sort_index(self, axis=0, ascending=True):
          """
          Reorder the object so that the labels along one axis are sorted.

          Parameters
          ----------
          axis : {0, 1}
              Sort index/rows versus columns
          ascending : boolean, default True
              Sort ascending vs. descending

          Returns
          -------
          sorted_obj : type of caller
          """
          axis_number = self._get_axis_number(axis)
          axis_name = self._get_axis_name(axis_number)
          labels = self._get_axis(axis_number)

          order = labels.argsort()
          if not ascending:
              # descending order is the ascending order reversed
              order = order[::-1]

          return self.reindex(**{axis_name : labels.take(order)})

      @property
      def ix(self):
          """Sliceable indexer used by ``truncate``; not implemented here."""
          raise NotImplementedError

      def reindex(self, *args, **kwds):
          """Conform the object to new axis labels; not implemented here."""
          raise NotImplementedError
  167. class NDFrame(PandasObject):
  168. """
  169. N-dimensional analogue of DataFrame. Store multi-dimensional in a
  170. size-mutable, labeled data structure
  171. Parameters
  172. ----------
  173. data : BlockManager
  174. axes : list
  175. copy : boolean, default False
  176. """
  177. # kludge
  178. _default_stat_axis = 0
  179. def __init__(self, data, axes=None, copy=False, dtype=None):
  180. if dtype is not None:
  181. data = data.astype(dtype)
  182. self._data = data
  def astype(self, dtype):
      """
      Build a new object from the same data with a requested dtype.

      The data container and ``dtype`` are handed to ``self._constructor``,
      which is responsible for the cast.

      Parameters
      ----------
      dtype : numpy.dtype or Python type

      Returns
      -------
      casted : type of caller
      """
      build = self._constructor
      return build(self._data, dtype=dtype)
  194. @property
  195. def _constructor(self):
  196. return NDFrame
  @property
  def axes(self):
      """The ``axes`` attribute of the internal ``_data`` container."""
      container = self._data
      return container.axes
  200. def __repr__(self):
  201. return 'NDFrame'
  @property
  def values(self):
      """Result of ``as_matrix()`` on the internal ``_data`` container."""
      container = self._data
      return container.as_matrix()
  @property
  def ndim(self):
      """The ``ndim`` attribute of the internal ``_data`` container."""
      container = self._data
      return container.ndim
  208. #----------------------------------------------------------------------
  209. # Consolidation of internals
  210. def _consolidate_inplace(self):
  211. self._data = self._data.consolidate()
  def consolidate(self):
      """
      Return a new object whose internals are "consolidated" (data of each
      dtype grouped together in a single ndarray). Mainly an internal API
      function, but available here to the savvy user

      Returns
      -------
      consolidated : type of caller
      """
      merged = self._data.consolidate()
      if merged is self._data:
          # consolidate() handed back the very same container; copy it so
          # the new object does not share it with this one
          merged = merged.copy()
      build = self._constructor
      return build(merged)
  225. @property
  226. def _is_mixed_type(self):
  227. self._consolidate_inplace()
  228. return len(self._data.blocks) > 1
  def _reindex_axis(self, new_index, fill_method, axis, copy):
      """Conform one axis of the internal data to ``new_index``.

      Parameters
      ----------
      new_index : passed through ``_ensure_index`` before use
      fill_method : forwarded as ``method`` to ``reindex_axis``; not used
          on the axis-0 path
      axis : int
          Position in ``self._data.axes``
      copy : boolean
          When False and the axis already equals ``new_index``, ``self`` is
          returned as-is

      Returns
      -------
      ``self`` or a new object built by ``self._constructor``
      """
      target = _ensure_index(new_index)
      current = self._data.axes[axis]
      unchanged = current.equals(target)
      if unchanged and not copy:
          return self

      manager = self._data
      if axis == 0:
          # axis 0 goes through the dedicated items reindexing call
          reindexed = manager.reindex_items(target)
      else:
          reindexed = manager.reindex_axis(target, axis=axis,
                                           method=fill_method)
      return self._constructor(reindexed)
  def cumsum(self, axis=None, skipna=True):
      """
      Return cumulative sums over requested axis, wrapped by
      ``self._wrap_array``.

      Parameters
      ----------
      axis : {0, 1}
          0 for row-wise, 1 for column-wise. When None,
          ``self._default_stat_axis`` is used
      skipna : boolean, default True
          Exclude NA/null values. If an entire row/column is NA, the result
          will be NA

      Returns
      -------
      y : result of ``self._wrap_array`` (DataFrame for DataFrame input)
      """
      if axis is None:
          axis = self._default_stat_axis
      else:
          axis = self._get_axis_number(axis)

      # Work on a private copy; NaN positions are overwritten below.
      y = self.values.copy()

      # Integer data cannot hold NaN, and with skipna=False NaN must simply
      # propagate, so the mask is only needed in the remaining case.
      if skipna and not issubclass(y.dtype.type, np.integer):
          mask = np.isnan(y)
          # NaN contributes 0 to the running sum ...
          np.putmask(y, mask, 0.)
          result = y.cumsum(axis)
          # ... and the original NaN positions stay NaN in the output.
          np.putmask(result, mask, np.nan)
      else:
          result = y.cumsum(axis)
      return self._wrap_array(result, self.axes, copy=False)
  269. def _wrap_array(self, array, axes, copy=False):
  270. raise NotImplementedError
  def cumprod(self, axis=None, skipna=True):
      """
      Return cumulative product over requested axis, wrapped by
      ``self._wrap_array``.

      Parameters
      ----------
      axis : {0, 1}
          0 for row-wise, 1 for column-wise. When None,
          ``self._default_stat_axis`` is used
      skipna : boolean, default True
          Exclude NA/null values. If an entire row/column is NA, the result
          will be NA

      Returns
      -------
      y : result of ``self._wrap_array`` (DataFrame for DataFrame input)
      """
      if axis is None:
          axis = self._default_stat_axis
      else:
          axis = self._get_axis_number(axis)

      # Work on a private copy; NaN positions are overwritten below.
      y = self.values.copy()

      # Integer data cannot hold NaN, and with skipna=False NaN must simply
      # propagate, so the mask is only needed in the remaining case.
      if skipna and not issubclass(y.dtype.type, np.integer):
          mask = np.isnan(y)
          # NaN contributes 1 to the running product ...
          np.putmask(y, mask, 1.)
          result = y.cumprod(axis)
          # ... and the original NaN positions stay NaN in the output.
          np.putmask(result, mask, np.nan)
      else:
          result = y.cumprod(axis)
      return self._wrap_array(result, self.axes, copy=False)
  300. def _values_aggregate(self, func, axis, fill_value, skipna=True):
  301. axis = self._get_axis_number(axis)
  302. values = self.values
  303. mask = np.isfinite(values)
  304. if skipna and fill_value is not None:
  305. values = values.copy()
  306. values[-mask] = fill_value
  307. result = func(values, axis=axis)
  308. count = mask.sum(axis=axis)
  309. result[count == 0] = np.NaN
  310. return result
  def copy(self, deep=True):
      """
      Build a new object from this one's data

      Parameters
      ----------
      deep : boolean, default True
          Make a deep copy, i.e. also copy data. When False the new object
          is built around the same data container

      Returns
      -------
      copy : type of caller
      """
      payload = self._data.copy() if deep else self._data
      return self._constructor(payload)
  def swaplevel(self, i, j, axis=0):
      """
      Exchange levels i and j of the MultiIndex on the given axis

      The swap is applied to a copy; this object is left unchanged.

      Returns
      -------
      swapped : type of caller (new object)
      """
      axis_number = self._get_axis_number(axis)
      swapped = self.copy()
      manager = swapped._data
      manager.set_axis(axis_number,
                       manager.axes[axis_number].swaplevel(i, j))
      return swapped
  def add_prefix(self, prefix):
      """
      Build a new object from ``self._data.add_prefix(prefix)`` (prefix
      string concatenated with panel items names).

      Parameters
      ----------
      prefix : string

      Returns
      -------
      with_prefix : type of caller
      """
      prefixed = self._data.add_prefix(prefix)
      build = self._constructor
      return build(prefixed)
  def add_suffix(self, suffix):
      """
      Build a new object from ``self._data.add_suffix(suffix)`` (suffix
      string concatenated with panel items names).

      Parameters
      ----------
      suffix : string

      Returns
      -------
      with_suffix : type of caller
      """
      suffixed = self._data.add_suffix(suffix)
      build = self._constructor
      return build(suffixed)
  def rename_axis(self, mapper, axis=0, copy=True):
      """
      Relabel one axis using a function or dict-like mapper.
      Function / dict values must be unique (1-to-1). Labels not contained in
      a dict / Series will be left as-is.

      Parameters
      ----------
      mapper : dict-like or function, optional
      axis : int, default 0
      copy : boolean, default True
          Also copy underlying data

      See also
      --------
      DataFrame.rename

      Returns
      -------
      renamed : type of caller
      """
      # should move this at some point
      from pandas.core.series import _get_rename_function

      rename_func = _get_rename_function(mapper)
      manager = self._data

      if axis == 0:
          # the items call takes the copy flag itself
          renamed = manager.rename_items(rename_func, copydata=copy)
          return self._constructor(renamed)

      renamed = manager.rename_axis(rename_func, axis=axis)
      if copy:
          renamed = renamed.copy()
      return self._constructor(renamed)
  def take(self, indices, axis=0):
      """
      Select entries by integer position, analogous to ndarray.take

      Parameters
      ----------
      indices : list / array of ints
      axis : int, default 0

      Returns
      -------
      taken : type of caller
      """
      if axis == 0:
          # axis 0: pick the labels by position, then reindex the items
          picked_items = self._get_axis(axis).take(indices)
          return self._constructor(self._data.reindex_items(picked_items))

      taken = self._data.take(indices, axis=axis)
      return self._constructor(taken)
  401. if axis == 0:
  402. labels = self._get_axis(axis)
  403. new_items = labels.take(indices)
  404. new_data = self._data.reindex_items(new_items)
  405. else:
  406. new_data = self._data.take(indices, axis=axis)
  407. return self._constructor(new_data)