PageRenderTime 70ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/generic.py

https://github.com/ara818/pandas
Python | 460 lines | 419 code | 19 blank | 22 comment | 13 complexity | 58b633e357af592e0a302451ed1f30c1 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import numpy as np
  2. import cPickle
  3. from pandas.core.index import Index, MultiIndex, _ensure_index
  4. import pandas.core.datetools as datetools
  5. #-------------------------------------------------------------------------------
  6. # Picklable mixin
  7. class Picklable(object):
  8. def save(self, fileName):
  9. f = open(fileName, 'wb')
  10. try:
  11. cPickle.dump(self, f, protocol=cPickle.HIGHEST_PROTOCOL)
  12. finally:
  13. f.close()
  14. @classmethod
  15. def load(cls, fileName):
  16. f = open(fileName, 'rb')
  17. try:
  18. return cPickle.load(f)
  19. finally:
  20. f.close()
  21. class PandasError(Exception):
  22. pass
  23. class AxisProperty(object):
  24. def __init__(self, axis=0):
  25. self.axis = axis
  26. def __get__(self, obj, type=None):
  27. data = getattr(obj, '_data')
  28. return data.axes[self.axis]
  29. def __set__(self, obj, value):
  30. data = getattr(obj, '_data')
  31. data.set_axis(self.axis, value)
  32. class PandasObject(Picklable):
  33. _AXIS_NUMBERS = {
  34. 'index' : 0,
  35. 'columns' : 1
  36. }
  37. _AXIS_ALIASES = {}
  38. _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems())
  39. #----------------------------------------------------------------------
  40. # Axis name business
  41. @classmethod
  42. def _get_axis_number(cls, axis):
  43. axis = cls._AXIS_ALIASES.get(axis, axis)
  44. if isinstance(axis, int):
  45. if axis in cls._AXIS_NAMES:
  46. return axis
  47. else:
  48. raise Exception('No %d axis' % axis)
  49. else:
  50. return cls._AXIS_NUMBERS[axis]
  51. @classmethod
  52. def _get_axis_name(cls, axis):
  53. axis = cls._AXIS_ALIASES.get(axis, axis)
  54. if isinstance(axis, basestring):
  55. if axis in cls._AXIS_NUMBERS:
  56. return axis
  57. else:
  58. raise Exception('No axis named %s' % axis)
  59. else:
  60. return cls._AXIS_NAMES[axis]
  61. def _get_axis(self, axis):
  62. name = self._get_axis_name(axis)
  63. return getattr(self, name)
  64. def groupby(self, by=None, axis=0, level=None, as_index=True):
  65. """
  66. Group series using mapper (dict or key function, apply given function
  67. to group, return result as series) or by a series of columns
  68. Parameters
  69. ----------
  70. by : mapping function / list of functions, dict, Series, or tuple /
  71. list of column names.
  72. Called on each element of the object index to determine the groups.
  73. If a dict or Series is passed, the Series or dict VALUES will be
  74. used to determine the groups
  75. axis : int, default 0
  76. level : int, default None
  77. If the axis is a MultiIndex (hierarchical), group by a particular
  78. level
  79. as_index : boolean, default True
  80. For aggregated output, return object with group labels as the
  81. index. Only relevant for DataFrame input. as_index=False is
  82. effectively "SQL-style" grouped output
  83. Examples
  84. --------
  85. # DataFrame result
  86. >>> data.groupby(func, axis=0).mean()
  87. # DataFrame result
  88. >>> data.groupby(['col1', 'col2'])['col3'].mean()
  89. # DataFrame with hierarchical index
  90. >>> data.groupby(['col1', 'col2']).mean()
  91. Returns
  92. -------
  93. GroupBy object
  94. """
  95. from pandas.core.groupby import groupby
  96. return groupby(self, by, axis=axis, level=level, as_index=as_index)
  97. def truncate(self, before=None, after=None):
  98. """Function truncate a sorted DataFrame / Series before and/or after
  99. some particular dates.
  100. Parameters
  101. ----------
  102. before : date
  103. Truncate before date
  104. after : date
  105. Truncate after date
  106. Returns
  107. -------
  108. truncated : type of caller
  109. """
  110. before = datetools.to_datetime(before)
  111. after = datetools.to_datetime(after)
  112. # returns view, want to copy
  113. return self.ix[before:after].copy()
  114. def select(self, crit, axis=0):
  115. """
  116. Return data corresponding to axis labels matching criteria
  117. Parameters
  118. ----------
  119. crit : function
  120. To be called on each index (label). Should return True or False
  121. axis : int
  122. Returns
  123. -------
  124. selection : type of caller
  125. """
  126. axis_name = self._get_axis_name(axis)
  127. axis = self._get_axis(axis)
  128. if len(axis) > 0:
  129. new_axis = axis[np.asarray([crit(label) for label in axis])]
  130. else:
  131. new_axis = axis
  132. return self.reindex(**{axis_name : new_axis})
  133. def drop(self, labels, axis=0):
  134. """
  135. Return new object with labels in requested axis removed
  136. Parameters
  137. ----------
  138. labels : array-like
  139. axis : int
  140. Returns
  141. -------
  142. dropped : type of caller
  143. """
  144. axis_name = self._get_axis_name(axis)
  145. axis = self._get_axis(axis)
  146. new_axis = axis.drop(labels)
  147. return self.reindex(**{axis_name : new_axis})
  148. def sort_index(self, axis=0, ascending=True):
  149. """
  150. Sort object by labels (along an axis)
  151. Parameters
  152. ----------
  153. axis : {0, 1}
  154. Sort index/rows versus columns
  155. ascending : boolean, default True
  156. Sort ascending vs. descending
  157. Returns
  158. -------
  159. sorted_obj : type of caller
  160. """
  161. axis = self._get_axis_number(axis)
  162. axis_name = self._get_axis_name(axis)
  163. labels = self._get_axis(axis)
  164. sort_index = labels.argsort()
  165. if not ascending:
  166. sort_index = sort_index[::-1]
  167. new_axis = labels.take(sort_index)
  168. return self.reindex(**{axis_name : new_axis})
  169. @property
  170. def ix(self):
  171. raise NotImplementedError
  172. def reindex(self, **kwds):
  173. raise NotImplementedError
  174. class NDFrame(PandasObject):
  175. """
  176. N-dimensional analogue of DataFrame. Store multi-dimensional in a
  177. size-mutable, labeled data structure
  178. Parameters
  179. ----------
  180. data : BlockManager
  181. axes : list
  182. copy : boolean, default False
  183. """
  184. # kludge
  185. _default_stat_axis = 0
  186. def __init__(self, data, axes=None, copy=False, dtype=None):
  187. if dtype is not None:
  188. data = data.astype(dtype)
  189. self._data = data
  190. def astype(self, dtype):
  191. """
  192. Cast object to input numpy.dtype
  193. Parameters
  194. ----------
  195. dtype : numpy.dtype or Python type
  196. Returns
  197. -------
  198. casted : type of caller
  199. """
  200. return self._constructor(self._data, dtype=dtype)
  201. @property
  202. def _constructor(self):
  203. return NDFrame
  204. @property
  205. def axes(self):
  206. return self._data.axes
  207. def __repr__(self):
  208. return 'NDFrame'
  209. @property
  210. def values(self):
  211. return self._data.as_matrix()
  212. @property
  213. def ndim(self):
  214. return self._data.ndim
  215. #----------------------------------------------------------------------
  216. # Consolidation of internals
  217. def _consolidate_inplace(self):
  218. self._data = self._data.consolidate()
  219. def consolidate(self):
  220. """
  221. Compute NDFrame with "consolidated" internals (data of each dtype
  222. grouped together in a single ndarray). Mainly an internal API function,
  223. but available here to the savvy user
  224. Returns
  225. -------
  226. consolidated : type of caller
  227. """
  228. cons_data = self._data.consolidate()
  229. if cons_data is self._data:
  230. cons_data = cons_data.copy()
  231. return self._constructor(cons_data)
  232. @property
  233. def _is_mixed_type(self):
  234. self._consolidate_inplace()
  235. return len(self._data.blocks) > 1
  236. def _reindex_axis(self, new_index, fill_method, axis, copy):
  237. new_index = _ensure_index(new_index)
  238. cur_axis = self._data.axes[axis]
  239. if cur_axis.equals(new_index) and not copy:
  240. return self
  241. if axis == 0:
  242. new_data = self._data.reindex_items(new_index)
  243. else:
  244. new_data = self._data.reindex_axis(new_index, axis=axis,
  245. method=fill_method)
  246. return self._constructor(new_data)
  247. def cumsum(self, axis=None, skipna=True):
  248. """
  249. Return DataFrame of cumulative sums over requested axis.
  250. Parameters
  251. ----------
  252. axis : {0, 1}
  253. 0 for row-wise, 1 for column-wise
  254. skipna : boolean, default True
  255. Exclude NA/null values. If an entire row/column is NA, the result
  256. will be NA
  257. Returns
  258. -------
  259. y : DataFrame
  260. """
  261. if axis is None:
  262. axis = self._default_stat_axis
  263. else:
  264. axis = self._get_axis_number(axis)
  265. y = self.values.copy()
  266. if not issubclass(y.dtype.type, np.int_):
  267. mask = np.isnan(self.values)
  268. if skipna:
  269. np.putmask(y, mask, 0.)
  270. result = y.cumsum(axis)
  271. if skipna:
  272. np.putmask(result, mask, np.nan)
  273. else:
  274. result = y.cumsum(axis)
  275. return self._wrap_array(result, self.axes, copy=False)
  276. def _wrap_array(self, array, axes, copy=False):
  277. raise NotImplementedError
  278. def cumprod(self, axis=None, skipna=True):
  279. """
  280. Return cumulative product over requested axis as DataFrame
  281. Parameters
  282. ----------
  283. axis : {0, 1}
  284. 0 for row-wise, 1 for column-wise
  285. skipna : boolean, default True
  286. Exclude NA/null values. If an entire row/column is NA, the result
  287. will be NA
  288. Returns
  289. -------
  290. y : DataFrame
  291. """
  292. if axis is None:
  293. axis = self._default_stat_axis
  294. else:
  295. axis = self._get_axis_number(axis)
  296. y = self.values.copy()
  297. if not issubclass(y.dtype.type, np.int_):
  298. mask = np.isnan(self.values)
  299. if skipna:
  300. np.putmask(y, mask, 1.)
  301. result = y.cumprod(axis)
  302. if skipna:
  303. np.putmask(result, mask, np.nan)
  304. else:
  305. result = y.cumprod(axis)
  306. return self._wrap_array(result, self.axes, copy=False)
  307. def _values_aggregate(self, func, axis, fill_value, skipna=True):
  308. axis = self._get_axis_number(axis)
  309. values = self.values
  310. mask = np.isfinite(values)
  311. if skipna and fill_value is not None:
  312. values = values.copy()
  313. values[-mask] = fill_value
  314. result = func(values, axis=axis)
  315. count = mask.sum(axis=axis)
  316. result[count == 0] = np.NaN
  317. return result
  318. def copy(self, deep=True):
  319. """
  320. Make a copy of this object
  321. Parameters
  322. ----------
  323. deep : boolean, default True
  324. Make a deep copy, i.e. also copy data
  325. Returns
  326. -------
  327. copy : type of caller
  328. """
  329. data = self._data
  330. if deep:
  331. data = data.copy()
  332. return self._constructor(data)
  333. def swaplevel(self, i, j, axis=0):
  334. """
  335. Swap levels i and j in a MultiIndex on a particular axis
  336. Returns
  337. -------
  338. swapped : type of caller (new object)
  339. """
  340. axis = self._get_axis_number(axis)
  341. result = self.copy()
  342. labels = result._data.axes[axis]
  343. result._data.set_axis(axis, labels.swaplevel(i, j))
  344. return result
  345. def add_prefix(self, prefix):
  346. """
  347. Concatenate prefix string with panel items names.
  348. Parameters
  349. ----------
  350. prefix : string
  351. Returns
  352. -------
  353. with_prefix : type of caller
  354. """
  355. new_data = self._data.add_prefix(prefix)
  356. return self._constructor(new_data)
  357. def add_suffix(self, suffix):
  358. """
  359. Concatenate suffix string with panel items names
  360. Parameters
  361. ----------
  362. suffix : string
  363. Returns
  364. -------
  365. with_suffix : type of caller
  366. """
  367. new_data = self._data.add_suffix(suffix)
  368. return self._constructor(new_data)