PageRenderTime 62ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/panel.py

http://github.com/pydata/pandas
Python | 1451 lines | 1406 code | 7 blank | 38 comment | 11 complexity | d5671717649f68b7b677c9823118dab5 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. """
  2. Contains data structures designed for manipulating panel (3-dimensional) data
  3. """
  4. # pylint: disable=E1103,W0231,W0212,W0621
  5. from __future__ import division
  6. from pandas.compat import (map, zip, range, lrange, lmap, u, OrderedDict,
  7. OrderedDefaultdict)
  8. from pandas import compat
  9. import sys
  10. import numpy as np
  11. from pandas.core.common import (PandasError, _try_sort, _default_index,
  12. _infer_dtype_from_scalar, notnull)
  13. from pandas.core.categorical import Categorical
  14. from pandas.core.index import (Index, MultiIndex, _ensure_index,
  15. _get_combined_index)
  16. from pandas.core.indexing import _maybe_droplevels, _is_list_like
  17. from pandas.core.internals import (BlockManager,
  18. create_block_manager_from_arrays,
  19. create_block_manager_from_blocks)
  20. from pandas.core.series import Series
  21. from pandas.core.frame import DataFrame
  22. from pandas.core.generic import NDFrame, _shared_docs
  23. from pandas.tools.util import cartesian_product
  24. from pandas import compat
  25. from pandas.util.decorators import (deprecate, Appender, Substitution,
  26. deprecate_kwarg)
  27. import pandas.core.common as com
  28. import pandas.core.ops as ops
  29. import pandas.core.nanops as nanops
  30. import pandas.computation.expressions as expressions
  31. _shared_doc_kwargs = dict(
  32. axes='items, major_axis, minor_axis',
  33. klass="Panel",
  34. axes_single_arg="{0,1,2,'items','major_axis','minor_axis'}")
  35. _shared_doc_kwargs['args_transpose'] = ("three positional arguments: each one"
  36. "of\n %s" %
  37. _shared_doc_kwargs['axes_single_arg'])
  38. def _ensure_like_indices(time, panels):
  39. """
  40. Makes sure that time and panels are conformable
  41. """
  42. n_time = len(time)
  43. n_panel = len(panels)
  44. u_panels = np.unique(panels) # this sorts!
  45. u_time = np.unique(time)
  46. if len(u_time) == n_time:
  47. time = np.tile(u_time, len(u_panels))
  48. if len(u_panels) == n_panel:
  49. panels = np.repeat(u_panels, len(u_time))
  50. return time, panels
  51. def panel_index(time, panels, names=['time', 'panel']):
  52. """
  53. Returns a multi-index suitable for a panel-like DataFrame
  54. Parameters
  55. ----------
  56. time : array-like
  57. Time index, does not have to repeat
  58. panels : array-like
  59. Panel index, does not have to repeat
  60. names : list, optional
  61. List containing the names of the indices
  62. Returns
  63. -------
  64. multi_index : MultiIndex
  65. Time index is the first level, the panels are the second level.
  66. Examples
  67. --------
  68. >>> years = range(1960,1963)
  69. >>> panels = ['A', 'B', 'C']
  70. >>> panel_idx = panel_index(years, panels)
  71. >>> panel_idx
  72. MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
  73. (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
  74. (1962, 'C')], dtype=object)
  75. or
  76. >>> import numpy as np
  77. >>> years = np.repeat(range(1960,1963), 3)
  78. >>> panels = np.tile(['A', 'B', 'C'], 3)
  79. >>> panel_idx = panel_index(years, panels)
  80. >>> panel_idx
  81. MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
  82. (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
  83. (1962, 'C')], dtype=object)
  84. """
  85. time, panels = _ensure_like_indices(time, panels)
  86. time_factor = Categorical.from_array(time)
  87. panel_factor = Categorical.from_array(panels)
  88. labels = [time_factor.labels, panel_factor.labels]
  89. levels = [time_factor.levels, panel_factor.levels]
  90. return MultiIndex(levels, labels, sortorder=None, names=names,
  91. verify_integrity=False)
  92. class Panel(NDFrame):
  93. """
  94. Represents wide format panel data, stored as 3-dimensional array
  95. Parameters
  96. ----------
  97. data : ndarray (items x major x minor), or dict of DataFrames
  98. items : Index or array-like
  99. axis=0
  100. major_axis : Index or array-like
  101. axis=1
  102. minor_axis : Index or array-like
  103. axis=2
  104. dtype : dtype, default None
  105. Data type to force, otherwise infer
  106. copy : boolean, default False
  107. Copy data from inputs. Only affects DataFrame / 2d ndarray input
  108. """
  109. @property
  110. def _constructor(self):
  111. return type(self)
    # Type used for lower-dimensional results (see _box_item_values, which
    # wraps values of smaller ndim in this type).
    _constructor_sliced = DataFrame
  113. def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
  114. copy=False, dtype=None):
  115. self._init_data(data=data, items=items, major_axis=major_axis,
  116. minor_axis=minor_axis, copy=copy, dtype=dtype)
  117. def _init_data(self, data, copy, dtype, **kwargs):
  118. """
  119. Generate ND initialization; axes are passed
  120. as required objects to __init__
  121. """
  122. if data is None:
  123. data = {}
  124. if dtype is not None:
  125. dtype = self._validate_dtype(dtype)
  126. passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS]
  127. axes = None
  128. if isinstance(data, BlockManager):
  129. if any(x is not None for x in passed_axes):
  130. axes = [x if x is not None else y
  131. for x, y in zip(passed_axes, data.axes)]
  132. mgr = data
  133. elif isinstance(data, dict):
  134. mgr = self._init_dict(data, passed_axes, dtype=dtype)
  135. copy = False
  136. dtype = None
  137. elif isinstance(data, (np.ndarray, list)):
  138. mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
  139. copy = False
  140. dtype = None
  141. else: # pragma: no cover
  142. raise PandasError('Panel constructor not properly called!')
  143. NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
    def _init_dict(self, data, axes, dtype=None):
        """
        Build a block manager from a dict keyed by info-axis label.

        Note that both arguments may be modified: the info axis is popped
        from ``axes``, and dict values inside ``data`` are replaced by
        ``_constructor_sliced`` objects (in the passed dict itself when no
        info axis was given, otherwise in a filtered copy).
        """
        # remove the info axis; ``axes`` now holds only the remaining axes
        haxis = axes.pop(self._info_axis_number)
        # prefilter if haxis passed
        if haxis is not None:
            haxis = _ensure_index(haxis)
            data = OrderedDict((k, v) for k, v
                               in compat.iteritems(data) if k in haxis)
        else:
            ks = list(data.keys())
            # keep the caller's ordering only for ordered dicts
            if not isinstance(data, OrderedDict):
                ks = _try_sort(ks)
            haxis = Index(ks)
        # convert nested dicts into the sliced type (values of existing keys
        # are replaced, so the dict does not change size while iterating)
        for k, v in compat.iteritems(data):
            if isinstance(v, dict):
                data[k] = self._constructor_sliced(v)
        # extract axis for remaining axes & create the slicemap
        raxes = [self._extract_axis(self, data, axis=i)
                 if a is None else a for i, a in enumerate(axes)]
        raxes_sm = self._extract_axes_for_slice(self, raxes)
        # shallow copy
        arrays = []
        haxis_shape = [len(a) for a in raxes]
        for h in haxis:
            v = values = data.get(h)
            if v is None:
                # label with no data: all-NaN placeholder of the slice shape
                values = np.empty(haxis_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, self._constructor_sliced):
                # align the slice to the resolved axes without copying
                d = raxes_sm.copy()
                d['copy'] = False
                v = v.reindex(**d)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            # any other value type is appended unchanged
            arrays.append(values)
        return self._init_arrays(arrays, haxis, [haxis] + raxes)
  180. def _init_arrays(self, arrays, arr_names, axes):
  181. return create_block_manager_from_arrays(arrays, arr_names, axes)
  182. @classmethod
  183. def from_dict(cls, data, intersect=False, orient='items', dtype=None):
  184. """
  185. Construct Panel from dict of DataFrame objects
  186. Parameters
  187. ----------
  188. data : dict
  189. {field : DataFrame}
  190. intersect : boolean
  191. Intersect indexes of input DataFrames
  192. orient : {'items', 'minor'}, default 'items'
  193. The "orientation" of the data. If the keys of the passed dict
  194. should be the items of the result panel, pass 'items'
  195. (default). Otherwise if the columns of the values of the passed
  196. DataFrame objects should be the items (which in the case of
  197. mixed-dtype data you should do), instead pass 'minor'
  198. Returns
  199. -------
  200. Panel
  201. """
  202. orient = orient.lower()
  203. if orient == 'minor':
  204. new_data = OrderedDefaultdict(dict)
  205. for col, df in compat.iteritems(data):
  206. for item, s in compat.iteritems(df):
  207. new_data[item][col] = s
  208. data = new_data
  209. elif orient != 'items': # pragma: no cover
  210. raise ValueError('Orientation must be one of {items, minor}.')
  211. d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype)
  212. ks = list(d['data'].keys())
  213. if not isinstance(d['data'], OrderedDict):
  214. ks = list(sorted(ks))
  215. d[cls._info_axis_name] = Index(ks)
  216. return cls(**d)
  217. def __getitem__(self, key):
  218. if isinstance(self._info_axis, MultiIndex):
  219. return self._getitem_multilevel(key)
  220. return super(Panel, self).__getitem__(key)
  221. def _getitem_multilevel(self, key):
  222. info = self._info_axis
  223. loc = info.get_loc(key)
  224. if isinstance(loc, (slice, np.ndarray)):
  225. new_index = info[loc]
  226. result_index = _maybe_droplevels(new_index, key)
  227. slices = [loc] + [slice(None) for x in range(
  228. self._AXIS_LEN - 1)]
  229. new_values = self.values[slices]
  230. d = self._construct_axes_dict(self._AXIS_ORDERS[1:])
  231. d[self._info_axis_name] = result_index
  232. result = self._constructor(new_values, **d)
  233. return result
  234. else:
  235. return self._get_item_cache(key)
  236. def _init_matrix(self, data, axes, dtype=None, copy=False):
  237. values = self._prep_ndarray(self, data, copy=copy)
  238. if dtype is not None:
  239. try:
  240. values = values.astype(dtype)
  241. except Exception:
  242. raise ValueError('failed to cast to %s' % dtype)
  243. shape = values.shape
  244. fixed_axes = []
  245. for i, ax in enumerate(axes):
  246. if ax is None:
  247. ax = _default_index(shape[i])
  248. else:
  249. ax = _ensure_index(ax)
  250. fixed_axes.append(ax)
  251. return create_block_manager_from_blocks([values], fixed_axes)
  252. #----------------------------------------------------------------------
  253. # Comparison methods
  254. def _compare_constructor(self, other, func):
  255. if not self._indexed_same(other):
  256. raise Exception('Can only compare identically-labeled '
  257. 'same type objects')
  258. new_data = {}
  259. for col in self._info_axis:
  260. new_data[col] = func(self[col], other[col])
  261. d = self._construct_axes_dict(copy=False)
  262. return self._constructor(data=new_data, **d)
  263. #----------------------------------------------------------------------
  264. # Magic methods
  265. def __unicode__(self):
  266. """
  267. Return a string representation for a particular Panel
  268. Invoked by unicode(df) in py2 only.
  269. Yields a Unicode String in both py2/py3.
  270. """
  271. class_name = str(self.__class__)
  272. shape = self.shape
  273. dims = u('Dimensions: %s') % ' x '.join(
  274. ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)])
  275. def axis_pretty(a):
  276. v = getattr(self, a)
  277. if len(v) > 0:
  278. return u('%s axis: %s to %s') % (a.capitalize(),
  279. com.pprint_thing(v[0]),
  280. com.pprint_thing(v[-1]))
  281. else:
  282. return u('%s axis: None') % a.capitalize()
  283. output = '\n'.join(
  284. [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS])
  285. return output
  286. def _get_plane_axes_index(self, axis):
  287. """
  288. Get my plane axes indexes: these are already
  289. (as compared with higher level planes),
  290. as we are returning a DataFrame axes indexes
  291. """
  292. axis_name = self._get_axis_name(axis)
  293. if axis_name == 'major_axis':
  294. index = 'minor_axis'
  295. columns = 'items'
  296. if axis_name == 'minor_axis':
  297. index = 'major_axis'
  298. columns = 'items'
  299. elif axis_name == 'items':
  300. index = 'major_axis'
  301. columns = 'minor_axis'
  302. return index, columns
  303. def _get_plane_axes(self, axis):
  304. """
  305. Get my plane axes indexes: these are already
  306. (as compared with higher level planes),
  307. as we are returning a DataFrame axes
  308. """
  309. return [ self._get_axis(axi) for axi in self._get_plane_axes_index(axis) ]
    # camelCase alias bound to the same classmethod as from_dict
    fromDict = from_dict
    def to_sparse(self, fill_value=None, kind='block'):
        """
        Convert to SparsePanel

        Parameters
        ----------
        fill_value : float, default NaN
        kind : {'block', 'integer'}

        Returns
        -------
        y : SparsePanel
        """
        # imported locally rather than at module level
        from pandas.core.sparse import SparsePanel
        frames = dict(compat.iteritems(self))
        return SparsePanel(frames, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=kind,
                           default_fill_value=fill_value)
  329. def to_excel(self, path, na_rep='', engine=None, **kwargs):
  330. """
  331. Write each DataFrame in Panel to a separate excel sheet
  332. Parameters
  333. ----------
  334. path : string or ExcelWriter object
  335. File path or existing ExcelWriter
  336. na_rep : string, default ''
  337. Missing data representation
  338. engine : string, default None
  339. write engine to use - you can also set this via the options
  340. ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
  341. ``io.excel.xlsm.writer``.
  342. Other Parameters
  343. ----------------
  344. float_format : string, default None
  345. Format string for floating point numbers
  346. cols : sequence, optional
  347. Columns to write
  348. header : boolean or list of string, default True
  349. Write out column names. If a list of string is given it is
  350. assumed to be aliases for the column names
  351. index : boolean, default True
  352. Write row names (index)
  353. index_label : string or sequence, default None
  354. Column label for index column(s) if desired. If None is given, and
  355. `header` and `index` are True, then the index names are used. A
  356. sequence should be given if the DataFrame uses MultiIndex.
  357. startrow : upper left cell row to dump data frame
  358. startcol : upper left cell column to dump data frame
  359. Notes
  360. -----
  361. Keyword arguments (and na_rep) are passed to the ``to_excel`` method
  362. for each DataFrame written.
  363. """
  364. from pandas.io.excel import ExcelWriter
  365. if isinstance(path, compat.string_types):
  366. writer = ExcelWriter(path, engine=engine)
  367. else:
  368. writer = path
  369. kwargs['na_rep'] = na_rep
  370. for item, df in compat.iteritems(self):
  371. name = str(item)
  372. df.to_excel(writer, name, **kwargs)
  373. writer.save()
  374. def as_matrix(self):
  375. self._consolidate_inplace()
  376. return self._data.as_matrix()
  377. #----------------------------------------------------------------------
  378. # Getting and setting elements
  379. def get_value(self, *args, **kwargs):
  380. """
  381. Quickly retrieve single value at (item, major, minor) location
  382. Parameters
  383. ----------
  384. item : item label (panel item)
  385. major : major axis label (panel item row)
  386. minor : minor axis label (panel item column)
  387. takeable : interpret the passed labels as indexers, default False
  388. Returns
  389. -------
  390. value : scalar value
  391. """
  392. nargs = len(args)
  393. nreq = self._AXIS_LEN
  394. # require an arg for each axis
  395. if nargs != nreq:
  396. raise TypeError('There must be an argument for each axis, you gave'
  397. ' {0} args, but {1} are required'.format(nargs,
  398. nreq))
  399. takeable = kwargs.get('takeable')
  400. if takeable is True:
  401. lower = self._iget_item_cache(args[0])
  402. else:
  403. lower = self._get_item_cache(args[0])
  404. return lower.get_value(*args[1:], takeable=takeable)
  405. def set_value(self, *args, **kwargs):
  406. """
  407. Quickly set single value at (item, major, minor) location
  408. Parameters
  409. ----------
  410. item : item label (panel item)
  411. major : major axis label (panel item row)
  412. minor : minor axis label (panel item column)
  413. value : scalar
  414. takeable : interpret the passed labels as indexers, default False
  415. Returns
  416. -------
  417. panel : Panel
  418. If label combo is contained, will be reference to calling Panel,
  419. otherwise a new object
  420. """
  421. # require an arg for each axis and the value
  422. nargs = len(args)
  423. nreq = self._AXIS_LEN + 1
  424. if nargs != nreq:
  425. raise TypeError('There must be an argument for each axis plus the '
  426. 'value provided, you gave {0} args, but {1} are '
  427. 'required'.format(nargs, nreq))
  428. takeable = kwargs.get('takeable')
  429. try:
  430. if takeable is True:
  431. lower = self._iget_item_cache(args[0])
  432. else:
  433. lower = self._get_item_cache(args[0])
  434. lower.set_value(*args[1:], takeable=takeable)
  435. return self
  436. except KeyError:
  437. axes = self._expand_axes(args)
  438. d = self._construct_axes_dict_from(self, axes, copy=False)
  439. result = self.reindex(**d)
  440. args = list(args)
  441. likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1])
  442. made_bigger = not np.array_equal(
  443. axes[0], self._info_axis)
  444. # how to make this logic simpler?
  445. if made_bigger:
  446. com._possibly_cast_item(result, args[0], likely_dtype)
  447. return result.set_value(*args)
  448. def _box_item_values(self, key, values):
  449. if self.ndim == values.ndim:
  450. result = self._constructor(values)
  451. # a dup selection will yield a full ndim
  452. if result._get_axis(0).is_unique:
  453. result = result[key]
  454. return result
  455. d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
  456. return self._constructor_sliced(values, **d)
  457. def __setitem__(self, key, value):
  458. shape = tuple(self.shape)
  459. if isinstance(value, self._constructor_sliced):
  460. value = value.reindex(
  461. **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
  462. mat = value.values
  463. elif isinstance(value, np.ndarray):
  464. if value.shape != shape[1:]:
  465. raise ValueError(
  466. 'shape of value must be {0}, shape of given object was '
  467. '{1}'.format(shape[1:], tuple(map(int, value.shape))))
  468. mat = np.asarray(value)
  469. elif np.isscalar(value):
  470. dtype, value = _infer_dtype_from_scalar(value)
  471. mat = np.empty(shape[1:], dtype=dtype)
  472. mat.fill(value)
  473. else:
  474. raise TypeError('Cannot set item of type: %s' % str(type(value)))
  475. mat = mat.reshape(tuple([1]) + shape[1:])
  476. NDFrame._set_item(self, key, mat)
  477. def _unpickle_panel_compat(self, state): # pragma: no cover
  478. "Unpickle the panel"
  479. _unpickle = com._unpickle_array
  480. vals, items, major, minor = state
  481. items = _unpickle(items)
  482. major = _unpickle(major)
  483. minor = _unpickle(minor)
  484. values = _unpickle(vals)
  485. wp = Panel(values, items, major, minor)
  486. self._data = wp._data
  487. def conform(self, frame, axis='items'):
  488. """
  489. Conform input DataFrame to align with chosen axis pair.
  490. Parameters
  491. ----------
  492. frame : DataFrame
  493. axis : {'items', 'major', 'minor'}
  494. Axis the input corresponds to. E.g., if axis='major', then
  495. the frame's columns would be items, and the index would be
  496. values of the minor axis
  497. Returns
  498. -------
  499. DataFrame
  500. """
  501. axes = self._get_plane_axes(axis)
  502. return frame.reindex(**self._extract_axes_for_slice(self, axes))
  503. def head(self, n=5):
  504. raise NotImplementedError
  505. def tail(self, n=5):
  506. raise NotImplementedError
  507. def _needs_reindex_multi(self, axes, method, level):
  508. """ don't allow a multi reindex on Panel or above ndim """
  509. return False
  510. def dropna(self, axis=0, how='any', inplace=False, **kwargs):
  511. """
  512. Drop 2D from panel, holding passed axis constant
  513. Parameters
  514. ----------
  515. axis : int, default 0
  516. Axis to hold constant. E.g. axis=1 will drop major_axis entries
  517. having a certain amount of NA data
  518. how : {'all', 'any'}, default 'any'
  519. 'any': one or more values are NA in the DataFrame along the
  520. axis. For 'all' they all must be.
  521. inplace : bool, default False
  522. If True, do operation inplace and return None.
  523. Returns
  524. -------
  525. dropped : Panel
  526. """
  527. axis = self._get_axis_number(axis)
  528. values = self.values
  529. mask = com.notnull(values)
  530. for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))):
  531. mask = mask.sum(ax)
  532. per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:])
  533. if how == 'all':
  534. cond = mask > 0
  535. else:
  536. cond = mask == per_slice
  537. new_ax = self._get_axis(axis)[cond]
  538. result = self.reindex_axis(new_ax, axis=axis)
  539. if inplace:
  540. self._update_inplace(result)
  541. else:
  542. return result
  543. def _combine(self, other, func, axis=0):
  544. if isinstance(other, Panel):
  545. return self._combine_panel(other, func)
  546. elif isinstance(other, DataFrame):
  547. return self._combine_frame(other, func, axis=axis)
  548. elif np.isscalar(other):
  549. return self._combine_const(other, func)
  550. def _combine_const(self, other, func):
  551. new_values = func(self.values, other)
  552. d = self._construct_axes_dict()
  553. return self._constructor(new_values, **d)
  554. def _combine_frame(self, other, func, axis=0):
  555. index, columns = self._get_plane_axes(axis)
  556. axis = self._get_axis_number(axis)
  557. other = other.reindex(index=index, columns=columns)
  558. if axis == 0:
  559. new_values = func(self.values, other.values)
  560. elif axis == 1:
  561. new_values = func(self.values.swapaxes(0, 1), other.values.T)
  562. new_values = new_values.swapaxes(0, 1)
  563. elif axis == 2:
  564. new_values = func(self.values.swapaxes(0, 2), other.values)
  565. new_values = new_values.swapaxes(0, 2)
  566. return self._constructor(new_values, self.items, self.major_axis,
  567. self.minor_axis)
  568. def _combine_panel(self, other, func):
  569. items = self.items + other.items
  570. major = self.major_axis + other.major_axis
  571. minor = self.minor_axis + other.minor_axis
  572. # could check that everything's the same size, but forget it
  573. this = self.reindex(items=items, major=major, minor=minor)
  574. other = other.reindex(items=items, major=major, minor=minor)
  575. result_values = func(this.values, other.values)
  576. return self._constructor(result_values, items, major, minor)
  577. def major_xs(self, key, copy=None):
  578. """
  579. Return slice of panel along major axis
  580. Parameters
  581. ----------
  582. key : object
  583. Major axis label
  584. copy : boolean [deprecated]
  585. Whether to make a copy of the data
  586. Returns
  587. -------
  588. y : DataFrame
  589. index -> minor axis, columns -> items
  590. Notes
  591. -----
  592. major_xs is only for getting, not setting values.
  593. MultiIndex Slicers is a generic way to get/set values on any level or levels
  594. it is a superset of major_xs functionality, see :ref:`MultiIndex Slicers <indexing.mi_slicers>`
  595. """
  596. if copy is not None:
  597. warnings.warn("copy keyword is deprecated, "
  598. "default is to return a copy or a view if possible")
  599. return self.xs(key, axis=self._AXIS_LEN - 2)
  600. def minor_xs(self, key, copy=None):
  601. """
  602. Return slice of panel along minor axis
  603. Parameters
  604. ----------
  605. key : object
  606. Minor axis label
  607. copy : boolean [deprecated]
  608. Whether to make a copy of the data
  609. Returns
  610. -------
  611. y : DataFrame
  612. index -> major axis, columns -> items
  613. Notes
  614. -----
  615. minor_xs is only for getting, not setting values.
  616. MultiIndex Slicers is a generic way to get/set values on any level or levels
  617. it is a superset of minor_xs functionality, see :ref:`MultiIndex Slicers <indexing.mi_slicers>`
  618. """
  619. if copy is not None:
  620. warnings.warn("copy keyword is deprecated, "
  621. "default is to return a copy or a view if possible")
  622. return self.xs(key, axis=self._AXIS_LEN - 1)
  623. def xs(self, key, axis=1, copy=None):
  624. """
  625. Return slice of panel along selected axis
  626. Parameters
  627. ----------
  628. key : object
  629. Label
  630. axis : {'items', 'major', 'minor}, default 1/'major'
  631. copy : boolean [deprecated]
  632. Whether to make a copy of the data
  633. Returns
  634. -------
  635. y : ndim(self)-1
  636. Notes
  637. -----
  638. xs is only for getting, not setting values.
  639. MultiIndex Slicers is a generic way to get/set values on any level or levels
  640. it is a superset of xs functionality, see :ref:`MultiIndex Slicers <indexing.mi_slicers>`
  641. """
  642. if copy is not None:
  643. warnings.warn("copy keyword is deprecated, "
  644. "default is to return a copy or a view if possible")
  645. axis = self._get_axis_number(axis)
  646. if axis == 0:
  647. return self[key]
  648. self._consolidate_inplace()
  649. axis_number = self._get_axis_number(axis)
  650. new_data = self._data.xs(key, axis=axis_number, copy=False)
  651. result = self._construct_return_type(new_data)
  652. copy = new_data.is_mixed_type
  653. result._set_is_copy(self, copy=copy)
  654. return result
    # private alias bound to the same function as xs
    _xs = xs
  656. def _ixs(self, i, axis=0):
  657. """
  658. i : int, slice, or sequence of integers
  659. axis : int
  660. """
  661. ax = self._get_axis(axis)
  662. key = ax[i]
  663. # xs cannot handle a non-scalar key, so just reindex here
  664. # if we have a multi-index and a single tuple, then its a reduction (GH 7516)
  665. if not (isinstance(ax, MultiIndex) and isinstance(key, tuple)):
  666. if _is_list_like(key):
  667. indexer = {self._get_axis_name(axis): key}
  668. return self.reindex(**indexer)
  669. # a reduction
  670. if axis == 0:
  671. values = self._data.iget(i)
  672. return self._box_item_values(key, values)
  673. # xs by position
  674. self._consolidate_inplace()
  675. new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
  676. return self._construct_return_type(new_data)
  677. def groupby(self, function, axis='major'):
  678. """
  679. Group data on given axis, returning GroupBy object
  680. Parameters
  681. ----------
  682. function : callable
  683. Mapping function for chosen access
  684. axis : {'major', 'minor', 'items'}, default 'major'
  685. Returns
  686. -------
  687. grouped : PanelGroupBy
  688. """
  689. from pandas.core.groupby import PanelGroupBy
  690. axis = self._get_axis_number(axis)
  691. return PanelGroupBy(self, function, axis=axis)
    def to_frame(self, filter_observations=True):
        """
        Transform wide format into long (stacked) format as DataFrame whose
        columns are the Panel's items and whose index is a MultiIndex formed
        of the Panel's major and minor axes.

        Parameters
        ----------
        filter_observations : boolean, default True
            Drop (major, minor) pairs without a complete set of observations
            across all the items

        Returns
        -------
        y : DataFrame
        """
        # N and K are the sizes of the second and third axes
        _, N, K = self.shape
        if filter_observations:
            # shaped like the return DataFrame
            # True where the value is non-null along the whole first axis
            mask = com.notnull(self.values).all(axis=0)
            # size = mask.sum()
            selector = mask.ravel()
        else:
            # size = N * K
            selector = slice(None, None)
        # one flattened (and filtered) column per item
        data = {}
        for item in self.items:
            data[item] = self[item].values.ravel()[selector]
        def construct_multi_parts(idx, n_repeat, n_shuffle=1):
            # MultiIndex axis: expand via to_hierarchical, then filter labels
            axis_idx = idx.to_hierarchical(n_repeat, n_shuffle)
            labels = [x[selector] for x in axis_idx.labels]
            levels = axis_idx.levels
            names = axis_idx.names
            return labels, levels, names
        def construct_index_parts(idx, major=True):
            # flat axis: a single level holding the index itself
            levels = [idx]
            if major:
                # each of the N positions repeated K times
                labels = [np.arange(N).repeat(K)[selector]]
                names = idx.name or 'major'
            else:
                # the row 0..K-1 selected N times, i.e. tiled N times
                labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)]
                labels = [labels.ravel()[selector]]
                names = idx.name or 'minor'
            names = [names]
            return labels, levels, names
        if isinstance(self.major_axis, MultiIndex):
            major_labels, major_levels, major_names = construct_multi_parts(
                self.major_axis, n_repeat=K)
        else:
            major_labels, major_levels, major_names = construct_index_parts(
                self.major_axis)
        if isinstance(self.minor_axis, MultiIndex):
            minor_labels, minor_levels, minor_names = construct_multi_parts(
                self.minor_axis, n_repeat=N, n_shuffle=K)
        else:
            minor_labels, minor_levels, minor_names = construct_index_parts(
                self.minor_axis, major=False)
        # major parts first, then minor parts
        levels = major_levels + minor_levels
        labels = major_labels + minor_labels
        names = major_names + minor_names
        index = MultiIndex(levels=levels, labels=labels,
                           names=names, verify_integrity=False)
        return DataFrame(data, index=index, columns=self.items)
    # older spellings of to_frame, wrapped with the deprecate helper
    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)
  755. def apply(self, func, axis='major', **kwargs):
  756. """
  757. Applies function along input axis of the Panel
  758. Parameters
  759. ----------
  760. func : function
  761. Function to apply to each combination of 'other' axes
  762. e.g. if axis = 'items', then the combination of major_axis/minor_axis
  763. will be passed a Series
  764. axis : {'major', 'minor', 'items'}
  765. Additional keyword arguments will be passed as keywords to the function
  766. Examples
  767. --------
  768. >>> p.apply(numpy.sqrt) # returns a Panel
  769. >>> p.apply(lambda x: x.sum(), axis=0) # equiv to p.sum(0)
  770. >>> p.apply(lambda x: x.sum(), axis=1) # equiv to p.sum(1)
  771. >>> p.apply(lambda x: x.sum(), axis=2) # equiv to p.sum(2)
  772. Returns
  773. -------
  774. result : Pandas Object
  775. """
  776. if kwargs and not isinstance(func, np.ufunc):
  777. f = lambda x: func(x, **kwargs)
  778. else:
  779. f = func
  780. # 2d-slabs
  781. if isinstance(axis, (tuple,list)) and len(axis) == 2:
  782. return self._apply_2d(f, axis=axis)
  783. axis = self._get_axis_number(axis)
  784. # try ufunc like
  785. if isinstance(f, np.ufunc):
  786. try:
  787. result = np.apply_along_axis(func, axis, self.values)
  788. return self._wrap_result(result, axis=axis)
  789. except (AttributeError):
  790. pass
  791. # 1d
  792. return self._apply_1d(f, axis=axis)
    def _apply_1d(self, func, axis):
        """
        Apply ``func`` to every 1-d Series taken along ``axis``.

        One Series is built for each combination of labels on the other
        axes.  If ``func`` returns Series, an object with the same axes as
        ``self`` is returned; otherwise the results are reshaped to the
        shape of the other axes and passed to ``_construct_return_type``.
        """
        axis_name = self._get_axis_name(axis)
        ax = self._get_axis(axis)
        ndim = self.ndim
        values = self.values
        # iter thru the axes
        slice_axis = self._get_axis(axis)
        # current position on each of the other axes
        slice_indexer = [0]*(ndim-1)
        # object array so it can hold both a slice and integer positions
        indexer = np.zeros(ndim, 'O')
        indlist = list(range(ndim))
        indlist.remove(axis)
        indexer[axis] = slice(None, None)
        indexer.put(indlist, slice_indexer)
        planes = [ self._get_axis(axi) for axi in indlist ]
        shape = np.array(self.shape).take(indlist)
        # all the iteration points
        points = cartesian_product(planes)
        results = []
        for i in range(np.prod(shape)):
            # construct the object
            pts = tuple([ p[i] for p in points ])
            indexer.put(indlist, slice_indexer)
            obj = Series(values[tuple(indexer)],index=slice_axis,name=pts)
            result = func(obj)
            results.append(result)
            # increment the indexer
            slice_indexer[-1] += 1
            n = -1
            # carry into the next position to the left on overflow
            while (slice_indexer[n] >= shape[n]) and (n > (1-ndim)):
                slice_indexer[n-1] += 1
                slice_indexer[n] = 0
                n -= 1
        # empty object
        if not len(results):
            return self._constructor(**self._construct_axes_dict())
        # same ndim as current
        if isinstance(results[0],Series):
            arr = np.vstack([ r.values for r in results ])
            arr = arr.T.reshape(tuple([len(slice_axis)] + list(shape)))
            # permutation that moves the applied axis back into its place
            tranp = np.array([axis]+indlist).argsort()
            arr = arr.transpose(tuple(list(tranp)))
            return self._constructor(arr,**self._construct_axes_dict())
        # ndim-1 shape
        results = np.array(results).reshape(shape)
        if results.ndim == 2 and axis_name != self._info_axis_name:
            results = results.T
            planes = planes[::-1]
        return self._construct_return_type(results,planes)
  841. def _apply_2d(self, func, axis):
  842. """ handle 2-d slices, equiv to iterating over the other axis """
  843. ndim = self.ndim
  844. axis = [ self._get_axis_number(a) for a in axis ]
  845. # construct slabs, in 2-d this is a DataFrame result
  846. indexer_axis = list(range(ndim))
  847. for a in axis:
  848. indexer_axis.remove(a)
  849. indexer_axis = indexer_axis[0]
  850. slicer = [ slice(None,None) ] * ndim
  851. ax = self._get_axis(indexer_axis)
  852. results = []
  853. for i, e in enumerate(ax):
  854. slicer[indexer_axis] = i
  855. sliced = self.iloc[tuple(slicer)]
  856. obj = func(sliced)
  857. results.append((e,obj))
  858. return self._construct_return_type(dict(results))
  859. def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
  860. filter_type=None, **kwds):
  861. axis_name = self._get_axis_name(axis)
  862. axis_number = self._get_axis_number(axis_name)
  863. f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds)
  864. result = f(self.values)
  865. axes = self._get_plane_axes(axis_name)
  866. if result.ndim == 2 and axis_name != self._info_axis_name:
  867. result = result.T
  868. return self._construct_return_type(result, axes)
  869. def _construct_return_type(self, result, axes=None, **kwargs):
  870. """ return the type for the ndim of the result """
  871. ndim = getattr(result,'ndim',None)
  872. # need to assume they are the same
  873. if ndim is None:
  874. if isinstance(result,dict):
  875. ndim = getattr(list(compat.itervalues(result))[0],'ndim',None)
  876. # a saclar result
  877. if ndim is None:
  878. ndim = 0
  879. # have a dict, so top-level is +1 dim
  880. else:
  881. ndim += 1
  882. # scalar
  883. if ndim == 0:
  884. return Series(result)
  885. # same as self
  886. elif self.ndim == ndim:
  887. """ return the construction dictionary for these axes """
  888. if axes is None:
  889. return self._constructor(result)
  890. return self._constructor(result, **self._construct_axes_dict())
  891. # sliced
  892. elif self.ndim == ndim + 1:
  893. if axes is None:
  894. return self._constructor_sliced(result)
  895. return self._constructor_sliced(
  896. result, **self._extract_axes_for_slice(self, axes))
  897. raise PandasError('invalid _construct_return_type [self->%s] '
  898. '[result->%s]' % (self, result))
  899. def _wrap_result(self, result, axis):
  900. axis = self._get_axis_name(axis)
  901. axes = self._get_plane_axes(axis)
  902. if result.ndim == 2 and axis != self._info_axis_name:
  903. result = result.T
  904. return self._construct_return_type(result, axes)
  905. @Appender(_shared_docs['reindex'] % _shared_doc_kwargs)
  906. def reindex(self, items=None, major_axis=None, minor_axis=None, **kwargs):
  907. major_axis = (major_axis if major_axis is not None
  908. else kwargs.pop('major', None))
  909. minor_axis = (minor_axis if minor_axis is not None
  910. else kwargs.pop('minor', None))
  911. return super(Panel, self).reindex(items=items, major_axis=major_axis,
  912. minor_axis=minor_axis, **kwargs)
  913. @Appender(_shared_docs['rename'] % _shared_doc_kwargs)
  914. def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs):
  915. major_axis = (major_axis if major_axis is not None
  916. else kwargs.pop('major', None))
  917. minor_axis = (minor_axis if minor_axis is not None
  918. else kwargs.pop('minor', None))
  919. return super(Panel, self).rename(items=items, major_axis=major_axis,
  920. minor_axis=minor_axis, **kwargs)
  921. @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
  922. def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
  923. limit=None, fill_value=np.nan):
  924. return super(Panel, self).reindex_axis(labels=labels, axis=axis,
  925. method=method, level=level,
  926. copy=copy, limit=limit,
  927. fill_value=fill_value)
  928. @Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
  929. def transpose(self, *args, **kwargs):
  930. return super(Panel, self).transpose(*args, **kwargs)
  931. def count(self, axis='major'):
  932. """
  933. Return number of observations over requested axis.
  934. Parameters
  935. ----------
  936. axis : {'items', 'major', 'minor'} or {0, 1, 2}
  937. Returns
  938. -------
  939. count : DataFrame
  940. """
  941. i = self._get_axis_number(axis)
  942. values = self.values
  943. mask = np.isfinite(values)
  944. result = mask.sum(axis=i,dtype='int64')
  945. return self._wrap_result(result, axis)
  946. @deprecate_kwarg(old_arg_name='lags', new_arg_name='periods')
  947. def shift(self, periods=1, freq=None, axis='major'):
  948. """
  949. Shift major or minor axis by specified number of leads/lags. Drops
  950. periods right now compared with DataFrame.shift
  951. Parameters
  952. ----------
  953. lags : int
  954. axis : {'major', 'minor'}
  955. Returns
  956. -------
  957. shifted : Panel
  958. """
  959. if freq:
  960. return self.tshift(periods, freq, axis=axis)
  961. if axis == 'items':
  962. raise ValueError('Invalid axis')
  963. return super(Panel, self).slice_shift(periods, axis=axis)
  964. def tshift(self, periods=1, freq=None, axis='major', **kwds):
  965. return super(Panel, self).tshift(periods, freq, axis, **kwds)
  966. def join(self, other, how='left', lsuffix='', rsuffix=''):
  967. """
  968. Join items with other Panel either on major and minor axes column
  969. Parameters
  970. ----------
  971. other : Panel or list of Panels
  972. Index should be similar to one of the columns in this one
  973. how : {'left', 'right', 'outer', 'inner'}
  974. How to handle indexes of the two objects. Default: 'left'
  975. for joining on index, None otherwise
  976. * left: use calling frame's index
  977. * right: use input frame's index
  978. * outer: form union of indexes
  979. * inner: use intersection of indexes
  980. lsuffix : string
  981. Suffix to use from left frame's overlapping columns
  982. rsuffix : string
  983. Suffix to use from right frame's overlapping columns
  984. Returns
  985. -------
  986. joined : Panel
  987. """
  988. from pandas.tools.merge import concat
  989. if isinstance(other, Panel):
  990. join_major, join_minor = self._get_join_index(other, how)
  991. this = self.reindex(major=join_major, minor=join_minor)
  992. other = other.reindex(major=join_major, minor=join_minor)
  993. merged_data = this._data.merge(other._data, lsuffix, rsuffix)
  994. return self._constructor(merged_data)
  995. else:
  996. if lsuffix or rsuffix:
  997. raise ValueError('Suffixes not supported when passing '
  998. 'multiple panels')
  999. if how == 'left':
  1000. how = 'outer'
  1001. join_axes = [self.major_axis, self.minor_axis]
  1002. elif how == 'right':
  1003. raise ValueError('Right join not supported with multiple '
  1004. 'panels')
  1005. else:
  1006. join_axes = None
  1007. return concat([self] + list(other), axis=0, join=how,
  1008. join_axes=join_axes, verify_integrity=True)
  1009. def update(self, other, join='left', overwrite=True, filter_func=None,
  1010. raise_conflict=False):
  1011. """
  1012. Modify Panel in place using non-NA values from passed
  1013. Panel, or object coercible to Panel. Aligns on items
  1014. Parameters
  1015. ----------
  1016. other : Panel, or object coercible to Panel
  1017. join : How to join individual DataFrames
  1018. {'left', 'right', 'outer', 'inner'}, default 'left'
  1019. overwrite : boolean, default True
  1020. If True then overwrite values for common keys in the calling panel
  1021. filter_func : callable(1d-array) -> 1d-array<boolean>, default None
  1022. Can choose to replace values other than NA. Return True for values
  1023. that should be updated
  1024. raise_conflict : bool
  1025. If True, will raise an error if a DataFrame and other both
  1026. contain data in the same place.
  1027. """
  1028. if not isinstance(other, self._constructor):
  1029. other = self._constructor(other)
  1030. axis_name = self._info_axis_name
  1031. axis_values = self._info_axis
  1032. other = other.reindex(**{axis_name: axis_values})
  1033. for frame in axis_values:
  1034. self[frame].update(other[frame], join, overwrite, filter_func,
  1035. raise_conflict)
  1036. def _get_join_index(self, other, how):
  1037. if how == 'left':
  1038. join_major, join_minor = self.major_axis, self.minor_axis
  1039. elif how == 'right':
  1040. join_major, join_minor = other.major_axis, other.minor_axis
  1041. elif how == 'inner':
  1042. join_major = self.major_axis.intersection(other.major_axis)
  1043. join_minor = self.minor_axis.intersection(other.minor_axis)
  1044. elif how == 'outer':
  1045. join_major = self.major_axis.union(other.major_axis)
  1046. join_minor = self.minor_axis.union(other.minor_axis)
  1047. return join_major, join_minor
  1048. # miscellaneous data creation
  1049. @staticmethod
  1050. def _extract_axes(self, data, axes, **kwargs):
  1051. """ return a list of the axis indicies """
  1052. return [self._extract_axis(self, data, axis=i, **kwargs) for i, a
  1053. in enumerate(axes)]
  1054. @staticmethod
  1055. def _extract_axes_for_slice(self, axes):
  1056. """ return the slice dictionary for these axes """
  1057. return dict([(self._AXIS_SLICEMAP[i], a)
  1058. for i, a in zip(self._AXIS_ORDERS[self._AXIS_LEN -
  1059. len(axes):], axes)])
  1060. @staticmethod
  1061. def _prep_ndarray(self, values, copy=True):
  1062. if not isinstance(values, np.ndarray):
  1063. values = np.asarray(values)
  1064. # NumPy strings are a pain, convert to object
  1065. if issubclass(values.dtype.type, compat.string_types):
  1066. values = np.array(values, dtype=object, copy=True)
  1067. else:
  1068. if copy:
  1069. values = values.copy()
  1070. if values.ndim != self._AXIS_LEN:
  1071. raise ValueError("The number of dimensions required is {0}, "
  1072. "but the number of dimensions of the "
  1073. "ndarray given was {1}".format(self._AXIS_LEN,
  1074. values.ndim))
  1075. return values
  1076. @staticmethod
  1077. def _homogenize_dict(self, frames, intersect=True, dtype=None):
  1078. """
  1079. Conform set of _constructor_sliced-like objects to either
  1080. an intersection of indices / columns or a union.
  1081. Parameters
  1082. ----------
  1083. frames : dict
  1084. intersect : boolean, default True
  1085. Returns
  1086. -------
  1087. dict of aligned results & indicies
  1088. """
  1089. result = dict()
  1090. # caller differs dict/ODict, presered type
  1091. if isinstance(frames, OrderedDict):
  1092. result = OrderedDict()
  1093. adj_frames = OrderedDict()
  1094. for k, v in compat.iteritems(frames):
  1095. if isinstance(v, dict):
  1096. adj_frames[k] = self._constructor_sliced(v)
  1097. else:
  1098. adj_frames[k] = v
  1099. axes = self._AXIS_ORDERS[1:]
  1100. axes_dict = dict([(a, ax) for a, ax in zip(axes, self._extract_axes(
  1101. self, adj_frames, axes, intersect=intersect))])
  1102. reindex_dict = dict(
  1103. [(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes])
  1104. reindex_dict['copy'] = False
  1105. for key, frame in compat.iteritems(adj_frames):
  1106. if frame is not None:
  1107. result[key] = frame.reindex(**reindex_dict)
  1108. else:
  1109. result[key] = None
  1110. axes_dict['data'] = result
  1111. return axes_dict
  1112. @staticmethod
  1113. def _extract_axis(self, data, axis=0, intersect=False):
  1114. index = None
  1115. if len(data) == 0:
  1116. index = Index([])
  1117. elif len(data) > 0:
  1118. raw_lengths = []
  1119. indexes = []
  1120. have_raw_arrays = False
  1121. have_frames = False
  1122. for v in data.values():
  1123. if isinstance(v, self._constructor_sliced):
  1124. have_frames = True
  1125. indexes.append(v._get_axis(axis))
  1126. elif v is not None:
  1127. have_raw_arrays = True
  1128. raw_lengths.append(v.shape[axis])
  1129. if have_frames:
  1130. index = _get_combined_index(indexes, intersect=intersect)
  1131. if have_raw_arrays:
  1132. lengths = list(set(raw_lengths))
  1133. if len(lengths) > 1:
  1134. raise ValueError('ndarrays must match shape on axis %d' % axis)
  1135. if have_frames:
  1136. if lengths[0] != len(index):
  1137. raise AssertionError('Length of data and index must match')
  1138. else:
  1139. index = Index(np.arange(lengths[0]))
  1140. if index is None:
  1141. index = Index([])
  1142. return _ensure_index(index)
@classmethod
def _add_aggregate_operations(cls, use_numexpr=True):
    """
    Add the flexible arithmetic methods to ``cls``, building their doc
    strings from the names of ``cls`` and of its sliced constructor.

    Parameters
    ----------
    use_numexpr : boolean, default True
        Passed through to ``ops.add_flex_arithmetic_methods``.
    """

    # doc strings substitors
    # ``%%s`` survives the formatting below as ``%s``; presumably it is
    # filled with the method name by ``Substitution`` -- confirm
    _agg_doc = """
Wrapper method for %%s
Parameters
----------
other : %s or %s""" % (cls._constructor_sliced.__name__, cls.__name__) + """
axis : {""" + ', '.join(cls._AXIS_ORDERS) + "}" + """
Axis to broadcast over
Returns
-------
""" + cls.__name__ + "\n"

    # factory handed to ops.add_flex_arithmetic_methods below;
    # ``default_axis`` is accepted but not used in this body
    def _panel_arith_method(op, name, str_rep=None, default_axis=None,
                            fill_zeros=None, **eval_kwargs):
        def na_op(x, y):
            # try the expressions evaluator first and fall back to calling
            # ``op`` directly when it raises TypeError
            try:
                result = expressions.evaluate(op, str_rep, x, y,
                                              raise_on_error=True,
                                              **eval_kwargs)
            except TypeError:
                result = op(x, y)

            # handles discrepancy between numpy and numexpr on division/mod
            # by 0 though, given that these are generally (always?)
            # non-scalars, I'm not sure whether it's worth it at the moment
            result = com._fill_zeros(result, x, y, name, fill_zeros)
            return result

        @Substitution(name)
        @Appender(_agg_doc)
        def f(self, other, axis=0):
            return self._combine(other, na_op, axis=axis)
        # expose the generated method under the operation's own name
        f.__name__ = name
        return f

    # add `div`, `mul`, `pow`, etc..
    ops.add_flex_arithmetic_methods(
        cls, _panel_arith_method, use_numexpr=use_numexpr,
        flex_comp_method=ops._comp_method_PANEL)
# Declare the three Panel axes, the short aliases accepted for the major
# and minor axes, and the names those two axes map to as slicers.
Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'],
                  info_axis=0,
                  stat_axis=1,
                  aliases={'major': 'major_axis',
                           'minor': 'minor_axis'},
                  slicers={'major_axis': 'index',
                           'minor_axis': 'columns'})

# Attach the special arithmetic, aggregate and numeric operations to Panel.
ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs)
Panel._add_aggregate_operations()
Panel._add_numeric_operations()

# Additional module-level names bound to Panel and DataFrame
# (presumably kept for older code -- confirm before removing).
WidePanel = Panel
LongPanel = DataFrame