PageRenderTime 61ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/panel.py

http://github.com/wesm/pandas
Python | 1588 lines | 1554 code | 12 blank | 22 comment | 7 complexity | e8622c215d1b9d285e4bd6bcdf74a61f MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. """
  2. Contains data structures designed for manipulating panel (3-dimensional) data
  3. """
  4. # pylint: disable=E1103,W0231,W0212,W0621
  5. from __future__ import division
  6. from collections import OrderedDict
  7. import warnings
  8. import numpy as np
  9. import pandas.compat as compat
  10. from pandas.compat import map, range, u, zip
  11. from pandas.compat.numpy import function as nv
  12. from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
  13. from pandas.util._validators import validate_axis_style_args
  14. from pandas.core.dtypes.cast import (
  15. cast_scalar_to_array, infer_dtype_from_scalar, maybe_cast_item)
  16. from pandas.core.dtypes.common import (
  17. is_integer, is_list_like, is_scalar, is_string_like)
  18. from pandas.core.dtypes.missing import notna
  19. import pandas.core.common as com
  20. from pandas.core.frame import DataFrame
  21. from pandas.core.generic import NDFrame, _shared_docs
  22. from pandas.core.index import (
  23. Index, MultiIndex, _get_objs_combined_axis, ensure_index)
  24. import pandas.core.indexes.base as ibase
  25. from pandas.core.indexing import maybe_droplevels
  26. from pandas.core.internals import (
  27. BlockManager, create_block_manager_from_arrays,
  28. create_block_manager_from_blocks)
  29. import pandas.core.ops as ops
  30. from pandas.core.reshape.util import cartesian_product
  31. from pandas.core.series import Series
  32. from pandas.io.formats.printing import pprint_thing
  # Substitution values for the shared docstring templates used by Panel.
  _shared_doc_kwargs = {
      'axes': 'items, major_axis, minor_axis',
      'klass': "Panel",
      'axes_single_arg': "{0, 1, 2, 'items', 'major_axis', 'minor_axis'}",
      'optional_mapper': '',
      'optional_axis': '',
      'optional_labels': '',
  }
  # The transpose help text embeds the single-axis description defined above.
  _transpose_template = (
      "{ax_single}\n\tThree positional arguments from given options.")
  _shared_doc_kwargs['args_transpose'] = _transpose_template.format(
      ax_single=_shared_doc_kwargs['axes_single_arg'])
  def _ensure_like_indices(time, panels):
      """
      Expand ``time`` and ``panels`` so that they can be paired element-wise.

      An input made up solely of distinct values is treated as "not yet
      repeated": distinct times are tiled once per unique panel, and distinct
      panels are repeated once per unique time.  An input that already
      contains duplicates is returned untouched.

      Returns
      -------
      (time, panels) : tuple
      """
      time_len = len(time)
      panel_len = len(panels)
      distinct_panels = np.unique(panels)  # np.unique returns sorted values
      distinct_time = np.unique(time)

      time_needs_tiling = len(distinct_time) == time_len
      panels_need_repeat = len(distinct_panels) == panel_len

      if time_needs_tiling:
          time = np.tile(distinct_time, len(distinct_panels))
      if panels_need_repeat:
          panels = np.repeat(distinct_panels, len(distinct_time))
      return time, panels
  def panel_index(time, panels, names=None):
      """
      Build a two-level MultiIndex for a panel-like (long format) DataFrame.

      Parameters
      ----------
      time : array-like
          Time index, does not have to repeat
      panels : array-like
          Panel index, does not have to repeat
      names : list, optional
          Names of the two levels; ``['time', 'panel']`` when omitted

      Returns
      -------
      multi_index : MultiIndex
          Time index is the first level, the panels are the second level.

      Examples
      --------
      >>> years = range(1960,1963)
      >>> panels = ['A', 'B', 'C']
      >>> panel_idx = panel_index(years, panels)
      >>> panel_idx
      MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                  (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                  (1962, 'C')], dtype=object)

      or

      >>> years = np.repeat(range(1960,1963), 3)
      >>> panels = np.tile(['A', 'B', 'C'], 3)
      >>> panel_idx = panel_index(years, panels)
      >>> panel_idx
      MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                  (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                  (1962, 'C')], dtype=object)
      """
      level_names = ['time', 'panel'] if names is None else names
      # make both inputs the same length before pairing them up
      aligned_time, aligned_panels = _ensure_like_indices(time, panels)
      return MultiIndex.from_arrays([aligned_time, aligned_panels],
                                    names=level_names, sortorder=None)
  91. class Panel(NDFrame):
  92. """
  93. Represents wide format panel data, stored as 3-dimensional array.
  94. .. deprecated:: 0.20.0
  95. The recommended way to represent 3-D data are with a MultiIndex on a
  96. DataFrame via the :attr:`~Panel.to_frame()` method or with the
  97. `xarray package <http://xarray.pydata.org/en/stable/>`__.
  98. Pandas provides a :attr:`~Panel.to_xarray()` method to automate this
  99. conversion.
  100. Parameters
  101. ----------
  102. data : ndarray (items x major x minor), or dict of DataFrames
  103. items : Index or array-like
  104. axis=0
  105. major_axis : Index or array-like
  106. axis=1
  107. minor_axis : Index or array-like
  108. axis=2
  109. copy : boolean, default False
  110. Copy data from inputs. Only affects DataFrame / 2d ndarray input
  111. dtype : dtype, default None
  112. Data type to force, otherwise infer
  113. """
  @property
  def _constructor(self):
      """Return the concrete class of ``self``, used to build like-typed results."""
      own_class = type(self)
      return own_class
  # Class used for lower-dimensional pieces of a Panel: _box_item_values wraps
  # reduced values with it and __setitem__ accepts instances of it as values.
  _constructor_sliced = DataFrame
  def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
               copy=False, dtype=None):
      """
      Emit the Panel deprecation warning (GH13563), then delegate the actual
      construction to ``_init_data``.
      """
      deprecation_note = (
          "\nPanel is deprecated and will be removed in a "
          "future version.\nThe recommended way to represent "
          "these types of 3-dimensional data are with a "
          "MultiIndex on a DataFrame, via the "
          "Panel.to_frame() method\n"
          "Alternatively, you can use the xarray package "
          "http://xarray.pydata.org/en/stable/.\n"
          "Pandas provides a `.to_xarray()` method to help "
          "automate this conversion.\n")
      warnings.warn(deprecation_note, FutureWarning, stacklevel=3)

      axis_labels = dict(items=items, major_axis=major_axis,
                         minor_axis=minor_axis)
      self._init_data(data=data, copy=copy, dtype=dtype, **axis_labels)
  def _init_data(self, data, copy, dtype, **kwargs):
      """
      Generate ND initialization; axes are passed
      as required objects to __init__.

      Parameters
      ----------
      data : BlockManager, dict, ndarray, list, scalar or None
          ``None`` is treated as an empty dict.
      copy : bool
          Forwarded to ``NDFrame.__init__``; reset to False on the dict,
          ndarray/list and scalar paths.
      dtype : dtype or None
          Validated through ``self._validate_dtype`` when given.
      **kwargs
          One optional entry per name in ``self._AXIS_ORDERS``; any other
          keyword raises TypeError.

      Raises
      ------
      TypeError
          If an unexpected keyword argument is supplied.
      ValueError
          If ``data`` matches none of the supported input kinds.
      """
      if data is None:
          data = {}
      if dtype is not None:
          dtype = self._validate_dtype(dtype)

      # pull the axis arguments out of kwargs, in axis order
      passed_axes = [kwargs.pop(a, None) for a in self._AXIS_ORDERS]

      # anything still left in kwargs is not a recognised axis name
      if kwargs:
          raise TypeError('_init_data() got an unexpected keyword '
                          'argument "{0}"'.format(list(kwargs.keys())[0]))

      axes = None
      if isinstance(data, BlockManager):
          # reuse the manager; explicitly passed axes take precedence over
          # the manager's own axes
          if com._any_not_none(*passed_axes):
              axes = [x if x is not None else y
                      for x, y in zip(passed_axes, data.axes)]
          mgr = data
      elif isinstance(data, dict):
          # dtype was given to _init_dict, so it is cleared before the
          # NDFrame.__init__ call below
          mgr = self._init_dict(data, passed_axes, dtype=dtype)
          copy = False
          dtype = None
      elif isinstance(data, (np.ndarray, list)):
          # copy and dtype were given to _init_matrix, so both are cleared
          mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
          copy = False
          dtype = None
      elif is_scalar(data) and com._all_not_none(*passed_axes):
          # a scalar is expanded to an array sized by the axis lengths,
          # which is why every axis must be supplied
          values = cast_scalar_to_array([len(x) for x in passed_axes],
                                        data, dtype=dtype)
          mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
                                  copy=False)
          copy = False
      else:  # pragma: no cover
          raise ValueError('Panel constructor not properly called!')

      NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
  def _init_dict(self, data, axes, dtype=None):
      """
      Build the block manager for dict input.

      Parameters
      ----------
      data : dict
          Maps info-axis labels to values.  When no info axis is passed,
          nested dict values are replaced in this very dict by
          ``self._constructor_sliced`` objects.
      axes : list
          Passed axes, one per axis; the info-axis entry is popped from the
          list in place.
      dtype : dtype, optional
          Used for the NaN filler arrays and applied via ``astype`` to
          sliced-type values.

      Returns
      -------
      The result of ``self._init_arrays``.
      """
      # NOTE: pop() mutates the caller's list
      haxis = axes.pop(self._info_axis_number)

      # prefilter if haxis passed
      if haxis is not None:
          haxis = ensure_index(haxis)
          data = OrderedDict((k, v)
                             for k, v in compat.iteritems(data)
                             if k in haxis)
      else:
          keys = com.dict_keys_to_ordered_list(data)
          haxis = Index(keys)

      # convert nested dicts to the sliced type; only values are rebound,
      # so the set of keys is unchanged while iterating
      for k, v in compat.iteritems(data):
          if isinstance(v, dict):
              data[k] = self._constructor_sliced(v)

      # extract axis for remaining axes & create the slicemap
      raxes = [self._extract_axis(self, data, axis=i) if a is None else a
               for i, a in enumerate(axes)]
      raxes_sm = self._extract_axes_for_slice(self, raxes)

      # shallow copy
      arrays = []
      haxis_shape = [len(a) for a in raxes]
      for h in haxis:
          v = values = data.get(h)
          if v is None:
              # label without data: fill a block of the right shape with NaN
              values = np.empty(haxis_shape, dtype=dtype)
              values.fill(np.nan)
          elif isinstance(v, self._constructor_sliced):
              # align the value to the remaining axes without copying
              d = raxes_sm.copy()
              d['copy'] = False
              v = v.reindex(**d)
              if dtype is not None:
                  v = v.astype(dtype)
              values = v.values
          # any other kind of value is appended unchanged
          arrays.append(values)

      return self._init_arrays(arrays, haxis, [haxis] + raxes)
  def _init_arrays(self, arrays, arr_names, axes):
      """Create a block manager from ``arrays`` labelled by ``arr_names``."""
      manager = create_block_manager_from_arrays(arrays, arr_names, axes)
      return manager
  @classmethod
  def from_dict(cls, data, intersect=False, orient='items', dtype=None):
      """
      Construct Panel from dict of DataFrame objects.

      Parameters
      ----------
      data : dict
          {field : DataFrame}
      intersect : boolean
          Intersect indexes of input DataFrames
      orient : {'items', 'minor'}, default 'items'
          The "orientation" of the data. With 'items' (default) the keys of
          the passed dict become the items of the result panel. With
          'minor' the columns of the passed DataFrame objects become the
          items instead (which you should do for mixed-dtype data).
      dtype : dtype, default None
          Data type to force, otherwise infer

      Returns
      -------
      Panel
      """
      from collections import defaultdict

      orientation = orient.lower()
      if orientation not in ('minor', 'items'):  # pragma: no cover
          raise ValueError('Orientation must be one of {items, minor}.')

      if orientation == 'minor':
          # swap the nesting: {key: {column: series}} -> {column: {key: series}}
          pivoted = defaultdict(OrderedDict)
          for outer_key, frame in compat.iteritems(data):
              for inner_key, column in compat.iteritems(frame):
                  pivoted[inner_key][outer_key] = column
          data = pivoted

      init_kwargs = cls._homogenize_dict(cls, data, intersect=intersect,
                                         dtype=dtype)
      homogenized = init_kwargs['data']
      item_keys = list(homogenized.keys())
      # only an OrderedDict carries a meaningful key order; otherwise sort
      if not isinstance(homogenized, OrderedDict):
          item_keys = sorted(item_keys)
      init_kwargs[cls._info_axis_name] = Index(item_keys)
      return cls(**init_kwargs)
  def __getitem__(self, key):
      """
      Select by ``key`` along the info axis.

      Callable keys are resolved against ``self`` first.  A MultiIndex info
      axis goes through ``_getitem_multilevel``; list-likes and slices go
      through ``.loc``; every other key uses the parent ``__getitem__``.
      """
      key = com.apply_if_callable(key, self)

      if isinstance(self._info_axis, MultiIndex):
          return self._getitem_multilevel(key)

      if is_list_like(key) or isinstance(key, slice):
          return self.loc[key]

      return super(Panel, self).__getitem__(key)
  def _getitem_multilevel(self, key):
      """
      Select ``key`` from a MultiIndex info axis.

      When ``key`` resolves to a slice or an array of positions, the matching
      values are re-wrapped in a new object of this type, with the info axis
      passed through ``maybe_droplevels``.  Otherwise the single item is
      fetched through the item cache.
      """
      info = self._info_axis
      loc = info.get_loc(key)
      if not isinstance(loc, (slice, np.ndarray)):
          return self._get_item_cache(key)

      new_index = info[loc]
      result_index = maybe_droplevels(new_index, key)

      # Index with a tuple: NumPy deprecated multidimensional indexing with
      # a non-tuple sequence (list of slices/arrays) in 1.15.
      indexer = (loc,) + (slice(None),) * (self._AXIS_LEN - 1)
      new_values = self.values[indexer]

      axes_kwargs = self._construct_axes_dict(self._AXIS_ORDERS[1:])
      axes_kwargs[self._info_axis_name] = result_index
      return self._constructor(new_values, **axes_kwargs)
  def _init_matrix(self, data, axes, dtype=None, copy=False):
      """
      Build a block manager from array-like ``data``.

      ``data`` is prepared by ``self._prep_ndarray`` and cast to ``dtype``
      when one is given (raising ValueError if the cast fails).  Axes that
      are ``None`` get a default index sized from the values' shape; all
      other axes are run through ``ensure_index``.
      """
      prepared = self._prep_ndarray(self, data, copy=copy)

      if dtype is not None:
          try:
              prepared = prepared.astype(dtype)
          except Exception:
              raise ValueError('failed to cast to '
                               '{datatype}'.format(datatype=dtype))

      dims = prepared.shape
      resolved_axes = [
          ibase.default_index(dims[pos]) if axis is None
          else ensure_index(axis)
          for pos, axis in enumerate(axes)]

      return create_block_manager_from_blocks([prepared], resolved_axes)
  282. # ----------------------------------------------------------------------
  283. # Comparison methods
  def _compare_constructor(self, other, func):
      """
      Apply the comparison ``func`` item by item against ``other``.

      Raises
      ------
      Exception
          If ``other`` is not labelled identically to ``self``.
      """
      if not self._indexed_same(other):
          raise Exception('Can only compare identically-labeled '
                          'same type objects')

      compared = {}
      for label in self._info_axis:
          compared[label] = func(self[label], other[label])

      axes_kwargs = self._construct_axes_dict(copy=False)
      return self._constructor(data=compared, **axes_kwargs)
  292. # ----------------------------------------------------------------------
  293. # Magic methods
  def __unicode__(self):
      """
      Return a string representation for a particular Panel.

      The text holds the class name, a ``Dimensions:`` line pairing every
      axis length with its axis name, and one line per axis giving its first
      and last label (``None`` for an empty axis).

      Invoked by unicode(df) in py2 only.
      Yields a Unicode String in both py2/py3.
      """
      header = str(self.__class__)

      shape_parts = []
      for axis_name, length in zip(self._AXIS_ORDERS, self.shape):
          shape_parts.append(
              "{shape} ({axis})".format(shape=length, axis=axis_name))
      dims = u('Dimensions: {dimensions}'.format(
          dimensions=' x '.join(shape_parts)))

      def describe_axis(name):
          labels = getattr(self, name)
          title = name.capitalize()
          # test the length explicitly rather than the object's truthiness
          if len(labels) == 0:
              return u('{ax} axis: None'.format(ax=title))
          return u('{ax} axis: {x} to {y}'.format(
              ax=title,
              x=pprint_thing(labels[0]),
              y=pprint_thing(labels[-1])))

      lines = [header, dims]
      lines.extend([describe_axis(name) for name in self._AXIS_ORDERS])
      return '\n'.join(lines)
  def _get_plane_axes_index(self, axis):
      """
      Name the two axes that span the plane obtained by fixing ``axis``.

      Returns
      -------
      (index, columns) : tuple of str
          Axis names to use as the index and the columns of the resulting
          DataFrame.
      """
      fixed = self._get_axis_name(axis)

      if fixed == 'items':
          index, columns = 'major_axis', 'minor_axis'
      elif fixed == 'major_axis':
          index, columns = 'minor_axis', 'items'
      elif fixed == 'minor_axis':
          index, columns = 'major_axis', 'items'

      return index, columns
  def _get_plane_axes(self, axis):
      """
      Return the two axis objects (index axis, columns axis) of the plane
      obtained by fixing ``axis``, as a list.
      """
      index_name, columns_name = self._get_plane_axes_index(axis)
      index_axis = self._get_axis(index_name)
      columns_axis = self._get_axis(columns_name)
      return [index_axis, columns_axis]
  # camelCase alias bound to the same classmethod object as ``from_dict``
  fromDict = from_dict
  def to_sparse(self, *args, **kwargs):
      """
      NOT IMPLEMENTED: sparsifying is not supported for Panel objects.

      Calling this method always raises ``NotImplementedError``; the
      arguments are accepted and ignored.
      """
      message = "sparsifying is not supported for Panel objects"
      raise NotImplementedError(message)
  def to_excel(self, path, na_rep='', engine=None, **kwargs):
      """
      Write each DataFrame in Panel to a separate excel sheet.

      Parameters
      ----------
      path : string or ExcelWriter object
          File path or existing ExcelWriter
      na_rep : string, default ''
          Missing data representation
      engine : string, default None
          write engine to use - you can also set this via the options
          ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
          ``io.excel.xlsm.writer``.

      Other Parameters
      ----------------
      float_format : string, default None
          Format string for floating point numbers
      cols : sequence, optional
          Columns to write
      header : boolean or list of string, default True
          Write out column names. If a list of string is given it is
          assumed to be aliases for the column names
      index : boolean, default True
          Write row names (index)
      index_label : string or sequence, default None
          Column label for index column(s) if desired. If None is given, and
          `header` and `index` are True, then the index names are used. A
          sequence should be given if the DataFrame uses MultiIndex.
      startrow : upper left cell row to dump data frame
      startcol : upper left cell column to dump data frame

      Notes
      -----
      Keyword arguments (and na_rep) are passed to the ``to_excel`` method
      for each DataFrame written.  The writer is saved once all sheets have
      been written, including when an existing writer was passed in.
      """
      from pandas.io.excel import ExcelWriter

      kwargs['na_rep'] = na_rep
      # a string is a file path to open; anything else is used as the writer
      writer = (ExcelWriter(path, engine=engine)
                if isinstance(path, compat.string_types) else path)

      for label, frame in self.iteritems():
          sheet_name = str(label)
          frame.to_excel(writer, sheet_name, **kwargs)
      writer.save()
  def as_matrix(self):
      """Consolidate the internal data in place and return it as an array."""
      self._consolidate_inplace()
      manager = self._data
      return manager.as_array()
  397. # ----------------------------------------------------------------------
  398. # Getting and setting elements
  def get_value(self, *args, **kwargs):
      """
      Quickly retrieve single value at (item, major, minor) location.

      .. deprecated:: 0.21.0
          Please use .at[] or .iat[] accessors.

      Parameters
      ----------
      item : item label (panel item)
      major : major axis label (panel item row)
      minor : minor axis label (panel item column)
      takeable : interpret the passed labels as indexers, default False

      Returns
      -------
      value : scalar value
      """
      deprecation_note = ("get_value is deprecated and will be removed "
                          "in a future release. Please use "
                          ".at[] or .iat[] accessors instead")
      warnings.warn(deprecation_note, FutureWarning, stacklevel=2)
      return self._get_value(*args, **kwargs)
  def _get_value(self, *args, **kwargs):
      # Non-deprecated implementation behind ``get_value``; its docstring is
      # copied from ``get_value`` right after the definition.
      expected = self._AXIS_LEN
      given = len(args)

      # exactly one positional argument per axis is required
      if given != expected:
          raise TypeError('There must be an argument for each axis, you gave'
                          ' {0} args, but {1} are required'.format(given,
                                                                   expected))

      takeable = kwargs.pop('takeable', None)
      # ``takeable`` is the only keyword accepted
      if kwargs:
          raise TypeError('get_value() got an unexpected keyword '
                          'argument "{0}"'.format(list(kwargs.keys())[0]))

      # fetch the item positionally or by label, then let the
      # lower-dimensional object resolve the remaining coordinates
      fetch_item = (self._iget_item_cache if takeable is True
                    else self._get_item_cache)
      lower = fetch_item(args[0])
      remaining = args[1:]
      return lower._get_value(*remaining, takeable=takeable)
  _get_value.__doc__ = get_value.__doc__
  def set_value(self, *args, **kwargs):
      """
      Quickly set single value at (item, major, minor) location.

      .. deprecated:: 0.21.0
          Please use .at[] or .iat[] accessors.

      Parameters
      ----------
      item : item label (panel item)
      major : major axis label (panel item row)
      minor : minor axis label (panel item column)
      value : scalar
      takeable : interpret the passed labels as indexers, default False

      Returns
      -------
      panel : Panel
          If label combo is contained, will be reference to calling Panel,
          otherwise a new object.
      """
      deprecation_note = ("set_value is deprecated and will be removed "
                          "in a future release. Please use "
                          ".at[] or .iat[] accessors instead")
      warnings.warn(deprecation_note, FutureWarning, stacklevel=2)
      return self._set_value(*args, **kwargs)
  def _set_value(self, *args, **kwargs):
      # Non-deprecated implementation behind ``set_value``; its docstring is
      # copied from ``set_value`` right after the definition.

      # require an arg for each axis and the value
      nargs = len(args)
      nreq = self._AXIS_LEN + 1

      if nargs != nreq:
          raise TypeError('There must be an argument for each axis plus the '
                          'value provided, you gave {0} args, but {1} are '
                          'required'.format(nargs, nreq))
      takeable = kwargs.pop('takeable', None)

      # ``takeable`` is the only keyword accepted
      if kwargs:
          raise TypeError('set_value() got an unexpected keyword '
                          'argument "{0}"'.format(list(kwargs.keys())[0]))

      try:
          # fast path: the item already exists, so set the value on the
          # lower-dimensional object and hand back ``self``
          if takeable is True:
              lower = self._iget_item_cache(args[0])
          else:
              lower = self._get_item_cache(args[0])

          lower._set_value(*args[1:], takeable=takeable)
          return self
      except KeyError:
          # a label is missing: reindex onto the expanded axes and set the
          # value on that new object instead
          axes = self._expand_axes(args)
          d = self._construct_axes_dict_from(self, axes, copy=False)
          result = self.reindex(**d)
          args = list(args)
          likely_dtype, args[-1] = infer_dtype_from_scalar(args[-1])
          # True when the expanded first axis differs from the current
          # info axis
          made_bigger = not np.array_equal(axes[0], self._info_axis)
          # how to make this logic simpler?
          if made_bigger:
              maybe_cast_item(result, args[0], likely_dtype)

          # note: ``takeable`` is not forwarded on this retry
          return result._set_value(*args)
  _set_value.__doc__ = set_value.__doc__
  def _box_item_values(self, key, values):
      """
      Wrap raw ``values`` selected for ``key`` in the appropriate container.

      Values with one dimension fewer than ``self`` become a
      ``_constructor_sliced`` object.  Values with the same number of
      dimensions are wrapped in ``_constructor``.
      """
      if self.ndim != values.ndim:
          slice_axes = self._construct_axes_dict_for_slice(
              self._AXIS_ORDERS[1:])
          return self._constructor_sliced(values, **slice_axes)

      boxed = self._constructor(values)
      # a dup selection will yield a full ndim
      if boxed._get_axis(0).is_unique:
          boxed = boxed[key]
      return boxed
  def __setitem__(self, key, value):
      """
      Set the item ``key`` from a sliced-type object, an ndarray or a scalar.

      Raises
      ------
      ValueError
          If an ndarray value does not have the shape of a single item.
      TypeError
          If ``value`` is of any other unsupported type.
      """
      key = com.apply_if_callable(key, self)
      full_shape = tuple(self.shape)
      item_shape = full_shape[1:]

      if isinstance(value, self._constructor_sliced):
          # align the incoming object with this object's non-info axes
          target_axes = self._construct_axes_dict_for_slice(
              self._AXIS_ORDERS[1:])
          value = value.reindex(**target_axes)
          mat = value.values
      elif isinstance(value, np.ndarray):
          if value.shape != item_shape:
              raise ValueError('shape of value must be {0}, shape of given '
                               'object was {1}'.format(
                                   item_shape,
                                   tuple(int(n) for n in value.shape)))
          mat = np.asarray(value)
      elif is_scalar(value):
          mat = cast_scalar_to_array(item_shape, value)
      else:
          raise TypeError('Cannot set item of '
                          'type: {dtype!s}'.format(dtype=type(value)))

      # add a leading length-1 axis before handing the block to NDFrame
      mat = mat.reshape((1,) + item_shape)
      NDFrame._set_item(self, key, mat)
  def _unpickle_panel_compat(self, state):  # pragma: no cover
      """
      Unpickle the panel.

      ``state`` is a 4-tuple ``(values, items, major, minor)`` of pickled
      arrays; the rebuilt Panel's ``_data`` is assigned to ``self``.
      """
      from pandas.io.pickle import _unpickle_array

      raw_values, raw_items, raw_major, raw_minor = state
      # decode the axes first, then the values
      items = _unpickle_array(raw_items)
      major = _unpickle_array(raw_major)
      minor = _unpickle_array(raw_minor)
      values = _unpickle_array(raw_values)

      rebuilt = Panel(values, items, major, minor)
      self._data = rebuilt._data
  def conform(self, frame, axis='items'):
      """
      Conform input DataFrame to align with chosen axis pair.

      Parameters
      ----------
      frame : DataFrame
      axis : {'items', 'major', 'minor'}
          Axis the input corresponds to. E.g., if axis='major', then
          the frame's columns would be items, and the index would be
          values of the minor axis

      Returns
      -------
      DataFrame
      """
      plane_axes = self._get_plane_axes(axis)
      reindex_kwargs = self._extract_axes_for_slice(self, plane_axes)
      return frame.reindex(**reindex_kwargs)
  def head(self, n=5):
      """Not supported for Panel; always raises ``NotImplementedError``."""
      raise NotImplementedError()
  def tail(self, n=5):
      """Not supported for Panel; always raises ``NotImplementedError``."""
      raise NotImplementedError()
  def round(self, decimals=0, *args, **kwargs):
      """
      Round each value in Panel to a specified number of decimal places.

      .. versionadded:: 0.18.0

      Parameters
      ----------
      decimals : int
          Number of decimal places to round to (default: 0).
          If decimals is negative, it specifies the number of
          positions to the left of the decimal point.

      Returns
      -------
      Panel object

      Raises
      ------
      TypeError
          If ``decimals`` is not an integer.

      See Also
      --------
      numpy.around
      """
      nv.validate_round(args, kwargs)

      if not is_integer(decimals):
          raise TypeError("decimals must be an integer")

      # ``decimals`` used to be validated but never handed to np.round, so
      # every call rounded to zero decimal places.  Pass it through.
      result = np.round(self.values, decimals)
      return self._wrap_result(result, axis=0)
  def _needs_reindex_multi(self, axes, method, level):
      """
      Report whether a multi-axis reindex should be used: never, for Panel
      or above ndim.
      """
      multi_reindex_allowed = False
      return multi_reindex_allowed
  def align(self, other, **kwargs):
      """Not supported for Panel; always raises ``NotImplementedError``."""
      raise NotImplementedError()
  def dropna(self, axis=0, how='any', inplace=False):
      """
      Drop 2D from panel, holding passed axis constant.

      Parameters
      ----------
      axis : int, default 0
          Axis to hold constant. E.g. axis=1 will drop major_axis entries
          having a certain amount of NA data
      how : {'all', 'any'}, default 'any'
          'any': one or more values are NA in the DataFrame along the
          axis. For 'all' they all must be.
      inplace : bool, default False
          If True, do operation inplace and return None.

      Returns
      -------
      dropped : Panel
      """
      axis = self._get_axis_number(axis)

      values = self.values
      present = notna(values)

      # count non-NA entries per label of ``axis`` by summing away the other
      # axes, highest first so the remaining axis numbers stay valid
      other_axes = sorted(set(range(self._AXIS_LEN)) - {axis}, reverse=True)
      for other_axis in other_axes:
          present = present.sum(other_axis)

      # number of cells in one 2D slice taken along ``axis``
      slice_size = np.prod(values.shape[:axis] + values.shape[axis + 1:])

      # 'all': keep slices with at least one value; otherwise keep only
      # slices that are completely filled
      keep = present > 0 if how == 'all' else present == slice_size

      kept_labels = self._get_axis(axis)[keep]
      result = self.reindex_axis(kept_labels, axis=axis)
      if not inplace:
          return result
      self._update_inplace(result)
  def _combine(self, other, func, axis=0):
      """
      Dispatch a binary operation on the type of ``other``.

      Panels, DataFrames and scalars are supported; anything else raises
      ``NotImplementedError``.
      """
      if isinstance(other, Panel):
          return self._combine_panel(other, func)
      if isinstance(other, DataFrame):
          return self._combine_frame(other, func, axis=axis)
      if is_scalar(other):
          return self._combine_const(other, func)

      unsupported = ("{otype!s} is not supported in combine operation with "
                     "{selftype!s}")
      raise NotImplementedError(
          unsupported.format(otype=type(other), selftype=type(self)))
  def _combine_const(self, other, func):
      """Apply ``func`` to the values and the scalar ``other``."""
      # floating point warnings raised by ``func`` are silenced
      with np.errstate(all='ignore'):
          combined = func(self.values, other)
      axes_kwargs = self._construct_axes_dict()
      return self._constructor(combined, **axes_kwargs)
  def _combine_frame(self, other, func, axis=0):
      """
      Apply ``func`` between the Panel values and a DataFrame along ``axis``.

      ``other`` is first reindexed onto the plane axes belonging to ``axis``.
      """
      index, columns = self._get_plane_axes(axis)
      axis = self._get_axis_number(axis)

      aligned = other.reindex(index=index, columns=columns)

      with np.errstate(all='ignore'):
          if axis == 0:
              new_values = func(self.values, aligned.values)
          elif axis == 1:
              # bring the major axis to the front, operate, then move it back
              swapped = self.values.swapaxes(0, 1)
              new_values = func(swapped, aligned.values.T).swapaxes(0, 1)
          elif axis == 2:
              # same for the minor axis
              swapped = self.values.swapaxes(0, 2)
              new_values = func(swapped, aligned.values).swapaxes(0, 2)

      return self._constructor(new_values, self.items, self.major_axis,
                               self.minor_axis)
  def _combine_panel(self, other, func):
      """
      Apply ``func`` between two Panels after reindexing both onto the
      union of their items, major and minor axes.
      """
      items = self.items.union(other.items)
      major = self.major_axis.union(other.major_axis)
      minor = self.minor_axis.union(other.minor_axis)
      union_axes = dict(items=items, major=major, minor=minor)

      # could check that everything's the same size, but forget it
      left = self.reindex(**union_axes)
      right = other.reindex(**union_axes)

      with np.errstate(all='ignore'):
          combined = func(left.values, right.values)

      return self._constructor(combined, items, major, minor)
  def major_xs(self, key):
      """
      Return slice of panel along major axis.

      Parameters
      ----------
      key : object
          Major axis label

      Returns
      -------
      y : DataFrame
          Index -> minor axis, columns -> items.

      Notes
      -----
      major_xs is only for getting, not setting values.

      MultiIndex Slicers is a generic way to get/set values on any level or
      levels and is a superset of major_xs functionality, see
      :ref:`MultiIndex Slicers <advanced.mi_slicers>`
      """
      # the major axis is the second-to-last axis
      major_axis_number = self._AXIS_LEN - 2
      return self.xs(key, axis=major_axis_number)
  def minor_xs(self, key):
      """
      Return slice of panel along minor axis.

      Parameters
      ----------
      key : object
          Minor axis label

      Returns
      -------
      y : DataFrame
          Index -> major axis, columns -> items.

      Notes
      -----
      minor_xs is only for getting, not setting values.

      MultiIndex Slicers is a generic way to get/set values on any level or
      levels and is a superset of minor_xs functionality, see
      :ref:`MultiIndex Slicers <advanced.mi_slicers>`
      """
      # the minor axis is the last axis
      minor_axis_number = self._AXIS_LEN - 1
      return self.xs(key, axis=minor_axis_number)
  def xs(self, key, axis=1):
      """
      Return slice of panel along selected axis.

      Parameters
      ----------
      key : object
          Label
      axis : {'items', 'major', 'minor'}, default 1/'major'

      Returns
      -------
      y : ndim(self)-1

      Notes
      -----
      xs is only for getting, not setting values.

      MultiIndex Slicers is a generic way to get/set values on any level or
      levels and is a superset of xs functionality, see
      :ref:`MultiIndex Slicers <advanced.mi_slicers>`
      """
      # resolve the axis once; it is already a number from here on
      axis = self._get_axis_number(axis)
      if axis == 0:
          # slicing along the info axis is plain item access
          return self[key]

      self._consolidate_inplace()
      new_data = self._data.xs(key, axis=axis, copy=False)
      result = self._construct_return_type(new_data)
      # flag the result as a copy when the sliced data is of mixed type
      copy = new_data.is_mixed_type
      result._set_is_copy(self, copy=copy)
      return result

  _xs = xs
  def _ixs(self, i, axis=0):
      """
      Select by position along ``axis``.

      Parameters
      ----------
      i : int, slice, or sequence of integers
      axis : int

      Returns
      -------
      A reindexed object when the selected key is list-like, otherwise the
      lower-dimensional selection at position ``i``.
      """
      ax = self._get_axis(axis)
      # label(s) found at the requested position(s)
      key = ax[i]

      # xs cannot handle a non-scalar key, so just reindex here
      # if we have a multi-index and a single tuple, then its a reduction
      # (GH 7516)
      if not (isinstance(ax, MultiIndex) and isinstance(key, tuple)):
          if is_list_like(key):
              indexer = {self._get_axis_name(axis): key}
              return self.reindex(**indexer)

      # a reduction
      if axis == 0:
          # take the item values directly from the block manager and box them
          values = self._data.iget(i)
          return self._box_item_values(key, values)

      # xs by position
      self._consolidate_inplace()
      new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
      return self._construct_return_type(new_data)
  def groupby(self, function, axis='major'):
      """
      Group data on given axis (no longer available).

      Parameters
      ----------
      function : callable
          Mapping function for chosen access
      axis : {'major', 'minor', 'items'}, default 'major'

      Raises
      ------
      NotImplementedError
          Always; no grouping is performed.
      """
      removal_note = "Panel is removed in pandas 0.25.0"
      raise NotImplementedError(removal_note)
  def to_frame(self, filter_observations=True):
      """
      Transform wide format into long (stacked) format as DataFrame whose
      columns are the Panel's items and whose index is a MultiIndex formed
      of the Panel's major and minor axes.

      Parameters
      ----------
      filter_observations : boolean, default True
          Drop (major, minor) pairs without a complete set of observations
          across all the items

      Returns
      -------
      y : DataFrame
      """
      # N and K are the lengths of the second and third axes
      _, N, K = self.shape

      if filter_observations:
          # shaped like the return DataFrame
          mask = notna(self.values).all(axis=0)
          # size = mask.sum()
          selector = mask.ravel()
      else:
          # size = N * K
          selector = slice(None, None)

      # one flattened (and optionally filtered) column per item
      data = {item: self[item].values.ravel()[selector]
              for item in self.items}

      def construct_multi_parts(idx, n_repeat, n_shuffle=1):
          # Replicates and shuffles MultiIndex, returns individual attributes
          codes = [np.repeat(x, n_repeat) for x in idx.codes]
          # Assumes that each label is divisible by n_shuffle
          codes = [x.reshape(n_shuffle, -1).ravel(order='F')
                   for x in codes]
          codes = [x[selector] for x in codes]
          levels = idx.levels
          names = idx.names
          return codes, levels, names

      def construct_index_parts(idx, major=True):
          # Builds the codes/levels/names for a flat (non-Multi) index
          levels = [idx]
          if major:
              # each major position is repeated K times: 0,0,..,1,1,..
              codes = [np.arange(N).repeat(K)[selector]]
              names = idx.name or 'major'
          else:
              # the minor positions 0..K-1 are stacked N times
              codes = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)]
              codes = [codes.ravel()[selector]]
              names = idx.name or 'minor'
          names = [names]
          return codes, levels, names

      if isinstance(self.major_axis, MultiIndex):
          major_codes, major_levels, major_names = construct_multi_parts(
              self.major_axis, n_repeat=K)
      else:
          major_codes, major_levels, major_names = construct_index_parts(
              self.major_axis)

      if isinstance(self.minor_axis, MultiIndex):
          minor_codes, minor_levels, minor_names = construct_multi_parts(
              self.minor_axis, n_repeat=N, n_shuffle=K)
      else:
          minor_codes, minor_levels, minor_names = construct_index_parts(
              self.minor_axis, major=False)

      # the major parts come first, followed by the minor parts
      levels = major_levels + minor_levels
      codes = major_codes + minor_codes
      names = major_names + minor_names

      index = MultiIndex(levels=levels, codes=codes, names=names,
                         verify_integrity=False)

      return DataFrame(data, index=index, columns=self.items)
  def apply(self, func, axis='major', **kwargs):
      """
      Apply function along axis (or axes) of the Panel.

      Parameters
      ----------
      func : function
          Function to apply to each combination of 'other' axes
          e.g. if axis = 'items', the combination of major_axis/minor_axis
          will each be passed as a Series; if axis = ('items', 'major'),
          DataFrames of items & major axis will be passed
      axis : {'items', 'minor', 'major'}, or {0, 1, 2}, or a tuple with two
          axes
      **kwargs
          Additional keyword arguments will be passed to the function.
          This also holds when ``func`` is a NumPy ufunc.

      Returns
      -------
      result : Panel, DataFrame, or Series

      Examples
      --------

      Returns a Panel with the square root of each element

      >>> p = pd.Panel(np.random.rand(4, 3, 2))  # doctest: +SKIP
      >>> p.apply(np.sqrt)  # doctest: +SKIP

      Equivalent to p.sum(1), returning a DataFrame

      >>> p.apply(lambda x: x.sum(), axis=1)  # doctest: +SKIP

      Equivalent to previous:

      >>> p.apply(lambda x: x.sum(), axis='major')  # doctest: +SKIP

      Return the shapes of each DataFrame over axis 2 (i.e the shapes of
      items x major), as a Series

      >>> p.apply(lambda x: x.shape, axis=(0,1))  # doctest: +SKIP
      """
      # Bind the keyword arguments for the slab / 1-d paths. The ufunc
      # test below is made on ``func`` itself, so wrapping here no longer
      # hides a ufunc and its kwargs are not dropped.
      if kwargs:
          f = lambda x: func(x, **kwargs)
      else:
          f = func

      # 2d-slabs
      if isinstance(axis, (tuple, list)) and len(axis) == 2:
          return self._apply_2d(f, axis=axis)

      axis = self._get_axis_number(axis)

      # try ufunc like
      if isinstance(func, np.ufunc):
          try:
              with np.errstate(all='ignore'):
                  # apply_along_axis forwards extra keyword arguments to
                  # the applied function
                  result = np.apply_along_axis(func, axis, self.values,
                                               **kwargs)
              return self._wrap_result(result, axis=axis)
          except AttributeError:
              # fall through to the generic 1-d path
              pass

      # 1d
      return self._apply_1d(f, axis=axis)
  def _apply_1d(self, func, axis):
      """
      Call ``func`` on every 1-d slice taken along ``axis``.

      Each slice is wrapped in a Series indexed by the labels of ``axis``
      and named with the tuple of labels identifying its position on the
      remaining axes.

      Parameters
      ----------
      func : callable
          Called with one Series per slice.
      axis : int
          Axis number along which the slices are taken.

      Returns
      -------
      An empty object from ``self._constructor`` when there are no slices;
      an object from ``self._constructor`` when ``func`` returns Series;
      otherwise whatever ``self._construct_return_type`` builds from the
      collected results.
      """
      axis_name = self._get_axis_name(axis)
      ndim = self.ndim
      values = self.values

      # iter thru the axes
      slice_axis = self._get_axis(axis)
      # current position on each of the other (non-sliced) axes
      slice_indexer = [0] * (ndim - 1)
      # object array so it can hold a slice at `axis` and ints elsewhere
      indexer = np.zeros(ndim, 'O')
      # axis numbers of the other axes
      indlist = list(range(ndim))
      indlist.remove(axis)
      indexer[axis] = slice(None, None)
      indexer.put(indlist, slice_indexer)
      planes = [self._get_axis(axi) for axi in indlist]
      # lengths of the other axes, in indlist order
      shape = np.array(self.shape).take(indlist)

      # all the iteration points
      points = cartesian_product(planes)

      results = []
      for i in range(np.prod(shape)):

          # construct the object
          pts = tuple(p[i] for p in points)
          indexer.put(indlist, slice_indexer)

          obj = Series(values[tuple(indexer)], index=slice_axis, name=pts)
          result = func(obj)

          results.append(result)

          # increment the indexer: bump the last position and carry into
          # the preceding ones when a position reaches its axis length
          slice_indexer[-1] += 1
          n = -1
          while (slice_indexer[n] >= shape[n]) and (n > (1 - ndim)):
              slice_indexer[n - 1] += 1
              slice_indexer[n] = 0
              n -= 1

      # empty object
      if not len(results):
          return self._constructor(**self._construct_axes_dict())

      # same ndim as current
      if isinstance(results[0], Series):
          # stack the per-slice outputs, then reshape so the applied axis
          # comes first followed by the other axes
          arr = np.vstack([r.values for r in results])
          arr = arr.T.reshape(tuple([len(slice_axis)] + list(shape)))
          # argsort of the current axis order gives the permutation that
          # restores the original axis positions
          tranp = np.array([axis] + indlist).argsort()
          arr = arr.transpose(tuple(list(tranp)))
          return self._constructor(arr, **self._construct_axes_dict())

      # ndim-1 shape
      results = np.array(results).reshape(shape)
      if results.ndim == 2 and axis_name != self._info_axis_name:
          # keep the axis labels in step with the transposed values
          results = results.T
          planes = planes[::-1]
      return self._construct_return_type(results, planes)
  919. def _apply_2d(self, func, axis):
  920. """
  921. Handle 2-d slices, equiv to iterating over the other axis.
  922. """
  923. ndim = self.ndim
  924. axis = [self._get_axis_number(a) for a in axis]
  925. # construct slabs, in 2-d this is a DataFrame result
  926. indexer_axis = list(range(ndim))
  927. for a in axis:
  928. indexer_axis.remove(a)
  929. indexer_axis = indexer_axis[0]
  930. slicer = [slice(None, None)] * ndim
  931. ax = self._get_axis(indexer_axis)
  932. results = []
  933. for i, e in enumerate(ax):
  934. slicer[indexer_axis] = i
  935. sliced = self.iloc[tuple(slicer)]
  936. obj = func(sliced)
  937. results.append((e, obj))
  938. return self._construct_return_type(dict(results))
  939. def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
  940. filter_type=None, **kwds):
  941. if numeric_only:
  942. raise NotImplementedError('Panel.{0} does not implement '
  943. 'numeric_only.'.format(name))
  944. if axis is None and filter_type == 'bool':
  945. # labels = None
  946. # constructor = None
  947. axis_number = None
  948. axis_name = None
  949. else:
  950. # TODO: Make other agg func handle axis=None properly
  951. axis = self._get_axis_number(axis)
  952. # labels = self._get_agg_axis(axis)
  953. # constructor = self._constructor
  954. axis_name = self._get_axis_name(axis)
  955. axis_number = self._get_axis_number(axis_name)
  956. f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds)
  957. with np.errstate(all='ignore'):
  958. result = f(self.values)
  959. if axis is None and filter_type == 'bool':
  960. return np.bool_(result)
  961. axes = self._get_plane_axes(axis_name)
  962. if result.ndim == 2 and axis_name != self._info_axis_name:
  963. result = result.T
  964. return self._construct_return_type(result, axes)
  965. def _construct_return_type(self, result, axes=None):
  966. """
  967. Return the type for the ndim of the result.
  968. """
  969. ndim = getattr(result, 'ndim', None)
  970. # need to assume they are the same
  971. if ndim is None:
  972. if isinstance(result, dict):
  973. ndim = getattr(list(compat.itervalues(result))[0], 'ndim', 0)
  974. # have a dict, so top-level is +1 dim
  975. if ndim != 0:
  976. ndim += 1
  977. # scalar
  978. if ndim == 0:
  979. return Series(result)
  980. # same as self
  981. elif self.ndim == ndim:
  982. # return the construction dictionary for these axes
  983. if axes is None:
  984. return self._constructor(result)
  985. return self._constructor(result, **self._construct_axes_dict())
  986. # sliced
  987. elif self.ndim == ndim + 1:
  988. if axes is None:
  989. return self._constructor_sliced(result)
  990. return self._constructor_sliced(
  991. result, **self._extract_axes_for_slice(self, axes))
  992. raise ValueError('invalid _construct_return_type [self->{self}] '
  993. '[result->{result}]'.format(self=self, result=result))
  994. def _wrap_result(self, result, axis):
  995. axis = self._get_axis_name(axis)
  996. axes = self._get_plane_axes(axis)
  997. if result.ndim == 2 and axis != self._info_axis_name:
  998. result = result.T
  999. return self._construct_return_type(result, axes)
  @Substitution(**_shared_doc_kwargs)
  @Appender(NDFrame.reindex.__doc__)
  def reindex(self, *args, **kwargs):
      # NOTE: no docstring is written here on purpose; the decorators above
      # are given NDFrame.reindex.__doc__ to attach.
      #
      # 'major' / 'minor' are accepted as short aliases for 'major_axis' /
      # 'minor_axis'. Passing both spellings of the same axis raises
      # TypeError.
      major = kwargs.pop("major", None)
      minor = kwargs.pop('minor', None)

      if major is not None:
          # Test with ``is not None`` rather than truthiness: truth-testing
          # an Index/ndarray of labels raises an unrelated ValueError, and
          # an empty list of labels is falsy and would be overwritten
          # silently.
          if kwargs.get("major_axis") is not None:
              raise TypeError("Cannot specify both 'major' and 'major_axis'")
          kwargs['major_axis'] = major
      if minor is not None:
          if kwargs.get("minor_axis") is not None:
              raise TypeError("Cannot specify both 'minor' and 'minor_axis'")
          kwargs['minor_axis'] = minor

      # resolve positional / 'labels' + 'axis' style arguments into
      # per-axis keyword arguments
      axes = validate_axis_style_args(self, args, kwargs, 'labels',
                                      'reindex')
      kwargs.update(axes)
      kwargs.pop('axis', None)
      kwargs.pop('labels', None)

      with warnings.catch_warnings():
          warnings.simplefilter("ignore", FutureWarning)
          # do not warn about constructing Panel when reindexing
          result = super(Panel, self).reindex(**kwargs)
      return result
  @Substitution(**_shared_doc_kwargs)
  @Appender(NDFrame.rename.__doc__)
  def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs):
      # The short keywords 'major' / 'minor' are consulted (and removed
      # from kwargs) only when the corresponding long-form argument was
      # not supplied.
      if major_axis is None:
          major_axis = kwargs.pop('major', None)
      if minor_axis is None:
          minor_axis = kwargs.pop('minor', None)

      parent = super(Panel, self)
      return parent.rename(items=items, major_axis=major_axis,
                           minor_axis=minor_axis, **kwargs)
  @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
  def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
                   limit=None, fill_value=np.nan):
      # Pure pass-through: every argument is forwarded by keyword to the
      # parent class implementation. The documentation text is handed to
      # the Appender decorator above rather than written as a docstring.
      return super(Panel, self).reindex_axis(labels=labels, axis=axis,
                                             method=method, level=level,
                                             copy=copy, limit=limit,
                                             fill_value=fill_value)
  @Substitution(**_shared_doc_kwargs)
  @Appender(NDFrame.transpose.__doc__)
  def transpose(self, *args, **kwargs):
      # A single positional argument that is iterable (and not string-like)
      # is taken to be the whole list of axes; otherwise the positional
      # arguments themselves are the axes.
      lone_iterable = (len(args) == 1 and
                       hasattr(args[0], '__iter__') and
                       not is_string_like(args[0]))
      axes = args[0] if lone_iterable else args

      if axes:
          # axes given positionally: an 'axes' keyword would be a duplicate
          if 'axes' in kwargs:
              raise TypeError("transpose() got multiple values for "
                              "keyword argument 'axes'")
      else:
          # nothing positional: fall back to the keyword (default: empty)
          axes = kwargs.pop('axes', ())

      return super(Panel, self).transpose(*axes, **kwargs)
  @Substitution(**_shared_doc_kwargs)
  @Appender(NDFrame.fillna.__doc__)
  def fillna(self, value=None, method=None, axis=None, inplace=False,
             limit=None, downcast=None, **kwargs):
      # Pure pass-through: every argument is forwarded by keyword to the
      # parent class implementation. The documentation text is handed to
      # the decorators above (NDFrame.fillna.__doc__) rather than written
      # as a docstring here.
      return super(Panel, self).fillna(value=value, method=method, axis=axis,
                                       inplace=inplace, limit=limit,
                                       downcast=downcast, **kwargs)
  def count(self, axis='major'):
      """
      Count the finite values along the requested axis.

      An entry is counted when ``np.isfinite`` is True for it, so NaN and
      +/-inf entries are not counted.

      Parameters
      ----------
      axis : {'items', 'major', 'minor'} or {0, 1, 2}

      Returns
      -------
      count : DataFrame
      """
      axis_number = self._get_axis_number(axis)
      is_finite = np.isfinite(self.values)
      totals = np.sum(is_finite, axis=axis_number, dtype='int64')
      return self._wrap_result(totals, axis)
  def shift(self, periods=1, freq=None, axis='major'):
      """
      Shift the Panel by ``periods`` along ``axis``.

      With a truthy ``freq`` the call is handed to ``tshift``; otherwise
      the parent class' ``slice_shift`` is used. According to the
      original documentation of this method, the shifted data does not
      include the dropped periods, so the shifted axis is smaller than
      the original, unlike DataFrame.shift().

      Parameters
      ----------
      periods : int
          Number of periods to move, can be positive or negative
      freq : DateOffset, timedelta, or time rule string, optional
      axis : {'items', 'major', 'minor'} or {0, 1, 2}

      Returns
      -------
      shifted : Panel
      """
      if not freq:
          # no frequency: positional shift
          return super(Panel, self).slice_shift(periods, axis=axis)

      # frequency given: shift the axis labels instead
      return self.tshift(periods, freq, axis=axis)
  def tshift(self, periods=1, freq=None, axis='major'):
      """
      Delegate to the parent class' ``tshift``.

      ``periods``, ``freq`` and ``axis`` are passed through positionally;
      this override supplies 'major' as the default ``axis``.
      """
      return super(Panel, self).tshift(periods, freq, axis)
  def join(self, other, how='left', lsuffix='', rsuffix=''):
      """
      Join the items of this Panel with one or several other Panels.

      Parameters
      ----------
      other : Panel or list of Panels
          A single Panel is aligned on the major and minor axes and merged
          item-wise; any other value is treated as an iterable of Panels
          and concatenated along the items axis.
      how : {'left', 'right', 'outer', 'inner'}
          How to combine the major/minor axes of the objects.
          Default: 'left'

          * left: use the calling Panel's axes
          * right: use the other Panel's axes (single Panel only)
          * outer: form union of the axes
          * inner: use intersection of the axes
      lsuffix : string
          Suffix for overlapping items coming from the calling Panel
          (single Panel only)
      rsuffix : string
          Suffix for overlapping items coming from the other Panel
          (single Panel only)

      Returns
      -------
      joined : Panel

      Raises
      ------
      ValueError
          When several panels are passed together with a suffix or with
          ``how='right'``.
      """
      from pandas.core.reshape.concat import concat

      if not isinstance(other, Panel):
          # several panels: suffixes and right joins are unavailable
          if lsuffix or rsuffix:
              raise ValueError('Suffixes not supported when passing '
                               'multiple panels')
          if how == 'right':
              raise ValueError('Right join not supported with multiple '
                               'panels')

          if how == 'left':
              # emulate a left join: outer-concatenate, then pin the result
              # to this Panel's own major/minor axes
              how = 'outer'
              join_axes = [self.major_axis, self.minor_axis]
          else:
              join_axes = None

          panels = [self] + list(other)
          return concat(panels, axis=0, join=how,
                        join_axes=join_axes, verify_integrity=True)

      # single Panel: align both operands on common major/minor axes, then
      # merge their underlying data
      join_major, join_minor = self._get_join_index(other, how)
      left = self.reindex(major=join_major, minor=join_minor)
      right = other.reindex(major=join_major, minor=join_minor)
      merged_data = left._data.merge(right._data, lsuffix, rsuffix)
      return self._constructor(merged_data)
  1141. @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',
  1142. mapping={False: 'ignore', True: 'raise'})
  1143. def update(self, other, join='left', overwrite=True, filter_func=None,
  1144. errors='ignore'):
  1145. """
  1146. Modify Panel in place using non-NA values from other Panel.
  1147. May also use object coercible to Panel. Will align on items.
  1148. Parameters
  1149. ----------
  1150. other : Panel, or object coercible to Panel
  1151. The object from which the caller will be udpated.
  1152. join : {'left', 'right', 'outer', 'inner'}, default 'left'
  1153. How individual DataFrames are joined.
  1154. overwrite : bool, default True
  1155. If True then overwrite values for common keys in the calling Panel.
  1156. filter_func : callable(1d-array) -> 1d-array<bool>, default None
  1157. Can choose to replace values other than NA. Return True for values
  1158. that should be updated.
  1159. errors : {'raise', 'ignore'}, default 'ignore'
  1160. If 'raise', will raise an error if a DataFrame and other both.
  1161. .. versionchanged :: 0.24.0
  1162. Changed from `raise_conflict=False|True`
  1163. to `errors='ignore'|'raise'`.
  1164. See Also
  1165. --------
  1166. DataFrame.update : Similar method for DataFrames.
  1167. dict.update : Similar method for dictionaries.

Large files are truncated, but you can click here to view the full file