PageRenderTime 67ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/core/internals.py

http://github.com/wesm/pandas
Python | 5218 lines | 4262 code | 469 blank | 487 comment | 452 complexity | 791ba33868e7ff745da90d2b43722e0a MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. import copy
  2. import itertools
  3. import re
  4. import operator
  5. from datetime import datetime, timedelta, date
  6. from collections import defaultdict
  7. import numpy as np
  8. from numpy import percentile as _quantile
  9. from pandas.core.base import PandasObject
  10. from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
  11. from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
  12. _ensure_int64, _ensure_platform_int,
  13. is_integer,
  14. is_dtype_equal,
  15. is_timedelta64_dtype,
  16. is_datetime64_dtype, is_datetimetz, is_sparse,
  17. is_categorical, is_categorical_dtype,
  18. is_integer_dtype,
  19. is_datetime64tz_dtype,
  20. is_object_dtype,
  21. is_datetimelike_v_numeric,
  22. is_numeric_v_string_like, is_extension_type,
  23. is_list_like,
  24. is_re,
  25. is_re_compilable,
  26. is_scalar,
  27. _get_dtype)
  28. from pandas.types.cast import (_possibly_downcast_to_dtype,
  29. _maybe_convert_string_to_object,
  30. _maybe_upcast,
  31. _maybe_convert_scalar, _maybe_promote,
  32. _infer_dtype_from_scalar,
  33. _soft_convert_objects,
  34. _possibly_convert_objects,
  35. _astype_nansafe,
  36. _find_common_type)
  37. from pandas.types.missing import (isnull, array_equivalent,
  38. _is_na_compat,
  39. is_null_datelike_scalar)
  40. import pandas.types.concat as _concat
  41. from pandas.types.generic import ABCSeries
  42. from pandas.core.common import is_null_slice
  43. import pandas.core.algorithms as algos
  44. from pandas.core.index import Index, MultiIndex, _ensure_index
  45. from pandas.core.indexing import maybe_convert_indices, length_of_indexer
  46. from pandas.core.categorical import Categorical, maybe_to_categorical
  47. from pandas.tseries.index import DatetimeIndex
  48. from pandas.formats.printing import pprint_thing
  49. import pandas.core.missing as missing
  50. from pandas.sparse.array import _maybe_to_sparse, SparseArray
  51. import pandas.lib as lib
  52. import pandas.tslib as tslib
  53. import pandas.computation.expressions as expressions
  54. from pandas.util.decorators import cache_readonly
  55. from pandas.tslib import Timedelta
  56. from pandas import compat, _np_version_under1p9
  57. from pandas.compat import range, map, zip, u
  58. from pandas.lib import BlockPlacement
  59. class Block(PandasObject):
  60. """
  61. Canonical n-dimensional unit of homogeneous dtype contained in a pandas
  62. data structure
  63. Index-ignorant; let the container take care of that
  64. """
  65. __slots__ = ['_mgr_locs', 'values', 'ndim']
  66. is_numeric = False
  67. is_float = False
  68. is_integer = False
  69. is_complex = False
  70. is_datetime = False
  71. is_datetimetz = False
  72. is_timedelta = False
  73. is_bool = False
  74. is_object = False
  75. is_categorical = False
  76. is_sparse = False
  77. _box_to_block_values = True
  78. _can_hold_na = False
  79. _downcast_dtype = None
  80. _can_consolidate = True
  81. _verify_integrity = True
  82. _validate_ndim = True
  83. _ftype = 'dense'
  84. _holder = None
  85. def __init__(self, values, placement, ndim=None, fastpath=False):
  86. if ndim is None:
  87. ndim = values.ndim
  88. elif values.ndim != ndim:
  89. raise ValueError('Wrong number of dimensions')
  90. self.ndim = ndim
  91. self.mgr_locs = placement
  92. self.values = values
  93. if ndim and len(self.mgr_locs) != len(self.values):
  94. raise ValueError('Wrong number of items passed %d, placement '
  95. 'implies %d' % (len(self.values),
  96. len(self.mgr_locs)))
  97. @property
  98. def _consolidate_key(self):
  99. return (self._can_consolidate, self.dtype.name)
  100. @property
  101. def _is_single_block(self):
  102. return self.ndim == 1
  103. @property
  104. def is_view(self):
  105. """ return a boolean if I am possibly a view """
  106. return self.values.base is not None
  107. @property
  108. def is_datelike(self):
  109. """ return True if I am a non-datelike """
  110. return self.is_datetime or self.is_timedelta
  def is_categorical_astype(self, dtype):
      """
      Decide whether ``dtype`` requests a conversion to categorical.

      Returns True for the plain ``CategoricalDtype()``, False for any
      non-categorical dtype.

      Raises
      ------
      TypeError
          If ``dtype`` is categorical but does not compare equal to
          ``CategoricalDtype()``, which is not accepted for astype-ing.
      """
      if not is_categorical_dtype(dtype):
          return False
      if dtype == CategoricalDtype():
          return True
      # categorical, but not a form that astype accepts
      raise TypeError("invalid type {0} for astype".format(dtype))
  123. def external_values(self, dtype=None):
  124. """ return an outside world format, currently just the ndarray """
  125. return self.values
  126. def internal_values(self, dtype=None):
  127. """ return an internal format, currently just the ndarray
  128. this should be the pure internal API format
  129. """
  130. return self.values
  131. def get_values(self, dtype=None):
  132. """
  133. return an internal format, currently just the ndarray
  134. this is often overriden to handle to_dense like operations
  135. """
  136. if is_object_dtype(dtype):
  137. return self.values.astype(object)
  138. return self.values
  139. def to_dense(self):
  140. return self.values.view()
  def to_object_block(self, mgr):
      """ rebuild this block as an ``ObjectBlock`` from its values fetched
      with ``dtype=object`` (``mgr`` is not used here)
      """
      object_values = self.get_values(dtype=object)
      return self.make_block(object_values, klass=ObjectBlock)
  145. @property
  146. def _na_value(self):
  147. return np.nan
  148. @property
  149. def fill_value(self):
  150. return np.nan
  151. @property
  152. def mgr_locs(self):
  153. return self._mgr_locs
  154. @property
  155. def array_dtype(self):
  156. """ the dtype to return if I want to construct this block as an
  157. array
  158. """
  159. return self.dtype
  def make_block(self, values, placement=None, ndim=None, **kwargs):
      """
      Build a new block via the module-level ``make_block``; ``placement``
      and ``ndim`` fall back to this block's own when not supplied, and
      remaining keyword arguments are forwarded unchanged.
      """
      use_placement = self.mgr_locs if placement is None else placement
      use_ndim = self.ndim if ndim is None else ndim
      return make_block(values, placement=use_placement, ndim=use_ndim,
                        **kwargs)
  def make_block_scalar(self, values, **kwargs):
      """
      Wrap ``values`` in a ScalarBlock; extra keyword arguments are accepted
      but not forwarded.
      """
      scalar_block = ScalarBlock(values)
      return scalar_block
  def make_block_same_class(self, values, placement=None, fastpath=True,
                            **kwargs):
      """ Wrap ``values`` in a block of the same class as self, reusing
      this block's placement when none is given.
      """
      use_placement = self.mgr_locs if placement is None else placement
      return make_block(values, placement=use_placement,
                        klass=self.__class__, fastpath=fastpath, **kwargs)
  @mgr_locs.setter
  def mgr_locs(self, new_mgr_locs):
      """ store the placement, wrapping anything that is not already a
      BlockPlacement in one
      """
      if isinstance(new_mgr_locs, BlockPlacement):
          placement = new_mgr_locs
      else:
          placement = BlockPlacement(new_mgr_locs)
      self._mgr_locs = placement
  def __unicode__(self):
      """
      Short text summary: class name, then length (1-d blocks) or placement
      indexer and shape (higher dimensions), then dtype. The items
      themselves are deliberately not printed.
      """
      name = pprint_thing(self.__class__.__name__)
      if self._is_single_block:
          return '%s: %s dtype: %s' % (name, len(self), self.dtype)

      dims = [pprint_thing(s) for s in self.shape]
      shape = ' x '.join(dims)
      locs = pprint_thing(self.mgr_locs.indexer)
      return '%s: %s, %s, dtype: %s' % (name, locs, shape, self.dtype)
  197. def __len__(self):
  198. return len(self.values)
  199. def __getstate__(self):
  200. return self.mgr_locs.indexer, self.values
  def __setstate__(self, state):
      """ restore from the (placement indexer, values) pair produced by
      ``__getstate__``; ``ndim`` is re-derived from the values
      """
      placement = BlockPlacement(state[0])
      self.mgr_locs = placement
      restored = state[1]
      self.values = restored
      self.ndim = restored.ndim
  205. def _slice(self, slicer):
  206. """ return a slice of my values """
  207. return self.values[slicer]
  def reshape_nd(self, labels, shape, ref_items, mgr=None):
      """
      Return a new block transformed to an nd block.

      Parameters
      ----------
      labels : list of new axis labels
      shape : new shape
      ref_items : new ref_items
      mgr : not used here
      """
      transposed = self.get_values().T
      return _block2d_to_blocknd(values=transposed, placement=self.mgr_locs,
                                 shape=shape, labels=labels,
                                 ref_items=ref_items)
  220. def getitem_block(self, slicer, new_mgr_locs=None):
  221. """
  222. Perform __getitem__-like, return result as block.
  223. As of now, only supports slices that preserve dimensionality.
  224. """
  225. if new_mgr_locs is None:
  226. if isinstance(slicer, tuple):
  227. axis0_slicer = slicer[0]
  228. else:
  229. axis0_slicer = slicer
  230. new_mgr_locs = self.mgr_locs[axis0_slicer]
  231. new_values = self._slice(slicer)
  232. if self._validate_ndim and new_values.ndim != self.ndim:
  233. raise ValueError("Only same dim slicing is allowed")
  234. return self.make_block_same_class(new_values, new_mgr_locs)
  235. @property
  236. def shape(self):
  237. return self.values.shape
  238. @property
  239. def itemsize(self):
  240. return self.values.itemsize
  241. @property
  242. def dtype(self):
  243. return self.values.dtype
  244. @property
  245. def ftype(self):
  246. return "%s:%s" % (self.dtype, self._ftype)
  def merge(self, other):
      """ merge this block with ``other`` via ``_merge_blocks`` """
      pair = [self, other]
      return _merge_blocks(pair)
  249. def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
  250. limit=None, mask_info=None):
  251. """
  252. Reindex using pre-computed indexer information
  253. """
  254. if axis < 1:
  255. raise AssertionError('axis must be at least 1, got %d' % axis)
  256. if fill_value is None:
  257. fill_value = self.fill_value
  258. new_values = algos.take_nd(self.values, indexer, axis,
  259. fill_value=fill_value, mask_info=mask_info)
  260. return self.make_block(new_values, fastpath=True)
  def get(self, item):
      """ return the values stored at the position of ``item``, looked up
      through ``self.items.get_loc``
      """
      # NOTE(review): ``items`` is not among this class's ``__slots__``
      # ('_mgr_locs', 'values', 'ndim') and is not defined in the visible
      # code; presumably a leftover from an older block layout -- confirm
      # before relying on this method.
      loc = self.items.get_loc(item)
      return self.values[loc]
  264. def iget(self, i):
  265. return self.values[i]
  266. def set(self, locs, values, check=False):
  267. """
  268. Modify Block in-place with new item value
  269. Returns
  270. -------
  271. None
  272. """
  273. self.values[locs] = values
  def delete(self, loc):
      """
      Delete the given loc(-s) from the block in-place: the entries are
      dropped along axis 0 of the values and from the placement.
      """
      remaining_values = np.delete(self.values, loc, 0)
      self.values = remaining_values
      remaining_locs = self.mgr_locs.delete(loc)
      self.mgr_locs = remaining_locs
  280. def apply(self, func, mgr=None, **kwargs):
  281. """ apply the function to my values; return a block if we are not
  282. one
  283. """
  284. result = func(self.values, **kwargs)
  285. if not isinstance(result, Block):
  286. result = self.make_block(values=_block_shape(result,
  287. ndim=self.ndim))
  288. return result
  289. def fillna(self, value, limit=None, inplace=False, downcast=None,
  290. mgr=None):
  291. """ fillna on the block with the value. If we fail, then convert to
  292. ObjectBlock and try again
  293. """
  294. if not self._can_hold_na:
  295. if inplace:
  296. return self
  297. else:
  298. return self.copy()
  299. original_value = value
  300. mask = isnull(self.values)
  301. if limit is not None:
  302. if self.ndim > 2:
  303. raise NotImplementedError("number of dimensions for 'fillna' "
  304. "is currently limited to 2")
  305. mask[mask.cumsum(self.ndim - 1) > limit] = False
  306. # fillna, but if we cannot coerce, then try again as an ObjectBlock
  307. try:
  308. values, _, value, _ = self._try_coerce_args(self.values, value)
  309. blocks = self.putmask(mask, value, inplace=inplace)
  310. blocks = [b.make_block(values=self._try_coerce_result(b.values))
  311. for b in blocks]
  312. return self._maybe_downcast(blocks, downcast)
  313. except (TypeError, ValueError):
  314. # we can't process the value, but nothing to do
  315. if not mask.any():
  316. return self if inplace else self.copy()
  317. # we cannot coerce the underlying object, so
  318. # make an ObjectBlock
  319. return self.to_object_block(mgr=mgr).fillna(original_value,
  320. limit=limit,
  321. inplace=inplace,
  322. downcast=False)
  323. def _maybe_downcast(self, blocks, downcast=None):
  324. # no need to downcast our float
  325. # unless indicated
  326. if downcast is None and self.is_float:
  327. return blocks
  328. elif downcast is None and (self.is_timedelta or self.is_datetime):
  329. return blocks
  330. return _extend_blocks([b.downcast(downcast) for b in blocks])
  331. def downcast(self, dtypes=None, mgr=None):
  332. """ try to downcast each item to the dict of dtypes if present """
  333. # turn it off completely
  334. if dtypes is False:
  335. return self
  336. values = self.values
  337. # single block handling
  338. if self._is_single_block:
  339. # try to cast all non-floats here
  340. if dtypes is None:
  341. dtypes = 'infer'
  342. nv = _possibly_downcast_to_dtype(values, dtypes)
  343. return self.make_block(nv, fastpath=True)
  344. # ndim > 1
  345. if dtypes is None:
  346. return self
  347. if not (dtypes == 'infer' or isinstance(dtypes, dict)):
  348. raise ValueError("downcast must have a dictionary or 'infer' as "
  349. "its argument")
  350. # item-by-item
  351. # this is expensive as it splits the blocks items-by-item
  352. blocks = []
  353. for i, rl in enumerate(self.mgr_locs):
  354. if dtypes == 'infer':
  355. dtype = 'infer'
  356. else:
  357. raise AssertionError("dtypes as dict is not supported yet")
  358. # TODO: This either should be completed or removed
  359. dtype = dtypes.get(item, self._downcast_dtype) # noqa
  360. if dtype is None:
  361. nv = _block_shape(values[i], ndim=self.ndim)
  362. else:
  363. nv = _possibly_downcast_to_dtype(values[i], dtype)
  364. nv = _block_shape(nv, ndim=self.ndim)
  365. blocks.append(self.make_block(nv, fastpath=True, placement=[rl]))
  366. return blocks
  367. def astype(self, dtype, copy=False, raise_on_error=True, values=None,
  368. **kwargs):
  369. return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
  370. values=values, **kwargs)
  371. def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
  372. klass=None, mgr=None, **kwargs):
  373. """
  374. Coerce to the new type (if copy=True, return a new copy)
  375. raise on an except if raise == True
  376. """
  377. # may need to convert to categorical
  378. # this is only called for non-categoricals
  379. if self.is_categorical_astype(dtype):
  380. return self.make_block(Categorical(self.values, **kwargs))
  381. # astype processing
  382. dtype = np.dtype(dtype)
  383. if self.dtype == dtype:
  384. if copy:
  385. return self.copy()
  386. return self
  387. if klass is None:
  388. if dtype == np.object_:
  389. klass = ObjectBlock
  390. try:
  391. # force the copy here
  392. if values is None:
  393. if issubclass(dtype.type,
  394. (compat.text_type, compat.string_types)):
  395. # use native type formatting for datetime/tz/timedelta
  396. if self.is_datelike:
  397. values = self.to_native_types()
  398. # astype formatting
  399. else:
  400. values = self.values
  401. else:
  402. values = self.get_values(dtype=dtype)
  403. # _astype_nansafe works fine with 1-d only
  404. values = _astype_nansafe(values.ravel(), dtype, copy=True)
  405. values = values.reshape(self.shape)
  406. newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
  407. klass=klass)
  408. except:
  409. if raise_on_error is True:
  410. raise
  411. newb = self.copy() if copy else self
  412. if newb.is_numeric and self.is_numeric:
  413. if newb.shape != self.shape:
  414. raise TypeError("cannot set astype for copy = [%s] for dtype "
  415. "(%s [%s]) with smaller itemsize that current "
  416. "(%s [%s])" % (copy, self.dtype.name,
  417. self.itemsize, newb.dtype.name,
  418. newb.itemsize))
  419. return newb
  420. def convert(self, copy=True, **kwargs):
  421. """ attempt to coerce any object types to better types return a copy
  422. of the block (if copy = True) by definition we are not an ObjectBlock
  423. here!
  424. """
  425. return self.copy() if copy else self
  426. def _can_hold_element(self, value):
  427. raise NotImplementedError()
  428. def _try_cast(self, value):
  429. raise NotImplementedError()
  def _try_cast_result(self, result, dtype=None):
      """ try to cast the result back to our original type (``dtype``
      defaults to the block's own); we may have roundtripped thru object in
      the mean-time
      """
      if dtype is None:
          dtype = self.dtype

      passthrough = self.is_integer or self.is_bool or self.is_datetime
      if (not passthrough and self.is_float and
              result.dtype == self.dtype):
          # protect against a bool/object showing up here
          if isinstance(dtype, compat.string_types) and dtype == 'infer':
              return result
          if not isinstance(dtype, type):
              dtype = dtype.type

          if not issubclass(dtype, (np.bool_, np.object_)):
              return result
          if not issubclass(dtype, np.bool_):
              return result.astype(np.object_)
          if isnull(result).all():
              return result.astype(np.bool_)

          # mixed bool/NA: keep as object, mapping 1/0 to True/False
          result = result.astype(np.object_)
          result[result == 1] = True
          result[result == 0] = False
          return result

      # may need to change the dtype here
      return _possibly_downcast_to_dtype(result, dtype)
  458. def _try_operate(self, values):
  459. """ return a version to operate on as the input """
  460. return values
  461. def _try_coerce_args(self, values, other):
  462. """ provide coercion to our input arguments """
  463. return values, False, other, False
  464. def _try_coerce_result(self, result):
  465. """ reverse of try_coerce_args """
  466. return result
  def _try_coerce_and_cast_result(self, result, dtype=None):
      """ run ``_try_coerce_result`` and then ``_try_cast_result`` (with
      ``dtype``) on ``result``
      """
      coerced = self._try_coerce_result(result)
      return self._try_cast_result(coerced, dtype=dtype)
  471. def _try_fill(self, value):
  472. return value
  473. def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
  474. **kwargs):
  475. """ convert to our native types format, slicing if desired """
  476. values = self.values
  477. if slicer is not None:
  478. values = values[:, slicer]
  479. mask = isnull(values)
  480. if not self.is_object and not quoting:
  481. values = values.astype(str)
  482. else:
  483. values = np.array(values, dtype='object')
  484. values[mask] = na_rep
  485. return values
  486. # block actions ####
  def copy(self, deep=True, mgr=None):
      """ copy constructor: a same-class block over a copy of the values
      (deep=True) or over the very same values (``mgr`` is not used here)
      """
      data = self.values.copy() if deep else self.values
      return self.make_block_same_class(data)
  def replace(self, to_replace, value, inplace=False, filter=None,
              regex=False, convert=True, mgr=None):
      """
      Replace occurrences of ``to_replace`` with ``value`` through putmask;
      new blocks may be created. ``regex`` is not used here (it matters for
      ObjectBlocks) and exists for API compatibility.

      ``filter`` restricts the replacement to placement entries it contains.
      If the value cannot be processed, the block is converted to an
      ObjectBlock and the replacement is retried there.
      """
      requested = to_replace
      mask = isnull(self.values)

      try:
          values, _, to_replace, _ = self._try_coerce_args(self.values,
                                                           to_replace)
          mask = missing.mask_missing(values, to_replace)
          if filter is not None:
              excluded = ~self.mgr_locs.isin(filter)
              mask[excluded.nonzero()[0]] = False

          result_blocks = self.putmask(mask, value, inplace=inplace)
          if not convert:
              return result_blocks
          return [blk.convert(by_item=True, numeric=False,
                              copy=not inplace) for blk in result_blocks]
      except (TypeError, ValueError):
          # the value could not be processed, but there is nothing to do
          if not mask.any():
              return self if inplace else self.copy()

          as_object = self.to_object_block(mgr=mgr)
          return as_object.replace(to_replace=requested, value=value,
                                   inplace=inplace, filter=filter,
                                   regex=regex, convert=convert)
  523. def _replace_single(self, *args, **kwargs):
  524. """ no-op on a non-ObjectBlock """
  525. return self if kwargs['inplace'] else self.copy()
  def setitem(self, indexer, value, mgr=None):
      """
      Set ``value`` at ``indexer`` and return a new block, possibly of a
      different dtype.

      Parameters
      ----------
      indexer : direct slice/positional indexer
      value : scalar or array-like of a compatible shape
      mgr : forwarded to the retry ``setitem`` call

      Returns
      -------
      A new block on success. If an exception other than ValueError or
      TypeError is raised while setting, it is swallowed and ``[self]`` is
      returned.

      Raises
      ------
      ValueError
          If a list-like or slice indexer and a list-like value have
          different lengths, or if setting raises ValueError.
      TypeError
          If setting raises TypeError and the retry after casting to the
          value's dtype also fails.
      """
      # coerce None values, if appropriate
      if value is None:
          if self.is_numeric:
              value = np.nan

      # coerce args
      values, _, value, _ = self._try_coerce_args(self.values, value)
      arr_value = np.array(value)

      # cast the values to a type that can hold nan (if necessary)
      if not self._can_hold_element(value):
          dtype, _ = _maybe_promote(arr_value.dtype)
          values = values.astype(dtype)

      # 2-d blocks are operated on transposed, and transposed back at the end
      transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
      values = transf(values)
      l = len(values)

      # length checking
      # boolean with truth values == len of the value is ok too
      if isinstance(indexer, (np.ndarray, list)):
          if is_list_like(value) and len(indexer) != len(value):
              if not (isinstance(indexer, np.ndarray) and
                      indexer.dtype == np.bool_ and
                      len(indexer[indexer]) == len(value)):
                  raise ValueError("cannot set using a list-like indexer "
                                   "with a different length than the value")

      # slice
      elif isinstance(indexer, slice):
          if is_list_like(value) and l:
              if len(value) != length_of_indexer(indexer, values):
                  raise ValueError("cannot set using a slice indexer with a "
                                   "different length than the value")

      try:

          def _is_scalar_indexer(indexer):
              # return True if we are all scalar indexers
              if arr_value.ndim == 1:
                  if not isinstance(indexer, tuple):
                      indexer = tuple([indexer])
                  return all([is_scalar(idx) for idx in indexer])
              return False

          def _is_empty_indexer(indexer):
              # return a boolean if we have an empty indexer
              if arr_value.ndim == 1:
                  if not isinstance(indexer, tuple):
                      indexer = tuple([indexer])
                  return any(isinstance(idx, np.ndarray) and len(idx) == 0
                             for idx in indexer)
              return False

          # empty indexers
          # 8669 (empty)
          if _is_empty_indexer(indexer):
              pass

          # setting a single element for each dim and with a rhs that could
          # be say a list
          # GH 6043
          elif _is_scalar_indexer(indexer):
              values[indexer] = value

          # if we are an exact match (ex-broadcasting),
          # then use the resultant dtype
          elif (len(arr_value.shape) and
                arr_value.shape[0] == values.shape[0] and
                np.prod(arr_value.shape) == np.prod(values.shape)):
              values[indexer] = value
              values = values.astype(arr_value.dtype)

          # set
          else:
              values[indexer] = value

          # coerce and try to infer the dtypes of the result
          if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                        value.dtype):
              dtype = value.dtype
          elif is_scalar(value):
              dtype, _ = _infer_dtype_from_scalar(value)
          else:
              dtype = 'infer'
          values = self._try_coerce_and_cast_result(values, dtype)
          block = self.make_block(transf(values), fastpath=True)

          # may have to soft convert_objects here
          if block.is_object and not self.is_object:
              block = block.convert(numeric=False)

          return block
      except ValueError:
          raise
      except TypeError:
          # cast to the passed dtype if possible
          # otherwise raise the original error
          try:
              # e.g. we are uint32 and our value is uint64
              # this is for compat with older numpies
              block = self.make_block(transf(values.astype(value.dtype)))
              return block.setitem(indexer=indexer, value=value, mgr=mgr)
          except Exception:
              # Narrowed from a bare ``except:`` so KeyboardInterrupt and
              # SystemExit are not swallowed by this best-effort retry.
              pass

          raise
      except Exception:
          pass

      return [self]
  def putmask(self, mask, new, align=True, inplace=False, axis=0,
              transpose=False, mgr=None):
      """ putmask the data to the block; it is possible that we may create a
      new dtype of block

      return the resulting block(s)

      Parameters
      ----------
      mask : the condition to respect
      new : a ndarray/object
      align : boolean, perform alignment on other/cond, default is True
          (not read in this method)
      inplace : perform inplace modification, default is False
      axis : int
      transpose : boolean
          Set to True if self is stored with axes reversed
      mgr : not read in this method

      Returns
      -------
      a list of new blocks, the result of the putmask
      """
      # NOTE(review): nesting below was reconstructed from a flattened
      # listing -- confirm against the upstream file.
      new_values = self.values if inplace else self.values.copy()

      # unwrap objects that expose ``reindex_axis`` to their ``.values``
      if hasattr(new, 'reindex_axis'):
          new = new.values

      if hasattr(mask, 'reindex_axis'):
          mask = mask.values

      # if we are passed a scalar None, convert it here
      if not is_list_like(new) and isnull(new) and not self.is_object:
          new = self.fill_value

      # fast path: the block can hold ``new``, so write into new_values
      if self._can_hold_element(new):
          if transpose:
              new_values = new_values.T

          new = self._try_cast(new)

          # If the default repeat behavior in np.putmask would go in the
          # wrong direction, then explicitly repeat and reshape new instead
          if getattr(new, 'ndim', 0) >= 1:
              if self.ndim - 1 == new.ndim and axis == 1:
                  new = np.repeat(
                      new, new_values.shape[-1]).reshape(self.shape)
              new = new.astype(new_values.dtype)

          np.putmask(new_values, mask, new)

      # maybe upcast me
      elif mask.any():
          if transpose:
              mask = mask.T
              if isinstance(new, np.ndarray):
                  new = new.T
                  axis = new_values.ndim - axis - 1

          # Pseudo-broadcast
          if getattr(new, 'ndim', 0) >= 1:
              if self.ndim - 1 == new.ndim:
                  new_shape = list(new.shape)
                  new_shape.insert(axis, 1)
                  new = new.reshape(tuple(new_shape))

          # need to go column by column
          new_blocks = []
          if self.ndim > 1:
              for i, ref_loc in enumerate(self.mgr_locs):
                  m = mask[i]
                  v = new_values[i]

                  # need a new block
                  if m.any():
                      if isinstance(new, np.ndarray):
                          # ``new`` may have fewer rows than the block;
                          # rows are reused cyclically via the modulo
                          n = np.squeeze(new[i % new.shape[0]])
                      else:
                          n = np.array(new)

                      # type of the new block
                      dtype, _ = _maybe_promote(n.dtype)

                      # we need to explicitly astype here to make a copy
                      n = n.astype(dtype)

                      nv = _putmask_smart(v, m, n)
                  else:
                      nv = v if inplace else v.copy()

                  # Put back the dimension that was taken from it and make
                  # a block out of the result.
                  block = self.make_block(values=nv[np.newaxis],
                                          placement=[ref_loc], fastpath=True)

                  new_blocks.append(block)

          else:
              nv = _putmask_smart(new_values, mask, new)
              new_blocks.append(self.make_block(values=nv, fastpath=True))

          return new_blocks

      # reached after the fast path, or when the mask selected nothing
      if inplace:
          return [self]

      if transpose:
          new_values = new_values.T

      return [self.make_block(new_values, fastpath=True)]
  def interpolate(self, method='pad', axis=0, index=None, values=None,
                  inplace=False, limit=None, limit_direction='forward',
                  fill_value=None, coerce=False, downcast=None, mgr=None,
                  **kwargs):
      """
      Interpolate missing values, dispatching on ``method``.

      ``method`` is first tried as a fill method (handled by
      ``_interpolate_with_fill``) and then as an interpolation method
      (handled by ``_interpolate``). Bool and integer blocks that are not
      timedelta are returned untouched (self when ``inplace``, else a copy).

      Raises
      ------
      ValueError
          If ``method`` is recognised as neither kind of method.
      """

      def check_int_bool(self, inplace):
          # Only FloatBlocks will contain NaNs.
          # timedelta subclasses IntBlock
          if (self.is_bool or self.is_integer) and not self.is_timedelta:
              if inplace:
                  return self
              else:
                  return self.copy()

      # a fill na type method
      # (``except Exception`` instead of a bare ``except:`` so that
      # KeyboardInterrupt / SystemExit are not mistaken for "unknown method")
      try:
          m = missing.clean_fill_method(method)
      except Exception:
          m = None

      if m is not None:
          r = check_int_bool(self, inplace)
          if r is not None:
              return r
          return self._interpolate_with_fill(method=m, axis=axis,
                                             inplace=inplace, limit=limit,
                                             fill_value=fill_value,
                                             coerce=coerce,
                                             downcast=downcast, mgr=mgr)

      # try an interp method
      try:
          m = missing.clean_interp_method(method, **kwargs)
      except Exception:
          m = None

      if m is not None:
          r = check_int_bool(self, inplace)
          if r is not None:
              return r
          return self._interpolate(method=m, index=index, values=values,
                                   axis=axis, limit=limit,
                                   limit_direction=limit_direction,
                                   fill_value=fill_value, inplace=inplace,
                                   downcast=downcast, mgr=mgr, **kwargs)

      raise ValueError("invalid method '{0}' to interpolate.".format(method))
  751. def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
  752. limit=None, fill_value=None, coerce=False,
  753. downcast=None, mgr=None):
  754. """ fillna but using the interpolate machinery """
  755. # if we are coercing, then don't force the conversion
  756. # if the block can't hold the type
  757. if coerce:
  758. if not self._can_hold_na:
  759. if inplace:
  760. return [self]
  761. else:
  762. return [self.copy()]
  763. values = self.values if inplace else self.values.copy()
  764. values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
  765. values = self._try_operate(values)
  766. values = missing.interpolate_2d(values, method=method, axis=axis,
  767. limit=limit, fill_value=fill_value,
  768. dtype=self.dtype)
  769. values = self._try_coerce_result(values)
  770. blocks = [self.make_block(values, klass=self.__class__, fastpath=True)]
  771. return self._maybe_downcast(blocks, downcast)
  772. def _interpolate(self, method=None, index=None, values=None,
  773. fill_value=None, axis=0, limit=None,
  774. limit_direction='forward', inplace=False, downcast=None,
  775. mgr=None, **kwargs):
  776. """ interpolate using scipy wrappers """
  777. data = self.values if inplace else self.values.copy()
  778. # only deal with floats
  779. if not self.is_float:
  780. if not self.is_integer:
  781. return self
  782. data = data.astype(np.float64)
  783. if fill_value is None:
  784. fill_value = self.fill_value
  785. if method in ('krogh', 'piecewise_polynomial', 'pchip'):
  786. if not index.is_monotonic:
  787. raise ValueError("{0} interpolation requires that the "
  788. "index be monotonic.".format(method))
  789. # process 1-d slices in the axis direction
  790. def func(x):
  791. # process a 1-d slice, returning it
  792. # should the axis argument be handled below in apply_along_axis?
  793. # i.e. not an arg to missing.interpolate_1d
  794. return missing.interpolate_1d(index, x, method=method, limit=limit,
  795. limit_direction=limit_direction,
  796. fill_value=fill_value,
  797. bounds_error=False, **kwargs)
  798. # interp each column independently
  799. interp_values = np.apply_along_axis(func, axis, data)
  800. blocks = [self.make_block(interp_values, klass=self.__class__,
  801. fastpath=True)]
  802. return self._maybe_downcast(blocks, downcast)
  def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
      """
      Take values according to indexer and return them as a block.

      ``fill_tuple`` enables filling, with its first element as the fill
      value; without it the take is done with ``allow_fill=False``. When
      ``new_mgr_locs`` is omitted, the placement is derived from the
      indexer for axis 0 and reused unchanged for any other axis.
      """
      # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
      # so need to preserve types
      # sparse is treated like an ndarray, but needs .get_values() shaping
      source = self.get_values() if self.is_sparse else self.values

      if fill_tuple is not None:
          fill_value = fill_tuple[0]
          taken = algos.take_nd(source, indexer, axis=axis,
                                allow_fill=True, fill_value=fill_value)
      else:
          fill_value = self.fill_value
          taken = algos.take_nd(source, indexer, axis=axis,
                                allow_fill=False)

      if new_mgr_locs is None:
          if axis != 0:
              new_mgr_locs = self.mgr_locs
          else:
              as_slice = lib.indexer_as_slice(indexer)
              key = indexer if as_slice is None else as_slice
              new_mgr_locs = self.mgr_locs[key]

      if is_dtype_equal(taken.dtype, self.dtype):
          return self.make_block_same_class(taken, new_mgr_locs)
      return self.make_block(taken, new_mgr_locs)
  def diff(self, n, axis=1, mgr=None):
      """
      return block for the diff of the values

      The block's values are differenced by ``n`` along ``axis`` using
      algos.diff; ``mgr`` is accepted but not referenced here.

      Returns
      -------
      a one-element list holding the new block
      """
      differenced = algos.diff(self.values, n, axis=axis)
      diff_block = self.make_block(values=differenced, fastpath=True)
      return [diff_block]
  def shift(self, periods, axis=0, mgr=None):
      """
      shift the block by periods, possibly upcast

      Parameters
      ----------
      periods : int, number of positions to shift by; positive values
          vacate the leading positions along ``axis``, negative values the
          trailing ones, and 0 leaves the values in place
      axis : axis of the values to shift along, default 0
      mgr : accepted but not referenced here

      Returns
      -------
      a one-element list holding the new block
      """
      # convert integer to float if necessary. need to do a lot more than
      # that, handle boolean etc also
      new_values, fill_value = _maybe_upcast(self.values)

      # make sure array sent to np.roll is c_contiguous
      f_ordered = new_values.flags.f_contiguous
      if f_ordered:
          new_values = new_values.T
          axis = new_values.ndim - axis - 1

      # np.roll is skipped for empty arrays (product of the shape is 0)
      if np.prod(new_values.shape):
          new_values = np.roll(new_values, _ensure_platform_int(periods),
                               axis=axis)

      # overwrite the positions that np.roll wrapped around. A zero shift
      # wraps nothing; without this guard slice(0, None) would select (and
      # blank out) the entire block.
      if periods != 0:
          axis_indexer = [slice(None)] * self.ndim
          if periods > 0:
              axis_indexer[axis] = slice(None, periods)
          else:
              axis_indexer[axis] = slice(periods, None)
          new_values[tuple(axis_indexer)] = fill_value

      # restore original order
      if f_ordered:
          new_values = new_values.T

      return [self.make_block(new_values, fastpath=True)]
  def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None):
      """
      evaluate the block; return result block from the result

      Parameters
      ----------
      func  : how to combine self, other
      other : a ndarray/object
      raise_on_error : if True (the default), raise TypeError when func
          cannot be applied; if False, an object array filled with NaN is
          used as the result instead
      try_cast : try casting the results to the input type
      mgr : accepted but not referenced here

      Returns
      -------
      a one-element list holding the new block, the result of the func

      Raises
      ------
      ValueError
          on a shape/broadcast mismatch; always propagated, regardless of
          raise_on_error
      TypeError
          when func fails and raise_on_error is True, or when the result
          is not an ndarray
      """
      values = self.values

      if hasattr(other, 'reindex_axis'):
          other = other.values

      # make sure that we can broadcast
      is_transposed = False
      if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
          if values.ndim != other.ndim:
              is_transposed = True
          elif values.shape == other.shape[::-1]:
              is_transposed = True
          elif values.shape[0] == other.shape[-1]:
              is_transposed = True
          else:
              # this is a broadcast error
              raise ValueError("cannot broadcast shape [%s] with block "
                               "values [%s]" % (values.T.shape,
                                                other.shape))

      transf = (lambda x: x.T) if is_transposed else (lambda x: x)

      # coerce/transpose the args if needed
      values, values_mask, other, other_mask = self._try_coerce_args(
          transf(values), other)

      # get the result, may need to transpose the other
      def get_result(other):

          # avoid numpy warning of comparisons against None
          if other is None:
              result = not func.__name__ == 'eq'

          # avoid numpy warning of elementwise comparisons to object
          elif is_numeric_v_string_like(values, other):
              result = False

          else:
              result = func(values, other)

          # mask if needed
          if isinstance(values_mask, np.ndarray) and values_mask.any():
              result = result.astype('float64', copy=False)
              result[values_mask] = np.nan
          if other_mask is True:
              result = result.astype('float64', copy=False)
              result[:] = np.nan
          elif isinstance(other_mask, np.ndarray) and other_mask.any():
              result = result.astype('float64', copy=False)
              result[other_mask.ravel()] = np.nan

          return self._try_coerce_result(result)

      # error handler if we have an issue operating with the function.
      # The exception is passed in explicitly: the name bound by
      # ``except ... as`` is cleared when the except clause ends, so a
      # closure over it is only valid while that clause is executing.
      def handle_error(detail):

          if raise_on_error:
              raise TypeError('Could not operate %s with block values %s' %
                              (repr(other), str(detail)))

          # return the values
          result = np.empty(values.shape, dtype='O')
          result.fill(np.nan)
          return result

      # get the result
      try:
          result = get_result(other)

      # if we have an invalid shape/broadcast error
      # GH4576, so raise instead of allowing to pass through
      except ValueError:
          raise
      except Exception as detail:
          result = handle_error(detail)

      # technically a broadcast error in numpy can 'work' by returning a
      # boolean False
      if not isinstance(result, np.ndarray):

          # differentiate between an invalid ndarray-ndarray comparison
          # and an invalid type comparison
          if isinstance(values, np.ndarray) and is_list_like(other):
              raise ValueError('Invalid broadcasting comparison [%s] '
                               'with block values' % repr(other))

          raise TypeError('Could not compare [%s] with block values' %
                          repr(other))

      # transpose if needed
      result = transf(result)

      # try to cast if requested
      if try_cast:
          result = self._try_cast_result(result)

      return [self.make_block(result, fastpath=True)]
  def where(self, other, cond, align=True, raise_on_error=True,
            try_cast=False, axis=0, transpose=False, mgr=None):
      """
      evaluate the block; return result block(s) from the result

      The selection itself is delegated to
      expressions.where(cond, values, other); when ``cond`` is True
      everywhere the values are returned unchanged.

      Parameters
      ----------
      other : a ndarray/object
      cond  : the condition to respect; must expose a ``shape`` attribute
      align : boolean, perform alignment on other/cond
          (not referenced in this method's body)
      raise_on_error : if True (the default), raise TypeError when the
          where operation fails; if False, an all-NaN float64 array is
          used as the result instead
      try_cast : boolean, try casting the result via _try_cast_result
      axis : int
      transpose : boolean
          Set to True if self is stored with axes reversed
      mgr : not referenced in this method's body

      Returns
      -------
      a single new block when self._can_hold_na is true or self.ndim == 1,
      otherwise a list of new blocks (possibly empty)

      Raises
      ------
      ValueError
          if ``cond`` has no ``shape`` attribute
      TypeError
          if the where operation fails and raise_on_error is True
      """
      values = self.values
      if transpose:
          values = values.T

      # unwrap objects that expose reindex_axis down to their raw values
      if hasattr(other, 'reindex_axis'):
          other = other.values

      if hasattr(cond, 'reindex_axis'):
          cond = cond.values

      # If the default broadcasting would go in the wrong direction, then
      # explicitly reshape other instead
      if getattr(other, 'ndim', 0) >= 1:
          if values.ndim - 1 == other.ndim and axis == 1:
              # append a trailing length-1 axis to other
              other = other.reshape(tuple(other.shape + (1, )))

      if not hasattr(cond, 'shape'):
          raise ValueError("where must have a condition that is ndarray "
                           "like")

      other = _maybe_convert_string_to_object(other)
      other = _maybe_convert_scalar(other)

      # our where function
      def func(cond, values, other):
          # nothing to replace when the condition holds everywhere
          if cond.ravel().all():
              return values

          values, values_mask, other, other_mask = self._try_coerce_args(
              values, other)
          try:
              return self._try_coerce_result(expressions.where(
                  cond, values, other, raise_on_error=True))
          except Exception as detail:
              if raise_on_error:
                  raise TypeError('Could not operate [%s] with block values '
                                  '[%s]' % (repr(other), str(detail)))
              else:
                  # return the values
                  result = np.empty(values.shape, dtype='float64')
                  result.fill(np.nan)
                  return result

      # see if we can operate on the entire block, or need item-by-item
      # or if we are a single block (ndim == 1)
      result = func(cond, values, other)
      if self._can_hold_na or self.ndim == 1:

          if transpose:
              result = result.T

          # try to cast if requested
          if try_cast:
              result = self._try_cast_result(result)

          return self.make_block(result)

      # might need to separate out blocks
      # NOTE: from here on ``axis`` no longer holds the caller's argument;
      # it is rebound to the last axis of cond
      axis = cond.ndim - 1
      cond = cond.swapaxes(axis, 0)
      # one flag per position along that axis: True where cond holds for
      # the whole slice
      mask = np.array([cond[i].all() for i in range(cond.shape[0])],
                      dtype=bool)

      # build one block for the slices where cond held everywhere and one
      # for the rest, casting each group separately; empty groups are
      # skipped
      result_blocks = []
      for m in [mask, ~mask]:
          if m.any():
              r = self._try_cast_result(result.take(m.nonzero()[0],
                                                    axis=axis))
              result_blocks.append(
                  self.make_block(r.T, placement=self.mgr_locs[m]))

      return result_blocks
  def equals(self, other):
      """
      Return whether ``other`` matches this block.

      The blocks must agree on dtype and on shape; if they do, the
      outcome is whatever array_equivalent reports for the two blocks'
      values.
      """
      if self.dtype != other.dtype:
          return False
      if self.shape != other.shape:
          return False
      return array_equivalent(self.values, other.values)
  def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
      """
      compute the quantiles of the block's values

      Parameters
      ----------
      qs: a scalar or list of the quantiles to be computed
          (each is multiplied by 100 before being handed to the
          percentile function)
      interpolation: type of interpolation, default 'linear'
      axis: axis to compute, default 0
      mgr: only read when qs is a scalar and self.ndim != 1, in which case
          mgr.axes[0] is returned as the axis

      Returns
      -------
      tuple of (axis, block)

      Raises
      ------
      ValueError
          if interpolation is not 'linear' and numpy is older than 1.9
      """
      if _np_version_under1p9:
          if interpolation != 'linear':
              raise ValueError("Interpolation methods other than linear "
                               "are not supported in numpy < 1.9.")

      # the interpolation keyword is only forwarded on numpy >= 1.9
      kw = {}
      if not _np_version_under1p9:
          kw.update({'interpolation': interpolation})

      values = self.get_values()
      # only the coerced values are kept; the other three results of
      # _try_coerce_args are discarded
      values, _, _, _ = self._try_coerce_args(values, values)
      mask = isnull(self.values)
      if not lib.isscalar(mask) and mask.any():

          # even though this could be a 2-d mask it appears
          # as a 1-d result
          mask = mask.reshape(values.shape)
          result_shape = tuple([values.shape[0]] + [-1] * (self.ndim - 1))
          # drop the null entries, then restore the block's dimensionality
          values = _block_shape(values[~mask], ndim=self.ndim)
          if self.ndim > 1:
              values = values.reshape(result_shape)

      from pandas import Float64Index
      # nothing to compute over when the reduction axis has length 0
      is_empty = values.shape[axis] == 0

      if is_list_like(qs):
          ax = Float64Index(qs)

          if is_empty:
              if self.ndim == 1:
                  result = self._na_value
              else:
                  # create the array of na_values
                  # 2d len(values) * len(qs)
                  result = np.repeat(np.array([self._na_value] * len(qs)),
                                     len(values)).reshape(len(values),
                                                          len(qs))
          else:

              try:
                  result = _quantile(values, np.array(qs) * 100,
                                     axis=axis, **kw)
              except ValueError:

                  # older numpies don't handle an array for q
                  result = [_quantile(values, q * 100,
                                      axis=axis, **kw) for q in qs]

              result = np.array(result, copy=False)
              if self.ndim > 1:
                  result = result.T

      else:

          # scalar qs
          if self.ndim == 1:
              ax = Float64Index([qs])
          else:
              ax = mgr.axes[0]

          if is_empty:
              if self.ndim == 1:
                  result = self._na_value
              else:
                  result = np.array([self._na_value] * len(self))
          else:
              result = _quantile(values, qs * 100, axis=axis, **kw)

      # capture ndim before coercion; results without an ndim attribute
      # count as 0-dimensional
      ndim = getattr(result, 'ndim', None) or 0
      result = self._try_coerce_result(result)
      if is_scalar(result):
          return ax, self.make_block_scalar(result)
      return ax, make_block(result,
                            placement=np.arange(len(result)),
                            ndim=ndim)
  class ScalarBlock(Block):
      """
      a scalar compat Block

      Holds a single value while exposing the Block attributes used on it:
      ndim is 0, shape is (0,), the length is 0, and dtype is the Python
      type of the held value.
      """

      __slots__ = ['_mgr_locs', 'values', 'ndim']

      def __init__(self, values):
          # a scalar has no dimensions; its placement is the single
          # location 0
          self.ndim = 0
          self.mgr_locs = [0]
          self.values = values

      @property
      def dtype(self):
          """ the Python type of the held value """
          held = self.values
          return type(held)

      @property
      def shape(self):
          """ always the one-element tuple (0,) """
          return (0,)

      def __len__(self):
          # a scalar block reports no length
          return 0
  1124. class NonConsolidatableMixIn(object):
  1125. """ hold methods for the nonconsolidatable blocks """
  1126. _can_consolidate = False
  1127. _verify_integrity = False
  1128. _validate_ndim = False
  1129. _holder = None
  1130. def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
  1131. # Placement must be converted to BlockPlacement via property setter
  1132. # before ndim logic, because placement may be a slice which doesn't
  1133. # have a length.
  1134. self.mgr_locs = placement
  1135. # kludgetastic
  1136. if ndim is None:
  1137. if len(self.mgr_locs) != 1:
  1138. ndim = 1
  1139. else:

Large files are truncated, but you can click here to view the full file