PageRenderTime 89ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/core/internals.py

http://github.com/wesm/pandas
Python | 5218 lines | 4262 code | 469 blank | 487 comment | 452 complexity | 791ba33868e7ff745da90d2b43722e0a MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import copy
  2. import itertools
  3. import re
  4. import operator
  5. from datetime import datetime, timedelta, date
  6. from collections import defaultdict
  7. import numpy as np
  8. from numpy import percentile as _quantile
  9. from pandas.core.base import PandasObject
  10. from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
  11. from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
  12. _ensure_int64, _ensure_platform_int,
  13. is_integer,
  14. is_dtype_equal,
  15. is_timedelta64_dtype,
  16. is_datetime64_dtype, is_datetimetz, is_sparse,
  17. is_categorical, is_categorical_dtype,
  18. is_integer_dtype,
  19. is_datetime64tz_dtype,
  20. is_object_dtype,
  21. is_datetimelike_v_numeric,
  22. is_numeric_v_string_like, is_extension_type,
  23. is_list_like,
  24. is_re,
  25. is_re_compilable,
  26. is_scalar,
  27. _get_dtype)
  28. from pandas.types.cast import (_possibly_downcast_to_dtype,
  29. _maybe_convert_string_to_object,
  30. _maybe_upcast,
  31. _maybe_convert_scalar, _maybe_promote,
  32. _infer_dtype_from_scalar,
  33. _soft_convert_objects,
  34. _possibly_convert_objects,
  35. _astype_nansafe,
  36. _find_common_type)
  37. from pandas.types.missing import (isnull, array_equivalent,
  38. _is_na_compat,
  39. is_null_datelike_scalar)
  40. import pandas.types.concat as _concat
  41. from pandas.types.generic import ABCSeries
  42. from pandas.core.common import is_null_slice
  43. import pandas.core.algorithms as algos
  44. from pandas.core.index import Index, MultiIndex, _ensure_index
  45. from pandas.core.indexing import maybe_convert_indices, length_of_indexer
  46. from pandas.core.categorical import Categorical, maybe_to_categorical
  47. from pandas.tseries.index import DatetimeIndex
  48. from pandas.formats.printing import pprint_thing
  49. import pandas.core.missing as missing
  50. from pandas.sparse.array import _maybe_to_sparse, SparseArray
  51. import pandas.lib as lib
  52. import pandas.tslib as tslib
  53. import pandas.computation.expressions as expressions
  54. from pandas.util.decorators import cache_readonly
  55. from pandas.tslib import Timedelta
  56. from pandas import compat, _np_version_under1p9
  57. from pandas.compat import range, map, zip, u
  58. from pandas.lib import BlockPlacement
  59. class Block(PandasObject):
  60. """
  61. Canonical n-dimensional unit of homogeneous dtype contained in a pandas
  62. data structure
  63. Index-ignorant; let the container take care of that
  64. """
  65. __slots__ = ['_mgr_locs', 'values', 'ndim']
  66. is_numeric = False
  67. is_float = False
  68. is_integer = False
  69. is_complex = False
  70. is_datetime = False
  71. is_datetimetz = False
  72. is_timedelta = False
  73. is_bool = False
  74. is_object = False
  75. is_categorical = False
  76. is_sparse = False
  77. _box_to_block_values = True
  78. _can_hold_na = False
  79. _downcast_dtype = None
  80. _can_consolidate = True
  81. _verify_integrity = True
  82. _validate_ndim = True
  83. _ftype = 'dense'
  84. _holder = None
  85. def __init__(self, values, placement, ndim=None, fastpath=False):
  86. if ndim is None:
  87. ndim = values.ndim
  88. elif values.ndim != ndim:
  89. raise ValueError('Wrong number of dimensions')
  90. self.ndim = ndim
  91. self.mgr_locs = placement
  92. self.values = values
  93. if ndim and len(self.mgr_locs) != len(self.values):
  94. raise ValueError('Wrong number of items passed %d, placement '
  95. 'implies %d' % (len(self.values),
  96. len(self.mgr_locs)))
  97. @property
  98. def _consolidate_key(self):
  99. return (self._can_consolidate, self.dtype.name)
  100. @property
  101. def _is_single_block(self):
  102. return self.ndim == 1
  103. @property
  104. def is_view(self):
  105. """ return a boolean if I am possibly a view """
  106. return self.values.base is not None
  107. @property
  108. def is_datelike(self):
  109. """ return True if I am a non-datelike """
  110. return self.is_datetime or self.is_timedelta
  def is_categorical_astype(self, dtype):
      """
      Check whether ``dtype`` requests a conversion to categorical.

      Returns
      -------
      bool
          True when ``dtype`` compares equal to ``CategoricalDtype()``;
          False when ``dtype`` is not a categorical dtype at all.

      Raises
      ------
      TypeError
          When ``dtype`` is categorical-like but does not compare equal to
          ``CategoricalDtype()`` (e.g. a pd.Categorical was passed).
      """
      if not is_categorical_dtype(dtype):
          return False

      if dtype == CategoricalDtype():
          return True

      # categorical-like, but not something we can astype to
      raise TypeError("invalid type {0} for astype".format(dtype))
  123. def external_values(self, dtype=None):
  124. """ return an outside world format, currently just the ndarray """
  125. return self.values
  126. def internal_values(self, dtype=None):
  127. """ return an internal format, currently just the ndarray
  128. this should be the pure internal API format
  129. """
  130. return self.values
  def get_values(self, dtype=None):
      """
      Return my values, cast to object when an object dtype is requested.

      Any other ``dtype`` (including None) yields the stored values
      unchanged. Subclasses often override this for to_dense-like
      behaviour.
      """
      wants_object = is_object_dtype(dtype)
      return self.values.astype(object) if wants_object else self.values
  139. def to_dense(self):
  140. return self.values.view()
  def to_object_block(self, mgr):
      """ return a new ObjectBlock built from my values requested with
      ``dtype=object`` (``mgr`` is not used here)
      """
      as_object = self.get_values(dtype=object)
      return self.make_block(as_object, klass=ObjectBlock)
  145. @property
  146. def _na_value(self):
  147. return np.nan
  148. @property
  149. def fill_value(self):
  150. return np.nan
  151. @property
  152. def mgr_locs(self):
  153. return self._mgr_locs
  154. @property
  155. def array_dtype(self):
  156. """ the dtype to return if I want to construct this block as an
  157. array
  158. """
  159. return self.dtype
  def make_block(self, values, placement=None, ndim=None, **kwargs):
      """
      Create a new block through the module-level ``make_block``.

      ``placement`` and ``ndim`` fall back to my own ``mgr_locs`` and
      ``ndim`` when they are not given; remaining keyword arguments are
      forwarded untouched.
      """
      placement = self.mgr_locs if placement is None else placement
      ndim = self.ndim if ndim is None else ndim
      return make_block(values, placement=placement, ndim=ndim, **kwargs)
  def make_block_scalar(self, values, **kwargs):
      """
      Create a ScalarBlock wrapping ``values``.

      Extra keyword arguments are accepted but not forwarded.
      """
      scalar_block = ScalarBlock(values)
      return scalar_block
  def make_block_same_class(self, values, placement=None, fastpath=True,
                            **kwargs):
      """
      Wrap ``values`` in a block of my own class.

      ``placement`` falls back to my ``mgr_locs`` when omitted;
      ``fastpath`` and any extra keyword arguments are forwarded to the
      module-level ``make_block``.
      """
      placement = self.mgr_locs if placement is None else placement
      own_class = self.__class__
      return make_block(values, placement=placement, klass=own_class,
                        fastpath=fastpath, **kwargs)
  @mgr_locs.setter
  def mgr_locs(self, new_mgr_locs):
      """ store the placement, wrapping it in a BlockPlacement unless it
      already is one
      """
      if isinstance(new_mgr_locs, BlockPlacement):
          placement = new_mgr_locs
      else:
          placement = BlockPlacement(new_mgr_locs)
      self._mgr_locs = placement
  def __unicode__(self):
      """
      Build a short textual summary of the block.

      The items themselves are deliberately not printed: a single block
      shows its class name, length and dtype; any other block shows its
      class name, placement indexer, shape and dtype.
      """
      cls_name = pprint_thing(self.__class__.__name__)

      if self._is_single_block:
          return '%s: %s dtype: %s' % (cls_name, len(self), self.dtype)

      dims = ' x '.join([pprint_thing(extent) for extent in self.shape])
      locs = pprint_thing(self.mgr_locs.indexer)
      return '%s: %s, %s, dtype: %s' % (cls_name, locs, dims, self.dtype)
  197. def __len__(self):
  198. return len(self.values)
  199. def __getstate__(self):
  200. return self.mgr_locs.indexer, self.values
  def __setstate__(self, state):
      """ pickle support: restore from a (placement indexer, values) pair;
      ``ndim`` is re-derived from the restored values
      """
      indexer, values = state[0], state[1]
      self.mgr_locs = BlockPlacement(indexer)
      self.values = values
      self.ndim = self.values.ndim
  205. def _slice(self, slicer):
  206. """ return a slice of my values """
  207. return self.values[slicer]
  def reshape_nd(self, labels, shape, ref_items, mgr=None):
      """
      Return a new block that is transformed to a nd block.

      Parameters
      ----------
      labels : list of new axis labels
      shape : new shape
      ref_items : new ref_items
      mgr : not used here

      The transposed result of ``get_values()`` plus my placement are
      handed to ``_block2d_to_blocknd``.
      """
      transposed = self.get_values().T
      return _block2d_to_blocknd(values=transposed,
                                 placement=self.mgr_locs, shape=shape,
                                 labels=labels, ref_items=ref_items)
  def getitem_block(self, slicer, new_mgr_locs=None):
      """
      Perform __getitem__-like, return result as block.

      As of now, only supports slices that preserve dimensionality.

      When ``new_mgr_locs`` is omitted it is derived by indexing my
      placement with the axis-0 part of ``slicer`` (the first element
      when ``slicer`` is a tuple, otherwise ``slicer`` itself).

      Raises
      ------
      ValueError
          If ``_validate_ndim`` is set and the sliced values have a
          different number of dimensions than I do.
      """
      if new_mgr_locs is None:
          first_axis = slicer[0] if isinstance(slicer, tuple) else slicer
          new_mgr_locs = self.mgr_locs[first_axis]

      sliced = self._slice(slicer)

      if self._validate_ndim and sliced.ndim != self.ndim:
          raise ValueError("Only same dim slicing is allowed")

      return self.make_block_same_class(sliced, new_mgr_locs)
  235. @property
  236. def shape(self):
  237. return self.values.shape
  238. @property
  239. def itemsize(self):
  240. return self.values.itemsize
  241. @property
  242. def dtype(self):
  243. return self.values.dtype
  244. @property
  245. def ftype(self):
  246. return "%s:%s" % (self.dtype, self._ftype)
  def merge(self, other):
      """ merge myself with ``other`` by passing both, in that order, to
      ``_merge_blocks``
      """
      pair = [self, other]
      return _merge_blocks(pair)
  249. def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
  250. limit=None, mask_info=None):
  251. """
  252. Reindex using pre-computed indexer information
  253. """
  254. if axis < 1:
  255. raise AssertionError('axis must be at least 1, got %d' % axis)
  256. if fill_value is None:
  257. fill_value = self.fill_value
  258. new_values = algos.take_nd(self.values, indexer, axis,
  259. fill_value=fill_value, mask_info=mask_info)
  260. return self.make_block(new_values, fastpath=True)
  261. def get(self, item):
  262. loc = self.items.get_loc(item)
  263. return self.values[loc]
  264. def iget(self, i):
  265. return self.values[i]
  266. def set(self, locs, values, check=False):
  267. """
  268. Modify Block in-place with new item value
  269. Returns
  270. -------
  271. None
  272. """
  273. self.values[locs] = values
  def delete(self, loc):
      """
      Delete given loc(-s) from block in-place.

      Rebinds ``values`` to the result of ``np.delete`` along axis 0 and
      updates my placement with ``mgr_locs.delete(loc)``.
      """
      remaining = np.delete(self.values, loc, 0)
      self.values = remaining
      self.mgr_locs = self.mgr_locs.delete(loc)
  280. def apply(self, func, mgr=None, **kwargs):
  281. """ apply the function to my values; return a block if we are not
  282. one
  283. """
  284. result = func(self.values, **kwargs)
  285. if not isinstance(result, Block):
  286. result = self.make_block(values=_block_shape(result,
  287. ndim=self.ndim))
  288. return result
  289. def fillna(self, value, limit=None, inplace=False, downcast=None,
  290. mgr=None):
  291. """ fillna on the block with the value. If we fail, then convert to
  292. ObjectBlock and try again
  293. """
  294. if not self._can_hold_na:
  295. if inplace:
  296. return self
  297. else:
  298. return self.copy()
  299. original_value = value
  300. mask = isnull(self.values)
  301. if limit is not None:
  302. if self.ndim > 2:
  303. raise NotImplementedError("number of dimensions for 'fillna' "
  304. "is currently limited to 2")
  305. mask[mask.cumsum(self.ndim - 1) > limit] = False
  306. # fillna, but if we cannot coerce, then try again as an ObjectBlock
  307. try:
  308. values, _, value, _ = self._try_coerce_args(self.values, value)
  309. blocks = self.putmask(mask, value, inplace=inplace)
  310. blocks = [b.make_block(values=self._try_coerce_result(b.values))
  311. for b in blocks]
  312. return self._maybe_downcast(blocks, downcast)
  313. except (TypeError, ValueError):
  314. # we can't process the value, but nothing to do
  315. if not mask.any():
  316. return self if inplace else self.copy()
  317. # we cannot coerce the underlying object, so
  318. # make an ObjectBlock
  319. return self.to_object_block(mgr=mgr).fillna(original_value,
  320. limit=limit,
  321. inplace=inplace,
  322. downcast=False)
  323. def _maybe_downcast(self, blocks, downcast=None):
  324. # no need to downcast our float
  325. # unless indicated
  326. if downcast is None and self.is_float:
  327. return blocks
  328. elif downcast is None and (self.is_timedelta or self.is_datetime):
  329. return blocks
  330. return _extend_blocks([b.downcast(downcast) for b in blocks])
  331. def downcast(self, dtypes=None, mgr=None):
  332. """ try to downcast each item to the dict of dtypes if present """
  333. # turn it off completely
  334. if dtypes is False:
  335. return self
  336. values = self.values
  337. # single block handling
  338. if self._is_single_block:
  339. # try to cast all non-floats here
  340. if dtypes is None:
  341. dtypes = 'infer'
  342. nv = _possibly_downcast_to_dtype(values, dtypes)
  343. return self.make_block(nv, fastpath=True)
  344. # ndim > 1
  345. if dtypes is None:
  346. return self
  347. if not (dtypes == 'infer' or isinstance(dtypes, dict)):
  348. raise ValueError("downcast must have a dictionary or 'infer' as "
  349. "its argument")
  350. # item-by-item
  351. # this is expensive as it splits the blocks items-by-item
  352. blocks = []
  353. for i, rl in enumerate(self.mgr_locs):
  354. if dtypes == 'infer':
  355. dtype = 'infer'
  356. else:
  357. raise AssertionError("dtypes as dict is not supported yet")
  358. # TODO: This either should be completed or removed
  359. dtype = dtypes.get(item, self._downcast_dtype) # noqa
  360. if dtype is None:
  361. nv = _block_shape(values[i], ndim=self.ndim)
  362. else:
  363. nv = _possibly_downcast_to_dtype(values[i], dtype)
  364. nv = _block_shape(nv, ndim=self.ndim)
  365. blocks.append(self.make_block(nv, fastpath=True, placement=[rl]))
  366. return blocks
  367. def astype(self, dtype, copy=False, raise_on_error=True, values=None,
  368. **kwargs):
  369. return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
  370. values=values, **kwargs)
  def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
              klass=None, mgr=None, **kwargs):
      """
      Coerce to the new type.

      Parameters
      ----------
      dtype : target dtype (anything ``np.dtype`` accepts, or categorical)
      copy : bool
          When the dtype already matches (or the cast fails with
          ``raise_on_error`` off), return a copy instead of ``self``.
      raise_on_error : bool
          Re-raise a failing cast when exactly True; otherwise fall back
          to the original block.
      values : optional pre-converted values to wrap
      klass : optional block class; defaults to ObjectBlock for an
          object target
      mgr : not used here
      **kwargs : forwarded to ``Categorical`` on the categorical path

      Raises
      ------
      TypeError
          If both blocks are numeric and the resulting shape differs.
      """
      # may need to convert to categorical
      # this is only called for non-categoricals
      if self.is_categorical_astype(dtype):
          return self.make_block(Categorical(self.values, **kwargs))

      # astype processing
      dtype = np.dtype(dtype)
      if self.dtype == dtype:
          return self.copy() if copy else self

      if klass is None and dtype == np.object_:
          klass = ObjectBlock

      try:
          # force the copy here
          if values is None:

              if issubclass(dtype.type,
                            (compat.text_type, compat.string_types)):

                  # use native type formatting for datetime/tz/timedelta
                  if self.is_datelike:
                      values = self.to_native_types()

                  # astype formatting
                  else:
                      values = self.values

              else:
                  values = self.get_values(dtype=dtype)

              # _astype_nansafe works fine with 1-d only
              values = _astype_nansafe(values.ravel(), dtype, copy=True)
              values = values.reshape(self.shape)

          newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
                            klass=klass)
      except Exception:
          # only genuine errors are handled here, so KeyboardInterrupt and
          # SystemExit are never swallowed when raise_on_error is off
          if raise_on_error is True:
              raise
          newb = self.copy() if copy else self

      if newb.is_numeric and self.is_numeric:
          if newb.shape != self.shape:
              raise TypeError("cannot set astype for copy = [%s] for dtype "
                              "(%s [%s]) with smaller itemsize that current "
                              "(%s [%s])" % (copy, self.dtype.name,
                                             self.itemsize, newb.dtype.name,
                                             newb.itemsize))
      return newb
  420. def convert(self, copy=True, **kwargs):
  421. """ attempt to coerce any object types to better types return a copy
  422. of the block (if copy = True) by definition we are not an ObjectBlock
  423. here!
  424. """
  425. return self.copy() if copy else self
  426. def _can_hold_element(self, value):
  427. raise NotImplementedError()
  428. def _try_cast(self, value):
  429. raise NotImplementedError()
  def _try_cast_result(self, result, dtype=None):
      """
      Try to cast the result to our original type, we may have
      roundtripped thru object in the mean-time.

      ``dtype`` defaults to my own dtype. Integer, bool and datetime
      blocks, and any case where ``result`` no longer has my dtype, go
      straight to ``_possibly_downcast_to_dtype``. A float block whose
      result still has my dtype is handled here: 'infer' returns the
      result untouched, a bool/object target is converted by hand, and
      any other target returns the result as-is.
      """
      if dtype is None:
          dtype = self.dtype

      bypass = self.is_integer or self.is_bool or self.is_datetime
      if not bypass and self.is_float and result.dtype == self.dtype:

          # protect against a bool/object showing up here
          if isinstance(dtype, compat.string_types) and dtype == 'infer':
              return result
          if not isinstance(dtype, type):
              dtype = dtype.type

          if issubclass(dtype, np.bool_):
              if isnull(result).all():
                  return result.astype(np.bool_)
              # go through object so 1/0 can be replaced by True/False
              result = result.astype(np.object_)
              result[result == 1] = True
              result[result == 0] = False
              return result

          if issubclass(dtype, np.object_):
              return result.astype(np.object_)

          return result

      # may need to change the dtype here
      return _possibly_downcast_to_dtype(result, dtype)
  458. def _try_operate(self, values):
  459. """ return a version to operate on as the input """
  460. return values
  461. def _try_coerce_args(self, values, other):
  462. """ provide coercion to our input arguments """
  463. return values, False, other, False
  464. def _try_coerce_result(self, result):
  465. """ reverse of try_coerce_args """
  466. return result
  467. def _try_coerce_and_cast_result(self, result, dtype=None):
  468. result = self._try_coerce_result(result)
  469. result = self._try_cast_result(result, dtype=dtype)
  470. return result
  471. def _try_fill(self, value):
  472. return value
  def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
                      **kwargs):
      """
      Convert to our native types format, slicing if desired.

      Parameters
      ----------
      slicer : optional, applied as ``values[:, slicer]``
      na_rep : replacement written wherever the values are null
      quoting : when truthy, an object array is built instead of a str
          cast (object blocks always get the object array)
      **kwargs : accepted but not read here
      """
      values = self.values
      if slicer is not None:
          values = values[:, slicer]

      # the null mask is computed before any conversion
      mask = isnull(values)

      use_object = self.is_object or quoting
      if use_object:
          values = np.array(values, dtype='object')
      else:
          values = values.astype(str)

      values[mask] = na_rep
      return values
  486. # block actions ####
  487. def copy(self, deep=True, mgr=None):
  488. """ copy constructor """
  489. values = self.values
  490. if deep:
  491. values = values.copy()
  492. return self.make_block_same_class(values)
  def replace(self, to_replace, value, inplace=False, filter=None,
              regex=False, convert=True, mgr=None):
      """
      Replace the ``to_replace`` value with ``value``; new blocks may be
      created. This is essentially a call to ``putmask``.

      ``regex`` is not used here (ObjectBlocks use it); it is only passed
      along when the operation is retried on an ObjectBlock, which
      happens if coercing or writing raises TypeError/ValueError and the
      current mask has any hit.

      Parameters
      ----------
      filter : optional; placement entries outside it are masked out
      convert : when truthy, each resulting block is converted with
          ``by_item=True, numeric=False, copy=not inplace``
      """
      requested = to_replace
      mask = isnull(self.values)

      # try to replace, if we raise an error, convert to ObjectBlock and
      # retry
      try:
          values, _, to_replace, _ = self._try_coerce_args(self.values,
                                                           to_replace)
          mask = missing.mask_missing(values, to_replace)
          if filter is not None:
              excluded = ~self.mgr_locs.isin(filter)
              mask[excluded.nonzero()[0]] = False

          result = self.putmask(mask, value, inplace=inplace)
          if convert:
              result = [blk.convert(by_item=True, numeric=False,
                                    copy=not inplace) for blk in result]
          return result
      except (TypeError, ValueError):

          # we can't process the value, but nothing to do
          if not mask.any():
              return self if inplace else self.copy()

          as_object = self.to_object_block(mgr=mgr)
          return as_object.replace(
              to_replace=requested, value=value, inplace=inplace,
              filter=filter, regex=regex, convert=convert)
  523. def _replace_single(self, *args, **kwargs):
  524. """ no-op on a non-ObjectBlock """
  525. return self if kwargs['inplace'] else self.copy()
  def setitem(self, indexer, value, mgr=None):
      """
      Set the value inplace; return a new block (of a possibly different
      dtype).

      Parameters
      ----------
      indexer : a direct slice/positional indexer
      value : must be of a compatible shape; None becomes NaN on numeric
          blocks
      mgr : only forwarded when the assignment is retried after a
          TypeError

      Raises
      ------
      ValueError
          When a list-like ``value`` does not match the length selected by
          a list/ndarray or slice ``indexer`` (a boolean ndarray indexer
          is measured by its number of True entries), or when the
          assignment itself raises ValueError.
      TypeError
          When the assignment raises TypeError and the retry after casting
          to ``value.dtype`` fails as well.

      Any other exception raised during the assignment is swallowed and
      ``[self]`` is returned.
      """
      # coerce None values, if appropriate
      if value is None:
          if self.is_numeric:
              value = np.nan

      # coerce args
      values, _, value, _ = self._try_coerce_args(self.values, value)
      arr_value = np.array(value)

      # cast the values to a type that can hold nan (if necessary)
      if not self._can_hold_element(value):
          dtype, _ = _maybe_promote(arr_value.dtype)
          values = values.astype(dtype)

      # 2-d blocks are worked on transposed and flipped back at the end
      is_2d = self.ndim == 2

      def transf(arr):
          return arr.T if is_2d else arr

      values = transf(values)
      n_values = len(values)

      # length checking
      # boolean with truth values == len of the value is ok too
      if isinstance(indexer, (np.ndarray, list)):
          if is_list_like(value) and len(indexer) != len(value):
              if not (isinstance(indexer, np.ndarray) and
                      indexer.dtype == np.bool_ and
                      len(indexer[indexer]) == len(value)):
                  raise ValueError("cannot set using a list-like indexer "
                                   "with a different length than the value")

      # slice
      elif isinstance(indexer, slice):

          if is_list_like(value) and n_values:
              if len(value) != length_of_indexer(indexer, values):
                  raise ValueError("cannot set using a slice indexer with a "
                                   "different length than the value")

      def _as_tuple(idx):
          # normalise a lone indexer to a 1-tuple
          if not isinstance(idx, tuple):
              idx = tuple([idx])
          return idx

      def _is_scalar_indexer(idx):
          # return True if we are all scalar indexers
          if arr_value.ndim == 1:
              return all([is_scalar(part) for part in _as_tuple(idx)])
          return False

      def _is_empty_indexer(idx):
          # return a boolean if we have an empty indexer
          if arr_value.ndim == 1:
              return any(isinstance(part, np.ndarray) and len(part) == 0
                         for part in _as_tuple(idx))
          return False

      try:

          # empty indexers
          # 8669 (empty)
          if _is_empty_indexer(indexer):
              pass

          # setting a single element for each dim and with a rhs that could
          # be say a list
          # GH 6043
          elif _is_scalar_indexer(indexer):
              values[indexer] = value

          # if we are an exact match (ex-broadcasting),
          # then use the resultant dtype
          elif (len(arr_value.shape) and
                arr_value.shape[0] == values.shape[0] and
                np.prod(arr_value.shape) == np.prod(values.shape)):
              values[indexer] = value
              values = values.astype(arr_value.dtype)

          # set
          else:
              values[indexer] = value

          # coerce and try to infer the dtypes of the result
          if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                        value.dtype):
              dtype = value.dtype
          elif is_scalar(value):
              dtype, _ = _infer_dtype_from_scalar(value)
          else:
              dtype = 'infer'
          values = self._try_coerce_and_cast_result(values, dtype)
          block = self.make_block(transf(values), fastpath=True)

          # may have to soft convert_objects here
          if block.is_object and not self.is_object:
              block = block.convert(numeric=False)

          return block
      except ValueError:
          raise
      except TypeError:

          # cast to the passed dtype if possible
          # otherwise raise the original error
          try:
              # e.g. we are uint32 and our value is uint64
              # this is for compat with older numpies
              block = self.make_block(transf(values.astype(value.dtype)))
              return block.setitem(indexer=indexer, value=value, mgr=mgr)

          except:
              pass

          raise

      except Exception:
          pass

      return [self]
  def putmask(self, mask, new, align=True, inplace=False, axis=0,
              transpose=False, mgr=None):
      """
      Putmask the data to the block; it is possible that we may create a
      new dtype of block.

      Parameters
      ----------
      mask : the condition to respect
      new : a ndarray/object
      align : boolean, perform alignment on other/cond, default is True
          (not read in this method body)
      inplace : perform inplace modification, default is False
      axis : int
      transpose : boolean
          Set to True if self is stored with axes reversed
      mgr : not read in this method body

      Returns
      -------
      a list of new blocks, the result of the putmask
      """
      new_values = self.values if inplace else self.values.copy()

      # unwrap containers that expose `reindex_axis` to their raw values
      if hasattr(new, 'reindex_axis'):
          new = new.values

      if hasattr(mask, 'reindex_axis'):
          mask = mask.values

      # if we are passed a scalar None, convert it here
      if not is_list_like(new) and isnull(new) and not self.is_object:
          new = self.fill_value

      if self._can_hold_element(new):
          if transpose:
              new_values = new_values.T

          new = self._try_cast(new)

          # If the default repeat behavior in np.putmask would go in the
          # wrong direction, then explictly repeat and reshape new instead
          if getattr(new, 'ndim', 0) >= 1:
              if self.ndim - 1 == new.ndim and axis == 1:
                  new = np.repeat(
                      new, new_values.shape[-1]).reshape(self.shape)
              new = new.astype(new_values.dtype)

          np.putmask(new_values, mask, new)

      # maybe upcast me
      elif mask.any():
          if transpose:
              mask = mask.T
              if isinstance(new, np.ndarray):
                  new = new.T
              axis = new_values.ndim - axis - 1

          # Pseudo-broadcast
          if getattr(new, 'ndim', 0) >= 1:
              if self.ndim - 1 == new.ndim:
                  widened = list(new.shape)
                  widened.insert(axis, 1)
                  new = new.reshape(tuple(widened))

          # need to go column by column
          new_blocks = []
          if self.ndim > 1:
              for i, ref_loc in enumerate(self.mgr_locs):
                  item_mask = mask[i]
                  item_values = new_values[i]

                  # need a new block
                  if item_mask.any():
                      if isinstance(new, np.ndarray):
                          item_new = np.squeeze(new[i % new.shape[0]])
                      else:
                          item_new = np.array(new)

                      # type of the new block
                      dtype, _ = _maybe_promote(item_new.dtype)

                      # we need to explicitly astype here to make a copy
                      item_new = item_new.astype(dtype)

                      item_result = _putmask_smart(item_values, item_mask,
                                                   item_new)
                  else:
                      item_result = (item_values if inplace
                                     else item_values.copy())

                  # Put back the dimension that was taken from it and make
                  # a block out of the result.
                  block = self.make_block(values=item_result[np.newaxis],
                                          placement=[ref_loc], fastpath=True)

                  new_blocks.append(block)

          else:
              merged = _putmask_smart(new_values, mask, new)
              new_blocks.append(self.make_block(values=merged,
                                                fastpath=True))

          return new_blocks

      if inplace:
          return [self]

      if transpose:
          new_values = new_values.T

      return [self.make_block(new_values, fastpath=True)]
  def interpolate(self, method='pad', axis=0, index=None, values=None,
                  inplace=False, limit=None, limit_direction='forward',
                  fill_value=None, coerce=False, downcast=None, mgr=None,
                  **kwargs):
      """
      Fill or interpolate missing values.

      ``method`` is first tried as a fill method (dispatching to
      ``_interpolate_with_fill``) and then as an interpolation method
      (dispatching to ``_interpolate``). Bool and integer blocks that are
      not timedelta are returned unchanged (``self`` when ``inplace``,
      otherwise a copy) once the method has been recognised.

      Raises
      ------
      ValueError
          If ``method`` is recognised by neither cleaner.
      """

      def _untouched_int_bool():
          # Only FloatBlocks will contain NaNs.
          # timedelta subclasses IntBlock
          if (self.is_bool or self.is_integer) and not self.is_timedelta:
              return self if inplace else self.copy()
          return None

      # a fill na type method
      try:
          m = missing.clean_fill_method(method)
      except Exception:
          # an unrecognised method is retried as an interp method below;
          # KeyboardInterrupt / SystemExit are deliberately not caught
          m = None

      if m is not None:
          r = _untouched_int_bool()
          if r is not None:
              return r
          return self._interpolate_with_fill(method=m, axis=axis,
                                             inplace=inplace, limit=limit,
                                             fill_value=fill_value,
                                             coerce=coerce,
                                             downcast=downcast, mgr=mgr)

      # try an interp method
      try:
          m = missing.clean_interp_method(method, **kwargs)
      except Exception:
          m = None

      if m is not None:
          r = _untouched_int_bool()
          if r is not None:
              return r
          return self._interpolate(method=m, index=index, values=values,
                                   axis=axis, limit=limit,
                                   limit_direction=limit_direction,
                                   fill_value=fill_value, inplace=inplace,
                                   downcast=downcast, mgr=mgr, **kwargs)

      raise ValueError("invalid method '{0}' to interpolate.".format(method))
  751. def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
  752. limit=None, fill_value=None, coerce=False,
  753. downcast=None, mgr=None):
  754. """ fillna but using the interpolate machinery """
  755. # if we are coercing, then don't force the conversion
  756. # if the block can't hold the type
  757. if coerce:
  758. if not self._can_hold_na:
  759. if inplace:
  760. return [self]
  761. else:
  762. return [self.copy()]
  763. values = self.values if inplace else self.values.copy()
  764. values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
  765. values = self._try_operate(values)
  766. values = missing.interpolate_2d(values, method=method, axis=axis,
  767. limit=limit, fill_value=fill_value,
  768. dtype=self.dtype)
  769. values = self._try_coerce_result(values)
  770. blocks = [self.make_block(values, klass=self.__class__, fastpath=True)]
  771. return self._maybe_downcast(blocks, downcast)
  772. def _interpolate(self, method=None, index=None, values=None,
  773. fill_value=None, axis=0, limit=None,
  774. limit_direction='forward', inplace=False, downcast=None,
  775. mgr=None, **kwargs):
  776. """ interpolate using scipy wrappers """
  777. data = self.values if inplace else self.values.copy()
  778. # only deal with floats
  779. if not self.is_float:
  780. if not self.is_integer:
  781. return self
  782. data = data.astype(np.float64)
  783. if fill_value is None:
  784. fill_value = self.fill_value
  785. if method in ('krogh', 'piecewise_polynomial', 'pchip'):
  786. if not index.is_monotonic:
  787. raise ValueError("{0} interpolation requires that the "
  788. "index be monotonic.".format(method))
  789. # process 1-d slices in the axis direction
  790. def func(x):
  791. # process a 1-d slice, returning it
  792. # should the axis argument be handled below in apply_along_axis?
  793. # i.e. not an arg to missing.interpolate_1d
  794. return missing.interpolate_1d(index, x, method=method, limit=limit,
  795. limit_direction=limit_direction,
  796. fill_value=fill_value,
  797. bounds_error=False, **kwargs)
  798. # interp each column independently
  799. interp_values = np.apply_along_axis(func, axis, data)
  800. blocks = [self.make_block(interp_values, klass=self.__class__,
  801. fastpath=True)]
  802. return self._maybe_downcast(blocks, downcast)
  def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
      """
      Take values according to indexer and return them as a block.

      Parameters
      ----------
      indexer : positions to take
      axis : int
      new_mgr_locs : optional placement for the result; when omitted it is
          derived from my own placement (re-indexed for ``axis == 0``,
          reused otherwise)
      fill_tuple : optional; when given, filling is allowed and its first
          element is used as the fill value

      The result keeps my class when its dtype equals mine; otherwise a
      block is created through ``make_block``.
      """
      # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
      # so need to preserve types
      # sparse is treated like an ndarray, but needs .get_values() shaping
      source = self.get_values() if self.is_sparse else self.values

      if fill_tuple is None:
          fill_value = self.fill_value
          taken = algos.take_nd(source, indexer, axis=axis,
                                allow_fill=False)
      else:
          fill_value = fill_tuple[0]
          taken = algos.take_nd(source, indexer, axis=axis,
                                allow_fill=True, fill_value=fill_value)

      if new_mgr_locs is None:
          if axis != 0:
              new_mgr_locs = self.mgr_locs
          else:
              # use a slice into my placement when the indexer allows it
              as_slice = lib.indexer_as_slice(indexer)
              key = as_slice if as_slice is not None else indexer
              new_mgr_locs = self.mgr_locs[key]

      if is_dtype_equal(taken.dtype, self.dtype):
          return self.make_block_same_class(taken, new_mgr_locs)
      return self.make_block(taken, new_mgr_locs)
  834. def diff(self, n, axis=1, mgr=None):
  835. """ return block for the diff of the values """
  836. new_values = algos.diff(self.values, n, axis=axis)
  837. return [self.make_block(values=new_values, fastpath=True)]
  838. def shift(self, periods, axis=0, mgr=None):
  839. """ shift the block by periods, possibly upcast """
  840. # convert integer to float if necessary. need to do a lot more than
  841. # that, handle boolean etc also
  842. new_values, fill_value = _maybe_upcast(self.values)
  843. # make sure array sent to np.roll is c_contiguous
  844. f_ordered = new_values.flags.f_contiguous
  845. if f_ordered:
  846. new_values = new_values.T
  847. axis = new_values.ndim - axis - 1
  848. if np.prod(new_values.shape):
  849. new_values = np.roll(new_values, _ensure_platform_int(periods),
  850. axis=axis)
  851. axis_indexer = [slice(None)] * self.ndim
  852. if periods > 0:
  853. axis_indexer[axis] = slice(None, periods)
  854. else:
  855. axis_indexer[axis] = slice(periods, None)
  856. new_values[tuple(axis_indexer)] = fill_value
  857. # restore original order
  858. if f_ordered:
  859. new_values = new_values.T
  860. return [self.make_block(new_values, fastpath=True)]
  861. def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None):
  862. """
  863. evaluate the block; return result block from the result
  864. Parameters
  865. ----------
  866. func : how to combine self, other
  867. other : a ndarray/object
  868. raise_on_error : if True, raise when I can't perform the function,
  869. False by default (and just return the data that we had coming in)
  870. try_cast : try casting the results to the input type
  871. Returns
  872. -------
  873. a new block, the result of the func
  874. """
  875. values = self.values
  876. if hasattr(other, 'reindex_axis'):
  877. other = other.values
  878. # make sure that we can broadcast
  879. is_transposed = False
  880. if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
  881. if values.ndim != other.ndim:
  882. is_transposed = True
  883. else:
  884. if values.shape == other.shape[::-1]:
  885. is_transposed = True
  886. elif values.shape[0] == other.shape[-1]:
  887. is_transposed = True
  888. else:
  889. # this is a broadcast error heree
  890. raise ValueError("cannot broadcast shape [%s] with block "
  891. "values [%s]" % (values.T.shape,
  892. other.shape))
  893. transf = (lambda x: x.T) if is_transposed else (lambda x: x)
  894. # coerce/transpose the args if needed
  895. values, values_mask, other, other_mask = self._try_coerce_args(
  896. transf(values), other)
  897. # get the result, may need to transpose the other
  898. def get_result(other):
  899. # avoid numpy warning of comparisons again None
  900. if other is None:
  901. result = not func.__name__ == 'eq'
  902. # avoid numpy warning of elementwise comparisons to object
  903. elif is_numeric_v_string_like(values, other):
  904. result = False
  905. else:
  906. result = func(values, other)
  907. # mask if needed
  908. if isinstance(values_mask, np.ndarray) and values_mask.any():
  909. result = result.astype('float64', copy=False)
  910. result[values_mask] = np.nan
  911. if other_mask is True:
  912. result = result.astype('float64', copy=False)
  913. result[:] = np.nan
  914. elif isinstance(other_mask, np.ndarray) and other_mask.any():
  915. result = result.astype('float64', copy=False)
  916. result[other_mask.ravel()] = np.nan
  917. return self._try_coerce_result(result)
  918. # error handler if we have an issue operating with the function
  919. def handle_error():
  920. if raise_on_error:
  921. raise TypeError('Could not operate %s with block values %s' %
  922. (repr(other), str(detail)))
  923. else:
  924. # return the values
  925. result = np.empty(values.shape, dtype='O')
  926. result.fill(np.nan)
  927. return result
  928. # get the result
  929. try:
  930. result = get_result(other)
  931. # if we have an invalid shape/broadcast error
  932. # GH4576, so raise instead of allowing to pass through
  933. except ValueError as detail:
  934. raise
  935. except Exception as detail:
  936. result = handle_error()
  937. # technically a broadcast error in numpy can 'work' by returning a
  938. # boolean False
  939. if not isinstance(result, np.ndarray):
  940. if not isinstance(result, np.ndarray):
  941. # differentiate between an invalid ndarray-ndarray comparison
  942. # and an invalid type comparison
  943. if isinstance(values, np.ndarray) and is_list_like(other):
  944. raise ValueError('Invalid broadcasting comparison [%s] '
  945. 'with block values' % repr(other))
  946. raise TypeError('Could not compare [%s] with block values' %
  947. repr(other))
  948. # transpose if needed
  949. result = transf(result)
  950. # try to cast if requested
  951. if try_cast:
  952. result = self._try_cast_result(result)
  953. return [self.make_block(result, fastpath=True, )]
    def where(self, other, cond, align=True, raise_on_error=True,
              try_cast=False, axis=0, transpose=False, mgr=None):
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
        cond : the condition to respect
        align : boolean, perform alignment on other/cond
        raise_on_error : if True (the default), raise a TypeError when the
            operation cannot be performed; if False an all-NaN float64
            array is used as the result instead
        try_cast : try casting the results to the input type
        axis : int
        transpose : boolean
            Set to True if self is stored with axes reversed
        mgr : unused here

        Returns
        -------
        a new block(s), the result of the func; a single block when the
        block can hold NA or is 1-dimensional, otherwise a list of blocks
        """

        values = self.values
        if transpose:
            values = values.T

        # unwrap pandas objects to their underlying values
        if hasattr(other, 'reindex_axis'):
            other = other.values

        if hasattr(cond, 'reindex_axis'):
            cond = cond.values

        # If the default broadcasting would go in the wrong direction, then
        # explicitly reshape other instead
        if getattr(other, 'ndim', 0) >= 1:
            if values.ndim - 1 == other.ndim and axis == 1:
                other = other.reshape(tuple(other.shape + (1, )))

        if not hasattr(cond, 'shape'):
            raise ValueError("where must have a condition that is ndarray "
                             "like")

        other = _maybe_convert_string_to_object(other)
        other = _maybe_convert_scalar(other)

        # our where function
        def func(cond, values, other):
            # everything is kept: nothing to replace
            if cond.ravel().all():
                return values

            values, values_mask, other, other_mask = self._try_coerce_args(
                values, other)

            try:
                return self._try_coerce_result(expressions.where(
                    cond, values, other, raise_on_error=True))
            except Exception as detail:
                if raise_on_error:
                    raise TypeError('Could not operate [%s] with block values '
                                    '[%s]' % (repr(other), str(detail)))
                else:
                    # fall back to an all-NaN float64 array
                    result = np.empty(values.shape, dtype='float64')
                    result.fill(np.nan)
                    return result

        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        result = func(cond, values, other)
        if self._can_hold_na or self.ndim == 1:

            if transpose:
                result = result.T

            # try to cast if requested
            if try_cast:
                result = self._try_cast_result(result)

            return self.make_block(result)

        # might need to separate out blocks
        axis = cond.ndim - 1
        cond = cond.swapaxes(axis, 0)
        # mask[i] is True when slice i of cond is entirely True
        mask = np.array([cond[i].all() for i in range(cond.shape[0])],
                        dtype=bool)

        # one block for the all-True slices, one for the rest; each is cast
        # separately
        result_blocks = []
        for m in [mask, ~mask]:
            if m.any():
                r = self._try_cast_result(result.take(m.nonzero()[0],
                                                      axis=axis))
                result_blocks.append(
                    self.make_block(r.T, placement=self.mgr_locs[m]))

        return result_blocks
  1030. def equals(self, other):
  1031. if self.dtype != other.dtype or self.shape != other.shape:
  1032. return False
  1033. return array_equivalent(self.values, other.values)
    def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
        """
        compute the quantiles of the block's values

        Parameters
        ----------
        qs: a scalar or list of the quantiles to be computed
        interpolation: type of interpolation, default 'linear'
        axis: axis to compute, default 0
        mgr: only used for its ``axes[0]`` when qs is a scalar and the
            block is not 1-dimensional

        Returns
        -------
        tuple of (axis, block)

        Raises
        ------
        ValueError : for a non-linear interpolation on numpy < 1.9
        """
        if _np_version_under1p9:
            if interpolation != 'linear':
                raise ValueError("Interpolation methods other than linear "
                                 "are not supported in numpy < 1.9.")

        # the interpolation keyword is only forwarded on numpy >= 1.9
        kw = {}
        if not _np_version_under1p9:
            kw.update({'interpolation': interpolation})

        values = self.get_values()
        values, _, _, _ = self._try_coerce_args(values, values)
        mask = isnull(self.values)
        if not lib.isscalar(mask) and mask.any():

            # even though this could be a 2-d mask it appears
            # as a 1-d result
            mask = mask.reshape(values.shape)
            result_shape = tuple([values.shape[0]] + [-1] * (self.ndim - 1))
            # drop the null entries, then restore the block's dimensionality
            values = _block_shape(values[~mask], ndim=self.ndim)
            if self.ndim > 1:
                values = values.reshape(result_shape)

        from pandas import Float64Index
        is_empty = values.shape[axis] == 0

        if is_list_like(qs):
            ax = Float64Index(qs)

            if is_empty:
                if self.ndim == 1:
                    result = self._na_value
                else:
                    # create the array of na_values
                    # 2d len(values) * len(qs)
                    result = np.repeat(np.array([self._na_value] * len(qs)),
                                       len(values)).reshape(len(values),
                                                            len(qs))
            else:

                try:
                    # quantiles are given as fractions, numpy wants percent
                    result = _quantile(values, np.array(qs) * 100,
                                       axis=axis, **kw)
                except ValueError:

                    # older numpies don't handle an array for q
                    result = [_quantile(values, q * 100,
                                        axis=axis, **kw) for q in qs]

                result = np.array(result, copy=False)
                if self.ndim > 1:
                    result = result.T

        else:

            if self.ndim == 1:
                ax = Float64Index([qs])
            else:
                ax = mgr.axes[0]

            if is_empty:
                if self.ndim == 1:
                    result = self._na_value
                else:
                    result = np.array([self._na_value] * len(self))
            else:
                result = _quantile(values, qs * 100, axis=axis, **kw)

        # scalars have no ndim attribute and are treated as 0-dimensional
        ndim = getattr(result, 'ndim', None) or 0
        result = self._try_coerce_result(result)
        if is_scalar(result):
            return ax, self.make_block_scalar(result)
        return ax, make_block(result,
                              placement=np.arange(len(result)),
                              ndim=ndim)
  1107. class ScalarBlock(Block):
  1108. """
  1109. a scalar compat Block
  1110. """
  1111. __slots__ = ['_mgr_locs', 'values', 'ndim']
  1112. def __init__(self, values):
  1113. self.ndim = 0
  1114. self.mgr_locs = [0]
  1115. self.values = values
  1116. @property
  1117. def dtype(self):
  1118. return type(self.values)
  1119. @property
  1120. def shape(self):
  1121. return tuple([0])
  1122. def __len__(self):
  1123. return 0
  1124. class NonConsolidatableMixIn(object):
  1125. """ hold methods for the nonconsolidatable blocks """
  1126. _can_consolidate = False
  1127. _verify_integrity = False
  1128. _validate_ndim = False
  1129. _holder = None
  1130. def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
  1131. # Placement must be converted to BlockPlacement via property setter
  1132. # before ndim logic, because placement may be a slice which doesn't
  1133. # have a length.
  1134. self.mgr_locs = placement
  1135. # kludgetastic
  1136. if ndim is None:
  1137. if len(self.mgr_locs) != 1:
  1138. ndim = 1
  1139. else:
  1140. ndim = 2
  1141. self.ndim = ndim
  1142. if not isinstance(values, self._holder):
  1143. raise TypeError("values must be {0}".format(self._holder.__name__))
  1144. self.values = values
  1145. @property
  1146. def shape(self):
  1147. if self.ndim == 1:
  1148. return (len(self.values)),
  1149. return (len(self.mgr_locs), len(self.values))
  1150. def get_values(self, dtype=None):
  1151. """ need to to_dense myself (and always return a ndim sized object) """
  1152. values = self.values.to_dense()
  1153. if values.ndim == self.ndim - 1:
  1154. values = values.reshape((1,) + values.shape)
  1155. return values
  1156. def iget(self, col):
  1157. if self.ndim == 2 and isinstance(col, tuple):
  1158. col, loc = col
  1159. if not is_null_slice(col) and col != 0:
  1160. raise IndexError("{0} only contains one item".format(self))
  1161. return self.values[loc]
  1162. else:
  1163. if col != 0:
  1164. raise IndexError("{0} only contains one item".format(self))
  1165. return self.values
  1166. def should_store(self, value):
  1167. return isinstance(value, self._holder)
  1168. def set(self, locs, values, check=False):
  1169. assert locs.tolist() == [0]
  1170. self.values = values
  1171. def get(self, item):
  1172. if self.ndim == 1:
  1173. loc = self.items.get_loc(item)
  1174. return self.values[loc]
  1175. else:
  1176. return self.values
  1177. def putmask(self, mask, new, align=True, inplace=False, axis=0,
  1178. transpose=False, mgr=None):
  1179. """
  1180. putmask the data to the block; we must be a single block and not
  1181. generate other blocks
  1182. return the resulting block
  1183. Parameters
  1184. ----------
  1185. mask : the condition to respect
  1186. new : a ndarray/object
  1187. align : boolean, perform alignment on other/cond, default is True
  1188. inplace : perform inplace modification, default is False
  1189. Returns
  1190. -------
  1191. a new block(s), the result of the putmask
  1192. """
  1193. new_values = self.values if inplace else self.values.copy()
  1194. new_values, _, new, _ = self._try_coerce_args(new_values, new)
  1195. if isinstance(new, np.ndarray) and len(new) == len(mask):
  1196. new = new[mask]
  1197. mask = _safe_reshape(mask, new_values.shape)
  1198. new_values[mask] = new
  1199. new_values = self._try_coerce_result(new_values)
  1200. return [self.make_block(values=new_values)]
  1201. def _slice(self, slicer):
  1202. """ return a slice of my values (but densify first) """
  1203. return self.get_values()[slicer]
  1204. def _try_cast_result(self, result, dtype=None):
  1205. return result
class NumericBlock(Block):
    # common base of the numeric blocks below; flags itself as numeric and
    # as able to hold NA (subclasses override _can_hold_na where needed)
    __slots__ = ()
    is_numeric = True
    _can_hold_na = True
  1210. class FloatOrComplexBlock(NumericBlock):
  1211. __slots__ = ()
  1212. def equals(self, other):
  1213. if self.dtype != other.dtype or self.shape != other.shape:
  1214. return False
  1215. left, right = self.values, other.values
  1216. return ((left == right) | (np.isnan(left) & np.isnan(right))).all()
  1217. class FloatBlock(FloatOrComplexBlock):
  1218. __slots__ = ()
  1219. is_float = True
  1220. _downcast_dtype = 'int64'
  1221. def _can_hold_element(self, element):
  1222. if is_list_like(element):
  1223. element = np.array(element)
  1224. tipo = element.dtype.type
  1225. return (issubclass(tipo, (np.floating, np.integer)) and
  1226. not issubclass(tipo, (np.datetime64, np.timedelta64)))
  1227. return (isinstance(element, (float, int, np.float_, np.int_)) and
  1228. not isinstance(element, (bool, np.bool_, datetime, timedelta,
  1229. np.datetime64, np.timedelta64)))
  1230. def _try_cast(self, element):
  1231. try:
  1232. return float(element)
  1233. except: # pragma: no cover
  1234. return element
  1235. def to_native_types(self, slicer=None, na_rep='', float_format=None,
  1236. decimal='.', quoting=None, **kwargs):
  1237. """ convert to our native types format, slicing if desired """
  1238. values = self.values
  1239. if slicer is not None:
  1240. values = values[:, slicer]
  1241. # see gh-13418: no special formatting is desired at the
  1242. # output (important for appropriate 'quoting' behaviour),
  1243. # so do not pass it through the FloatArrayFormatter
  1244. if float_format is None and decimal == '.':
  1245. mask = isnull(values)
  1246. if not quoting:
  1247. values = values.astype(str)
  1248. else:
  1249. values = np.array(values, dtype='object')
  1250. values[mask] = na_rep
  1251. return values
  1252. from pandas.formats.format import FloatArrayFormatter
  1253. formatter = FloatArrayFormatter(values, na_rep=na_rep,
  1254. float_format=float_format,
  1255. decimal=decimal, quoting=quoting,
  1256. fixed_width=False)
  1257. return formatter.get_result_as_array()
  1258. def should_store(self, value):
  1259. # when inserting a column should not coerce integers to floats
  1260. # unnecessarily
  1261. return (issubclass(value.dtype.type, np.floating) and
  1262. value.dtype == self.dtype)
  1263. class ComplexBlock(FloatOrComplexBlock):
  1264. __slots__ = ()
  1265. is_complex = True
  1266. def _can_hold_element(self, element):
  1267. if is_list_like(element):
  1268. element = np.array(element)
  1269. return issubclass(element.dtype.type,
  1270. (np.floating, np.integer, np.complexfloating))
  1271. return (isinstance(element,
  1272. (float, int, complex, np.float_, np.int_)) and
  1273. not isinstance(bool, np.bool_))
  1274. def _try_cast(self, element):
  1275. try:
  1276. return complex(element)
  1277. except: # pragma: no cover
  1278. return element
  1279. def should_store(self, value):
  1280. return issubclass(value.dtype.type, np.complexfloating)
  1281. class IntBlock(NumericBlock):
  1282. __slots__ = ()
  1283. is_integer = True
  1284. _can_hold_na = False
  1285. def _can_hold_element(self, element):
  1286. if is_list_like(element):
  1287. element = np.array(element)
  1288. tipo = element.dtype.type
  1289. return (issubclass(tipo, np.integer) and
  1290. not issubclass(tipo, (np.datetime64, np.timedelta64)))
  1291. return is_integer(element)
  1292. def _try_cast(self, element):
  1293. try:
  1294. return int(element)
  1295. except: # pragma: no cover
  1296. return element
  1297. def should_store(self, value):
  1298. return is_integer_dtype(value) and value.dtype == self.dtype
  1299. class DatetimeLikeBlockMixin(object):
  1300. @property
  1301. def _na_value(self):
  1302. return tslib.NaT
  1303. @property
  1304. def fill_value(self):
  1305. return tslib.iNaT
  1306. def _try_operate(self, values):
  1307. """ return a version to operate on """
  1308. return values.view('i8')
  1309. def get_values(self, dtype=None):
  1310. """
  1311. return object dtype as boxed values, such as Timestamps/Timedelta
  1312. """
  1313. if is_object_dtype(dtype):
  1314. return lib.map_infer(self.values.ravel(),
  1315. self._box_func).reshape(self.values.shape)
  1316. return self.values
  1317. class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
  1318. __slots__ = ()
  1319. is_timedelta = True
  1320. _can_hold_na = True
  1321. is_numeric = False
  1322. @property
  1323. def _box_func(self):
  1324. return lambda x: tslib.Timedelta(x, unit='ns')
  1325. def fillna(self, value, **kwargs):
  1326. # allow filling with integers to be
  1327. # interpreted as seconds
  1328. if not isinstance(value, np.timedelta64) and is_integer(value):
  1329. value = Timedelta(value, unit='s')
  1330. return super(TimeDeltaBlock, self).fillna(value, **kwargs)
  1331. def _try_coerce_args(self, values, other):
  1332. """
  1333. Coerce values and other to int64, with null values converted to
  1334. iNaT. values is always ndarray-like, other may not be
  1335. Parameters
  1336. ----------
  1337. values : ndarray-like
  1338. other : ndarray-like or scalar
  1339. Returns
  1340. -------
  1341. base-type values, values mask, base-type other, other mask
  1342. """
  1343. values_mask = isnull(values)
  1344. values = values.view('i8')
  1345. other_mask = False
  1346. if isinstance(other, bool):
  1347. raise TypeError
  1348. elif is_null_datelike_scalar(other):
  1349. other = tslib.iNaT
  1350. other_mask = True
  1351. elif isinstance(other, Timedelta):
  1352. other_mask = isnull(other)
  1353. other = other.value
  1354. elif isinstance(other, np.timedelta64):
  1355. other_mask = isnull(other)
  1356. other = other.view('i8')
  1357. elif isinstance(other, timedelta):
  1358. other = Timedelta(other).value
  1359. elif isinstance(other, np.ndarray):
  1360. other_mask = isnull(other)
  1361. other = other.astype('i8', copy=False).view('i8')
  1362. else:
  1363. # scalar
  1364. other = Timedelta(other)
  1365. other_mask = isnull(other)
  1366. other = other.value
  1367. return values, values_mask, other, other_mask
  1368. def _try_coerce_result(self, result):
  1369. """ reverse of try_coerce_args / try_operate """
  1370. if isinstance(result, np.ndarray):
  1371. mask = isnull(result)
  1372. if result.dtype.kind in ['i', 'f', 'O']:
  1373. result = result.astype('m8[ns]')
  1374. result[mask] = tslib.iNaT
  1375. elif isinstance(result, (np.integer, np.float)):
  1376. result = self._box_func(result)
  1377. return result
  1378. def should_store(self, value):
  1379. return issubclass(value.dtype.type, np.timedelta64)
  1380. def to_native_types(self, slicer=None, na_rep=None, quoting=None,
  1381. **kwargs):
  1382. """ convert to our native types format, slicing if desired """
  1383. values = self.values
  1384. if slicer is not None:
  1385. values = values[:, slicer]
  1386. mask = isnull(values)
  1387. rvalues = np.empty(values.shape, dtype=object)
  1388. if na_rep is None:
  1389. na_rep = 'NaT'
  1390. rvalues[mask] = na_rep
  1391. imask = (~mask).ravel()
  1392. # FIXME:
  1393. # should use the formats.format.Timedelta64Formatter here
  1394. # to figure what format to pass to the Timedelta
  1395. # e.g. to not show the decimals say
  1396. rvalues.flat[imask] = np.array([Timedelta(val)._repr_base(format='all')
  1397. for val in values.ravel()[imask]],
  1398. dtype=object)
  1399. return rvalues
  1400. class BoolBlock(NumericBlock):
  1401. __slots__ = ()
  1402. is_bool = True
  1403. _can_hold_na = False
  1404. def _can_hold_element(self, element):
  1405. if is_list_like(element):
  1406. element = np.array(element)
  1407. return issubclass(element.dtype.type, np.integer)
  1408. return isinstance(element, (int, bool))
  1409. def _try_cast(self, element):
  1410. try:
  1411. return bool(element)
  1412. except: # pragma: no cover
  1413. return element
  1414. def should_store(self, value):
  1415. return issubclass(value.dtype.type, np.bool_)
  1416. def replace(self, to_replace, value, inplace=False, filter=None,
  1417. regex=False, mgr=None):
  1418. to_replace_values = np.atleast_1d(to_replace)
  1419. if not np.can_cast(to_replace_values, bool):
  1420. return self
  1421. return super(BoolBlock, self).replace(to_replace, value,
  1422. inplace=inplace, filter=filter,
  1423. regex=regex, mgr=mgr)
  1424. class ObjectBlock(Block):
  1425. __slots__ = ()
  1426. is_object = True
  1427. _can_hold_na = True
  1428. def __init__(self, values, ndim=2, fastpath=False, placement=None,
  1429. **kwargs):
  1430. if issubclass(values.dtype.type, compat.string_types):
  1431. values = np.array(values, dtype=object)
  1432. super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath,
  1433. placement=placement, **kwargs)
  1434. @property
  1435. def is_bool(self):
  1436. """ we can be a bool if we have only bool values but are of type
  1437. object
  1438. """
  1439. return lib.is_bool_array(self.values.ravel())
  1440. # TODO: Refactor when convert_objects is removed since there will be 1 path
  1441. def convert(self, *args, **kwargs):
  1442. """ attempt to coerce any object types to better types return a copy of
  1443. the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
  1444. can return multiple blocks!
  1445. """
  1446. if args:
  1447. raise NotImplementedError
  1448. by_item = True if 'by_item' not in kwargs else kwargs['by_item']
  1449. new_inputs = ['coerce', 'datetime', 'numeric', 'timedelta']
  1450. new_style = False
  1451. for kw in new_inputs:
  1452. new_style |= kw in kwargs
  1453. if new_style:
  1454. fn = _soft_convert_objects
  1455. fn_inputs = new_inputs
  1456. else:
  1457. fn = _possibly_convert_objects
  1458. fn_inputs = ['convert_dates', 'convert_numeric',
  1459. 'convert_timedeltas']
  1460. fn_inputs += ['copy']
  1461. fn_kwargs = {}
  1462. for key in fn_inputs:
  1463. if key in kwargs:
  1464. fn_kwargs[key] = kwargs[key]
  1465. # attempt to create new type blocks
  1466. blocks = []
  1467. if by_item and not self._is_single_block:
  1468. for i, rl in enumerate(self.mgr_locs):
  1469. values = self.iget(i)
  1470. shape = values.shape
  1471. values = fn(values.ravel(), **fn_kwargs)
  1472. try:
  1473. values = values.reshape(shape)
  1474. values = _block_shape(values, ndim=self.ndim)
  1475. except (AttributeError, NotImplementedError):
  1476. pass
  1477. newb = make_block(values, ndim=self.ndim, placement=[rl])
  1478. blocks.append(newb)
  1479. else:
  1480. values = fn(
  1481. self.values.ravel(), **fn_kwargs).reshape(self.values.shape)
  1482. blocks.append(make_block(values, ndim=self.ndim,
  1483. placement=self.mgr_locs))
  1484. return blocks
  1485. def set(self, locs, values, check=False):
  1486. """
  1487. Modify Block in-place with new item value
  1488. Returns
  1489. -------
  1490. None
  1491. """
  1492. # GH6026
  1493. if check:
  1494. try:
  1495. if (self.values[locs] == values).all():
  1496. return
  1497. except:
  1498. pass
  1499. try:
  1500. self.values[locs] = values
  1501. except (ValueError):
  1502. # broadcasting error
  1503. # see GH6171
  1504. new_shape = list(values.shape)
  1505. new_shape[0] = len(self.items)
  1506. self.values = np.empty(tuple(new_shape), dtype=self.dtype)
  1507. self.values.fill(np.nan)
  1508. self.values[locs] = values
  1509. def _maybe_downcast(self, blocks, downcast=None):
  1510. if downcast is not None:
  1511. return blocks
  1512. # split and convert the blocks
  1513. return _extend_blocks([b.convert(datetime=True, numeric=False)
  1514. for b in blocks])
  1515. def _can_hold_element(self, element):
  1516. return True
  1517. def _try_cast(self, element):
  1518. return element
  1519. def should_store(self, value):
  1520. return not (issubclass(value.dtype.type,
  1521. (np.integer, np.floating, np.complexfloating,
  1522. np.datetime64, np.bool_)) or
  1523. is_extension_type(value))
  1524. def replace(self, to_replace, value, inplace=False, filter=None,
  1525. regex=False, convert=True, mgr=None):
  1526. to_rep_is_list = is_list_like(to_replace)
  1527. value_is_list = is_list_like(value)
  1528. both_lists = to_rep_is_list and value_is_list
  1529. either_list = to_rep_is_list or value_is_list
  1530. result_blocks = []
  1531. blocks = [self]
  1532. if not either_list and is_re(to_replace):
  1533. return self._replace_single(to_replace, value, inplace=inplace,
  1534. filter=filter, regex=True,
  1535. convert=convert, mgr=mgr)
  1536. elif not (either_list or regex):
  1537. return super(ObjectBlock, self).replace(to_replace, value,
  1538. inplace=inplace,
  1539. filter=filter, regex=regex,
  1540. convert=convert, mgr=mgr)
  1541. elif both_lists:
  1542. for to_rep, v in zip(to_replace, value):
  1543. result_blocks = []
  1544. for b in blocks:
  1545. result = b._replace_single(to_rep, v, inplace=inplace,
  1546. filter=filter, regex=regex,
  1547. convert=convert, mgr=mgr)
  1548. result_blocks = _extend_blocks(result, result_blocks)
  1549. blocks = result_blocks
  1550. return result_blocks
  1551. elif to_rep_is_list and regex:
  1552. for to_rep in to_replace:
  1553. result_blocks = []
  1554. for b in blocks:
  1555. result = b._replace_single(to_rep, value, inplace=inplace,
  1556. filter=filter, regex=regex,
  1557. convert=convert, mgr=mgr)
  1558. result_blocks = _extend_blocks(result, result_blocks)
  1559. blocks = result_blocks
  1560. return result_blocks
  1561. return self._replace_single(to_replace, value, inplace=inplace,
  1562. filter=filter, convert=convert,
  1563. regex=regex, mgr=mgr)
  1564. def _replace_single(self, to_replace, value, inplace=False, filter=None,
  1565. regex=False, convert=True, mgr=None):
  1566. # to_replace is regex compilable
  1567. to_rep_re = regex and is_re_compilable(to_replace)
  1568. # regex is regex compilable
  1569. regex_re = is_re_compilable(regex)
  1570. # only one will survive
  1571. if to_rep_re and regex_re:
  1572. raise AssertionError('only one of to_replace and regex can be '
  1573. 'regex compilable')
  1574. # if regex was passed as something that can be a regex (rather than a
  1575. # boolean)
  1576. if regex_re:
  1577. to_replace = regex
  1578. regex = regex_re or to_rep_re
  1579. # try to get the pattern attribute (compiled re) or it's a string
  1580. try:
  1581. pattern = to_replace.pattern
  1582. except AttributeError:
  1583. pattern = to_replace
  1584. # if the pattern is not empty and to_replace is either a string or a
  1585. # regex
  1586. if regex and pattern:
  1587. rx = re.compile(to_replace)
  1588. else:
  1589. # if the thing to replace is not a string or compiled regex call
  1590. # the superclass method -> to_replace is some kind of object
  1591. return super(ObjectBlock, self).replace(to_replace, value,
  1592. inplace=inplace,
  1593. filter=filter, regex=regex,
  1594. mgr=mgr)
  1595. new_values = self.values if inplace else self.values.copy()
  1596. # deal with replacing values with objects (strings) that match but
  1597. # whose replacement is not a string (numeric, nan, object)
  1598. if isnull(value) or not isinstance(value, compat.string_types):
  1599. def re_replacer(s):
  1600. try:
  1601. return value if rx.search(s) is not None else s
  1602. except TypeError:
  1603. return s
  1604. else:
  1605. # value is guaranteed to be a string here, s can be either a string
  1606. # or null if it's null it gets returned
  1607. def re_replacer(s):
  1608. try:
  1609. return rx.sub(value, s)
  1610. except TypeError:
  1611. return s
  1612. f = np.vectorize(re_replacer, otypes=[self.dtype])
  1613. if filter is None:
  1614. filt = slice(None)
  1615. else:
  1616. filt = self.mgr_locs.isin(filter).nonzero()[0]
  1617. new_values[filt] = f(new_values[filt])
  1618. # convert
  1619. block = self.make_block(new_values)
  1620. if convert:
  1621. block = block.convert(by_item=True, numeric=False)
  1622. return block
  1623. class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock):
  1624. __slots__ = ()
  1625. is_categorical = True
  1626. _verify_integrity = True
  1627. _can_hold_na = True
  1628. _holder = Categorical
  1629. def __init__(self, values, placement, fastpath=False, **kwargs):
  1630. # coerce to categorical if we can
  1631. super(CategoricalBlock, self).__init__(maybe_to_categorical(values),
  1632. fastpath=True,
  1633. placement=placement, **kwargs)
    @property
    def is_view(self):
        """ I am never a view: always returns False """
        return False
  1638. def to_dense(self):
  1639. return self.values.to_dense().view()
  1640. def convert(self, copy=True, **kwargs):
  1641. return self.copy() if copy else self
    @property
    def array_dtype(self):
        """ the dtype to return if I want to construct this block as an
        array; always np.object_
        """
        return np.object_
  1648. def _slice(self, slicer):
  1649. """ return a slice of my values """
  1650. # slice the category
  1651. # return same dims as we currently have
  1652. return self.values._slice(slicer)
  1653. def _try_coerce_result(self, result):
  1654. """ reverse of try_coerce_args """
  1655. # GH12564: CategoricalBlock is 1-dim only
  1656. # while returned results could be any dim
  1657. if ((not is_categorical_dtype(result)) and
  1658. isinstance(result, np.ndarray)):
  1659. result = _block_shape(result, ndim=self.ndim)
  1660. return result
  1661. def fillna(self, value, limit=None, inplace=False, downcast=None,
  1662. mgr=None):
  1663. # we may need to upcast our fill to match our dtype
  1664. if limit is not None:
  1665. raise NotImplementedError("specifying a limit for 'fillna' has "
  1666. "not been implemented yet")
  1667. values = self.values if inplace else self.values.copy()
  1668. values = self._try_coerce_result(values.fillna(value=value,
  1669. limit=limit))
  1670. return [self.make_block(values=values)]
  1671. def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
  1672. fill_value=None, **kwargs):
  1673. values = self.values if inplace else self.values.copy()
  1674. return self.make_block_same_class(
  1675. values=values.fillna(fill_value=fill_value, method=method,
  1676. limit=limit),
  1677. placement=self.mgr_locs)
  1678. def shift(self, periods, axis=0, mgr=None):
  1679. return self.make_block_same_class(values=self.values.shift(periods),
  1680. placement=self.mgr_locs)
  1681. def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
  1682. """
  1683. Take values according to indexer and return them as a block.bb
  1684. """
  1685. if fill_tuple is None:
  1686. fill_value = None
  1687. else:
  1688. fill_value = fill_tuple[0]
  1689. # axis doesn't matter; we are really a single-dim object
  1690. # but are passed the axis depending on the calling routing
  1691. # if its REALLY axis 0, then this will be a reindex and not a take
  1692. new_values = self.values.take_nd(indexer, fill_value=fill_value)
  1693. # if we are a 1-dim object, then always place at 0
  1694. if self.ndim == 1:
  1695. new_mgr_locs = [0]
  1696. else:
  1697. if new_mgr_locs is None:
  1698. new_mgr_locs = self.mgr_locs
  1699. return self.make_block_same_class(new_values, new_mgr_locs)
  1700. def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
  1701. klass=None, mgr=None):
  1702. """
  1703. Coerce to the new type (if copy=True, return a new copy)
  1704. raise on an except if raise == True
  1705. """
  1706. if self.is_categorical_astype(dtype):
  1707. values = self.values
  1708. else:
  1709. values = np.asarray(self.values).astype(dtype, copy=False)
  1710. if copy:
  1711. values = values.copy()
  1712. return self.make_block(values)
  1713. def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
  1714. """ convert to our native types format, slicing if desired """
  1715. values = self.values
  1716. if slicer is not None:
  1717. # Categorical is always one dimension
  1718. values = values[slicer]
  1719. mask = isnull(values)
  1720. values = np.array(values, dtype='object')
  1721. values[mask] = na_rep
  1722. # we are expected to return a 2-d ndarray
  1723. return values.reshape(1, len(values))
  1724. class DatetimeBlock(DatetimeLikeBlockMixin, Block):
  1725. __slots__ = ()
  1726. is_datetime = True
  1727. _can_hold_na = True
  1728. def __init__(self, values, placement, fastpath=False, **kwargs):
  1729. if values.dtype != _NS_DTYPE:
  1730. values = tslib.cast_to_nanoseconds(values)
  1731. super(DatetimeBlock, self).__init__(values, fastpath=True,
  1732. placement=placement, **kwargs)
  1733. def _astype(self, dtype, mgr=None, **kwargs):
  1734. """
  1735. these automatically copy, so copy=True has no effect
  1736. raise on an except if raise == True
  1737. """
  1738. # if we are passed a datetime64[ns, tz]
  1739. if is_datetime64tz_dtype(dtype):
  1740. dtype = DatetimeTZDtype(dtype)
  1741. values = self.values
  1742. if getattr(values, 'tz', None) is None:
  1743. values = DatetimeIndex(values).tz_localize('UTC')
  1744. values = values.tz_convert(dtype.tz)
  1745. return self.make_block(values)
  1746. # delegate
  1747. return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
  1748. def _can_hold_element(self, element):
  1749. if is_list_like(element):
  1750. element = np.array(element)
  1751. return element.dtype == _NS_DTYPE or element.dtype == np.int64
  1752. return (is_integer(element) or isinstance(element, datetime) or
  1753. isnull(element))
  1754. def _try_cast(self, element):
  1755. try:
  1756. return int(element)
  1757. except:
  1758. return element
  1759. def _try_coerce_args(self, values, other):
  1760. """
  1761. Coerce values and other to dtype 'i8'. NaN and NaT convert to
  1762. the smallest i8, and will correctly round-trip to NaT if converted
  1763. back in _try_coerce_result. values is always ndarray-like, other
  1764. may not be
  1765. Parameters
  1766. ----------
  1767. values : ndarray-like
  1768. other : ndarray-like or scalar
  1769. Returns
  1770. -------
  1771. base-type values, values mask, base-type other, other mask
  1772. """
  1773. values_mask = isnull(values)
  1774. values = values.view('i8')
  1775. other_mask = False
  1776. if isinstance(other, bool):
  1777. raise TypeError
  1778. elif is_null_datelike_scalar(other):
  1779. other = tslib.iNaT
  1780. other_mask = True
  1781. elif isinstance(other, (datetime, np.datetime64, date)):
  1782. other = self._box_func(other)
  1783. if getattr(other, 'tz') is not None:
  1784. raise TypeError("cannot coerce a Timestamp with a tz on a "
  1785. "naive Block")
  1786. other_mask = isnull(other)
  1787. other = other.asm8.view('i8')
  1788. elif hasattr(other, 'dtype') and is_integer_dtype(other):
  1789. other = other.view('i8')
  1790. else:
  1791. try:
  1792. other = np.asarray(other)
  1793. other_mask = isnull(other)
  1794. other = other.astype('i8', copy=False).view('i8')
  1795. except ValueError:
  1796. # coercion issues
  1797. # let higher levels handle
  1798. raise TypeError
  1799. return values, values_mask, other, other_mask
  1800. def _try_coerce_result(self, result):
  1801. """ reverse of try_coerce_args """
  1802. if isinstance(result, np.ndarray):
  1803. if result.dtype.kind in ['i', 'f', 'O']:
  1804. try:
  1805. result = result.astype('M8[ns]')
  1806. except ValueError:
  1807. pass
  1808. elif isinstance(result, (np.integer, np.float, np.datetime64)):
  1809. result = self._box_func(result)
  1810. return result
  1811. @property
  1812. def _box_func(self):
  1813. return tslib.Timestamp
  1814. def to_native_types(self, slicer=None, na_rep=None, date_format=None,
  1815. quoting=None, **kwargs):
  1816. """ convert to our native types format, slicing if desired """
  1817. values = self.values
  1818. if slicer is not None:
  1819. values = values[..., slicer]
  1820. from pandas.formats.format import _get_format_datetime64_from_values
  1821. format = _get_format_datetime64_from_values(values, date_format)
  1822. result = tslib.format_array_from_datetime(
  1823. values.view('i8').ravel(), tz=getattr(self.values, 'tz', None),
  1824. format=format, na_rep=na_rep).reshape(values.shape)
  1825. return np.atleast_2d(result)
  1826. def should_store(self, value):
  1827. return (issubclass(value.dtype.type, np.datetime64) and
  1828. not is_datetimetz(value))
  1829. def set(self, locs, values, check=False):
  1830. """
  1831. Modify Block in-place with new item value
  1832. Returns
  1833. -------
  1834. None
  1835. """
  1836. if values.dtype != _NS_DTYPE:
  1837. # Workaround for numpy 1.6 bug
  1838. values = tslib.cast_to_nanoseconds(values)
  1839. self.values[locs] = values
  1840. class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
  1841. """ implement a datetime64 block with a tz attribute """
  1842. __slots__ = ()
  1843. _holder = DatetimeIndex
  1844. is_datetimetz = True
  1845. def __init__(self, values, placement, ndim=2, **kwargs):
  1846. if not isinstance(values, self._holder):
  1847. values = self._holder(values)
  1848. dtype = kwargs.pop('dtype', None)
  1849. if dtype is not None:
  1850. if isinstance(dtype, compat.string_types):
  1851. dtype = DatetimeTZDtype.construct_from_string(dtype)
  1852. values = values.tz_localize('UTC').tz_convert(dtype.tz)
  1853. if values.tz is None:
  1854. raise ValueError("cannot create a DatetimeTZBlock without a tz")
  1855. super(DatetimeTZBlock, self).__init__(values, placement=placement,
  1856. ndim=ndim, **kwargs)
  1857. def copy(self, deep=True, mgr=None):
  1858. """ copy constructor """
  1859. values = self.values
  1860. if deep:
  1861. values = values.copy(deep=True)
  1862. return self.make_block_same_class(values)
  1863. def external_values(self):
  1864. """ we internally represent the data as a DatetimeIndex, but for
  1865. external compat with ndarray, export as a ndarray of Timestamps
  1866. """
  1867. return self.values.astype('datetime64[ns]').values
  1868. def get_values(self, dtype=None):
  1869. # return object dtype as Timestamps with the zones
  1870. if is_object_dtype(dtype):
  1871. f = lambda x: lib.Timestamp(x, tz=self.values.tz)
  1872. return lib.map_infer(
  1873. self.values.ravel(), f).reshape(self.values.shape)
  1874. return self.values
  1875. def to_object_block(self, mgr):
  1876. """
  1877. return myself as an object block
  1878. Since we keep the DTI as a 1-d object, this is different
  1879. depends on BlockManager's ndim
  1880. """
  1881. values = self.get_values(dtype=object)
  1882. kwargs = {}
  1883. if mgr.ndim > 1:
  1884. values = _block_shape(values, ndim=mgr.ndim)
  1885. kwargs['ndim'] = mgr.ndim
  1886. kwargs['placement'] = [0]
  1887. return self.make_block(values, klass=ObjectBlock, **kwargs)
  1888. def _slice(self, slicer):
  1889. """ return a slice of my values """
  1890. if isinstance(slicer, tuple):
  1891. col, loc = slicer
  1892. if not is_null_slice(col) and col != 0:
  1893. raise IndexError("{0} only contains one item".format(self))
  1894. return self.values[loc]
  1895. return self.values[slicer]
  1896. def _try_coerce_args(self, values, other):
  1897. """
  1898. localize and return i8 for the values
  1899. Parameters
  1900. ----------
  1901. values : ndarray-like
  1902. other : ndarray-like or scalar
  1903. Returns
  1904. -------
  1905. base-type values, values mask, base-type other, other mask
  1906. """
  1907. values_mask = _block_shape(isnull(values), ndim=self.ndim)
  1908. values = _block_shape(values.tz_localize(None).asi8, ndim=self.ndim)
  1909. other_mask = False
  1910. if isinstance(other, ABCSeries):
  1911. other = self._holder(other)
  1912. other_mask = isnull(other)
  1913. if isinstance(other, bool):
  1914. raise TypeError
  1915. elif is_null_datelike_scalar(other):
  1916. other = tslib.iNaT
  1917. other_mask = True
  1918. elif isinstance(other, self._holder):
  1919. if other.tz != self.values.tz:
  1920. raise ValueError("incompatible or non tz-aware value")
  1921. other = other.tz_localize(None).asi8
  1922. other_mask = isnull(other)
  1923. elif isinstance(other, (np.datetime64, datetime, date)):
  1924. other = lib.Timestamp(other)
  1925. tz = getattr(other, 'tz', None)
  1926. # test we can have an equal time zone
  1927. if tz is None or str(tz) != str(self.values.tz):
  1928. raise ValueError("incompatible or non tz-aware value")
  1929. other_mask = isnull(other)
  1930. other = other.tz_localize(None).value
  1931. return values, values_mask, other, other_mask
  1932. def _try_coerce_result(self, result):
  1933. """ reverse of try_coerce_args """
  1934. if isinstance(result, np.ndarray):
  1935. if result.dtype.kind in ['i', 'f', 'O']:
  1936. result = result.astype('M8[ns]')
  1937. elif isinstance(result, (np.integer, np.float, np.datetime64)):
  1938. result = lib.Timestamp(result).tz_localize(self.values.tz)
  1939. if isinstance(result, np.ndarray):
  1940. # allow passing of > 1dim if its trivial
  1941. if result.ndim > 1:
  1942. result = result.reshape(len(result))
  1943. result = self._holder(result).tz_localize(self.values.tz)
  1944. return result
  1945. @property
  1946. def _box_func(self):
  1947. return lambda x: tslib.Timestamp(x, tz=self.dtype.tz)
  1948. def shift(self, periods, axis=0, mgr=None):
  1949. """ shift the block by periods """
  1950. # think about moving this to the DatetimeIndex. This is a non-freq
  1951. # (number of periods) shift ###
  1952. N = len(self)
  1953. indexer = np.zeros(N, dtype=int)
  1954. if periods > 0:
  1955. indexer[periods:] = np.arange(N - periods)
  1956. else:
  1957. indexer[:periods] = np.arange(-periods, N)
  1958. new_values = self.values.asi8.take(indexer)
  1959. if periods > 0:
  1960. new_values[:periods] = tslib.iNaT
  1961. else:
  1962. new_values[periods:] = tslib.iNaT
  1963. new_values = self.values._shallow_copy(new_values)
  1964. return [self.make_block_same_class(new_values,
  1965. placement=self.mgr_locs)]
  1966. class SparseBlock(NonConsolidatableMixIn, Block):
  1967. """ implement as a list of sparse arrays of the same dtype """
  1968. __slots__ = ()
  1969. is_sparse = True
  1970. is_numeric = True
  1971. _box_to_block_values = False
  1972. _can_hold_na = True
  1973. _ftype = 'sparse'
  1974. _holder = SparseArray
  1975. @property
  1976. def shape(self):
  1977. return (len(self.mgr_locs), self.sp_index.length)
  1978. @property
  1979. def itemsize(self):
  1980. return self.dtype.itemsize
  1981. @property
  1982. def fill_value(self):
  1983. # return np.nan
  1984. return self.values.fill_value
  1985. @fill_value.setter
  1986. def fill_value(self, v):
  1987. # we may need to upcast our fill to match our dtype
  1988. if issubclass(self.dtype.type, np.floating):
  1989. v = float(v)
  1990. self.values.fill_value = v
  1991. def to_dense(self):
  1992. return self.values.to_dense().view()
  1993. @property
  1994. def sp_values(self):
  1995. return self.values.sp_values
  1996. @sp_values.setter
  1997. def sp_values(self, v):
  1998. # reset the sparse values
  1999. self.values = SparseArray(v, sparse_index=self.sp_index,
  2000. kind=self.kind, dtype=v.dtype,
  2001. fill_value=self.values.fill_value,
  2002. copy=False)
  2003. @property
  2004. def sp_index(self):
  2005. return self.values.sp_index
  2006. @property
  2007. def kind(self):
  2008. return self.values.kind
  2009. def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
  2010. klass=None, mgr=None, **kwargs):
  2011. if values is None:
  2012. values = self.values
  2013. values = values.astype(dtype, copy=copy)
  2014. return self.make_block_same_class(values=values,
  2015. placement=self.mgr_locs)
  2016. def __len__(self):
  2017. try:
  2018. return self.sp_index.length
  2019. except:
  2020. return 0
  2021. def copy(self, deep=True, mgr=None):
  2022. return self.make_block_same_class(values=self.values,
  2023. sparse_index=self.sp_index,
  2024. kind=self.kind, copy=deep,
  2025. placement=self.mgr_locs)
  2026. def make_block_same_class(self, values, placement, sparse_index=None,
  2027. kind=None, dtype=None, fill_value=None,
  2028. copy=False, fastpath=True, **kwargs):
  2029. """ return a new block """
  2030. if dtype is None:
  2031. dtype = values.dtype
  2032. if fill_value is None and not isinstance(values, SparseArray):
  2033. fill_value = self.values.fill_value
  2034. # if not isinstance(values, SparseArray) and values.ndim != self.ndim:
  2035. # raise ValueError("ndim mismatch")
  2036. if values.ndim == 2:
  2037. nitems = values.shape[0]
  2038. if nitems == 0:
  2039. # kludgy, but SparseBlocks cannot handle slices, where the
  2040. # output is 0-item, so let's convert it to a dense block: it
  2041. # won't take space since there's 0 items, plus it will preserve
  2042. # the dtype.
  2043. return self.make_block(np.empty(values.shape, dtype=dtype),
  2044. placement,
  2045. fastpath=True)
  2046. elif nitems > 1:
  2047. raise ValueError("Only 1-item 2d sparse blocks are supported")
  2048. else:
  2049. values = values.reshape(values.shape[1])
  2050. new_values = SparseArray(values, sparse_index=sparse_index,
  2051. kind=kind or self.kind, dtype=dtype,
  2052. fill_value=fill_value, copy=copy)
  2053. return self.make_block(new_values, fastpath=fastpath,
  2054. placement=placement)
  2055. def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
  2056. fill_value=None, **kwargs):
  2057. values = missing.interpolate_2d(self.values.to_dense(), method, axis,
  2058. limit, fill_value)
  2059. return self.make_block_same_class(values=values,
  2060. placement=self.mgr_locs)
  2061. def fillna(self, value, limit=None, inplace=False, downcast=None,
  2062. mgr=None):
  2063. # we may need to upcast our fill to match our dtype
  2064. if limit is not None:
  2065. raise NotImplementedError("specifying a limit for 'fillna' has "
  2066. "not been implemented yet")
  2067. values = self.values if inplace else self.values.copy()
  2068. values = values.fillna(value, downcast=downcast)
  2069. return [self.make_block_same_class(values=values,
  2070. placement=self.mgr_locs)]
  2071. def shift(self, periods, axis=0, mgr=None):
  2072. """ shift the block by periods """
  2073. N = len(self.values.T)
  2074. indexer = np.zeros(N, dtype=int)
  2075. if periods > 0:
  2076. indexer[periods:] = np.arange(N - periods)
  2077. else:
  2078. indexer[:periods] = np.arange(-periods, N)
  2079. new_values = self.values.to_dense().take(indexer)
  2080. # convert integer to float if necessary. need to do a lot more than
  2081. # that, handle boolean etc also
  2082. new_values, fill_value = _maybe_upcast(new_values)
  2083. if periods > 0:
  2084. new_values[:periods] = fill_value
  2085. else:
  2086. new_values[periods:] = fill_value
  2087. return [self.make_block_same_class(new_values,
  2088. placement=self.mgr_locs)]
  2089. def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
  2090. limit=None, mask_info=None):
  2091. """
  2092. Reindex using pre-computed indexer information
  2093. """
  2094. if axis < 1:
  2095. raise AssertionError('axis must be at least 1, got %d' % axis)
  2096. # taking on the 0th axis always here
  2097. if fill_value is None:
  2098. fill_value = self.fill_value
  2099. return self.make_block_same_class(self.values.take(indexer),
  2100. fill_value=fill_value,
  2101. placement=self.mgr_locs)
  2102. def sparse_reindex(self, new_index):
  2103. """ sparse reindex and return a new block
  2104. current reindex only works for float64 dtype! """
  2105. values = self.values
  2106. values = values.sp_index.to_int_index().reindex(
  2107. values.sp_values.astype('float64'), values.fill_value, new_index)
  2108. return self.make_block_same_class(values, sparse_index=new_index,
  2109. placement=self.mgr_locs)
  2110. def make_block(values, placement, klass=None, ndim=None, dtype=None,
  2111. fastpath=False):
  2112. if klass is None:
  2113. dtype = dtype or values.dtype
  2114. vtype = dtype.type
  2115. if isinstance(values, SparseArray):
  2116. klass = SparseBlock
  2117. elif issubclass(vtype, np.floating):
  2118. klass = FloatBlock
  2119. elif (issubclass(vtype, np.integer) and
  2120. issubclass(vtype, np.timedelta64)):
  2121. klass = TimeDeltaBlock
  2122. elif (issubclass(vtype, np.integer) and
  2123. not issubclass(vtype, np.datetime64)):
  2124. klass = IntBlock
  2125. elif dtype == np.bool_:
  2126. klass = BoolBlock
  2127. elif issubclass(vtype, np.datetime64):
  2128. if hasattr(values, 'tz'):
  2129. klass = DatetimeTZBlock
  2130. else:
  2131. klass = DatetimeBlock
  2132. elif is_datetimetz(values):
  2133. klass = DatetimeTZBlock
  2134. elif issubclass(vtype, np.complexfloating):
  2135. klass = ComplexBlock
  2136. elif is_categorical(values):
  2137. klass = CategoricalBlock
  2138. else:
  2139. klass = ObjectBlock
  2140. elif klass is DatetimeTZBlock and not is_datetimetz(values):
  2141. return klass(values, ndim=ndim, fastpath=fastpath,
  2142. placement=placement, dtype=dtype)
  2143. return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
  2144. # TODO: flexible with index=None and/or items=None
  2145. class BlockManager(PandasObject):
  2146. """
  2147. Core internal data structure to implement DataFrame, Series, Panel, etc.
  2148. Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
  2149. lightweight blocked set of labeled data to be manipulated by the DataFrame
  2150. public API class
  2151. Attributes
  2152. ----------
  2153. shape
  2154. ndim
  2155. axes
  2156. values
  2157. items
  2158. Methods
  2159. -------
  2160. set_axis(axis, new_labels)
  2161. copy(deep=True)
  2162. get_dtype_counts
  2163. get_ftype_counts
  2164. get_dtypes
  2165. get_ftypes
  2166. apply(func, axes, block_filter_fn)
  2167. get_bool_data
  2168. get_numeric_data
  2169. get_slice(slice_like, axis)
  2170. get(label)
  2171. iget(loc)
  2172. get_scalar(label_tup)
  2173. take(indexer, axis)
  2174. reindex_axis(new_labels, axis)
  2175. reindex_indexer(new_labels, indexer, axis)
  2176. delete(label)
  2177. insert(loc, label, value)
  2178. set(label, value)
  2179. Parameters
  2180. ----------
  2181. Notes
  2182. -----
  2183. This is *not* a public API class
  2184. """
  2185. __slots__ = ['axes', 'blocks', '_ndim', '_shape', '_known_consolidated',
  2186. '_is_consolidated', '_blknos', '_blklocs']
  2187. def __init__(self, blocks, axes, do_integrity_check=True, fastpath=True):
  2188. self.axes = [_ensure_index(ax) for ax in axes]
  2189. self.blocks = tuple(blocks)
  2190. for block in blocks:
  2191. if block.is_sparse:
  2192. if len(block.mgr_locs) != 1:
  2193. raise AssertionError("Sparse block refers to multiple "
  2194. "items")
  2195. else:
  2196. if self.ndim != block.ndim:
  2197. raise AssertionError('Number of Block dimensions (%d) '
  2198. 'must equal number of axes (%d)' %
  2199. (block.ndim, self.ndim))
  2200. if do_integrity_check:
  2201. self._verify_integrity()
  2202. self._consolidate_check()
  2203. self._rebuild_blknos_and_blklocs()
  2204. def make_empty(self, axes=None):
  2205. """ return an empty BlockManager with the items axis of len 0 """
  2206. if axes is None:
  2207. axes = [_ensure_index([])] + [_ensure_index(a)
  2208. for a in self.axes[1:]]
  2209. # preserve dtype if possible
  2210. if self.ndim == 1:
  2211. blocks = np.array([], dtype=self.array_dtype)
  2212. else:
  2213. blocks = []
  2214. return self.__class__(blocks, axes)
  2215. def __nonzero__(self):
  2216. return True
  2217. # Python3 compat
  2218. __bool__ = __nonzero__
  2219. @property
  2220. def shape(self):
  2221. return tuple(len(ax) for ax in self.axes)
  2222. @property
  2223. def ndim(self):
  2224. return len(self.axes)
  2225. def set_axis(self, axis, new_labels):
  2226. new_labels = _ensure_index(new_labels)
  2227. old_len = len(self.axes[axis])
  2228. new_len = len(new_labels)
  2229. if new_len != old_len:
  2230. raise ValueError('Length mismatch: Expected axis has %d elements, '
  2231. 'new values have %d elements' %
  2232. (old_len, new_len))
  2233. self.axes[axis] = new_labels
  2234. def rename_axis(self, mapper, axis, copy=True):
  2235. """
  2236. Rename one of axes.
  2237. Parameters
  2238. ----------
  2239. mapper : unary callable
  2240. axis : int
  2241. copy : boolean, default True
  2242. """
  2243. obj = self.copy(deep=copy)
  2244. obj.set_axis(axis, _transform_index(self.axes[axis], mapper))
  2245. return obj
  2246. def add_prefix(self, prefix):
  2247. f = (str(prefix) + '%s').__mod__
  2248. return self.rename_axis(f, axis=0)
  2249. def add_suffix(self, suffix):
  2250. f = ('%s' + str(suffix)).__mod__
  2251. return self.rename_axis(f, axis=0)
  2252. @property
  2253. def _is_single_block(self):
  2254. if self.ndim == 1:
  2255. return True
  2256. if len(self.blocks) != 1:
  2257. return False
  2258. blk = self.blocks[0]
  2259. return (blk.mgr_locs.is_slice_like and
  2260. blk.mgr_locs.as_slice == slice(0, len(self), 1))
  2261. def _rebuild_blknos_and_blklocs(self):
  2262. """
  2263. Update mgr._blknos / mgr._blklocs.
  2264. """
  2265. new_blknos = np.empty(self.shape[0], dtype=np.int64)
  2266. new_blklocs = np.empty(self.shape[0], dtype=np.int64)
  2267. new_blknos.fill(-1)
  2268. new_blklocs.fill(-1)
  2269. for blkno, blk in enumerate(self.blocks):
  2270. rl = blk.mgr_locs
  2271. new_blknos[rl.indexer] = blkno
  2272. new_blklocs[rl.indexer] = np.arange(len(rl))
  2273. if (new_blknos == -1).any():
  2274. raise AssertionError("Gaps in blk ref_locs")
  2275. self._blknos = new_blknos
  2276. self._blklocs = new_blklocs
  2277. # make items read only for now
  2278. def _get_items(self):
  2279. return self.axes[0]
  2280. items = property(fget=_get_items)
  2281. def _get_counts(self, f):
  2282. """ return a dict of the counts of the function in BlockManager """
  2283. self._consolidate_inplace()
  2284. counts = dict()
  2285. for b in self.blocks:
  2286. v = f(b)
  2287. counts[v] = counts.get(v, 0) + b.shape[0]
  2288. return counts
  2289. def get_dtype_counts(self):
  2290. return self._get_counts(lambda b: b.dtype.name)
  2291. def get_ftype_counts(self):
  2292. return self._get_counts(lambda b: b.ftype)
  2293. def get_dtypes(self):
  2294. dtypes = np.array([blk.dtype for blk in self.blocks])
  2295. return algos.take_1d(dtypes, self._blknos, allow_fill=False)
  2296. def get_ftypes(self):
  2297. ftypes = np.array([blk.ftype for blk in self.blocks])
  2298. return algos.take_1d(ftypes, self._blknos, allow_fill=False)
  2299. def __getstate__(self):
  2300. block_values = [b.values for b in self.blocks]
  2301. block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
  2302. axes_array = [ax for ax in self.axes]
  2303. extra_state = {
  2304. '0.14.1': {
  2305. 'axes': axes_array,
  2306. 'blocks': [dict(values=b.values, mgr_locs=b.mgr_locs.indexer)
  2307. for b in self.blocks]
  2308. }
  2309. }
  2310. # First three elements of the state are to maintain forward
  2311. # compatibility with 0.13.1.
  2312. return axes_array, block_values, block_items, extra_state
  2313. def __setstate__(self, state):
  2314. def unpickle_block(values, mgr_locs):
  2315. # numpy < 1.7 pickle compat
  2316. if values.dtype == 'M8[us]':
  2317. values = values.astype('M8[ns]')
  2318. return make_block(values, placement=mgr_locs)
  2319. if (isinstance(state, tuple) and len(state) >= 4 and
  2320. '0.14.1' in state[3]):
  2321. state = state[3]['0.14.1']
  2322. self.axes = [_ensure_index(ax) for ax in state['axes']]
  2323. self.blocks = tuple(unpickle_block(b['values'], b['mgr_locs'])
  2324. for b in state['blocks'])
  2325. else:
  2326. # discard anything after 3rd, support beta pickling format for a
  2327. # little while longer
  2328. ax_arrays, bvalues, bitems = state[:3]
  2329. self.axes = [_ensure_index(ax) for ax in ax_arrays]
  2330. if len(bitems) == 1 and self.axes[0].equals(bitems[0]):
  2331. # This is a workaround for pre-0.14.1 pickles that didn't
  2332. # support unpickling multi-block frames/panels with non-unique
  2333. # columns/items, because given a manager with items ["a", "b",
  2334. # "a"] there's no way of knowing which block's "a" is where.
  2335. #
  2336. # Single-block case can be supported under the assumption that
  2337. # block items corresponded to manager items 1-to-1.
  2338. all_mgr_locs = [slice(0, len(bitems[0]))]
  2339. else:
  2340. all_mgr_locs = [self.axes[0].get_indexer(blk_items)
  2341. for blk_items in bitems]
  2342. self.blocks = tuple(
  2343. unpickle_block(values, mgr_locs)
  2344. for values, mgr_locs in zip(bvalues, all_mgr_locs))
  2345. self._post_setstate()
  2346. def _post_setstate(self):
  2347. self._is_consolidated = False
  2348. self._known_consolidated = False
  2349. self._rebuild_blknos_and_blklocs()
  2350. def __len__(self):
  2351. return len(self.items)
  2352. def __unicode__(self):
  2353. output = pprint_thing(self.__class__.__name__)
  2354. for i, ax in enumerate(self.axes):
  2355. if i == 0:
  2356. output += u('\nItems: %s') % ax
  2357. else:
  2358. output += u('\nAxis %d: %s') % (i, ax)
  2359. for block in self.blocks:
  2360. output += u('\n%s') % pprint_thing(block)
  2361. return output
  2362. def _verify_integrity(self):
  2363. mgr_shape = self.shape
  2364. tot_items = sum(len(x.mgr_locs) for x in self.blocks)
  2365. for block in self.blocks:
  2366. if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
  2367. construction_error(tot_items, block.shape[1:], self.axes)
  2368. if len(self.items) != tot_items:
  2369. raise AssertionError('Number of manager items must equal union of '
  2370. 'block items\n# manager items: {0}, # '
  2371. 'tot_items: {1}'.format(
  2372. len(self.items), tot_items))
  2373. def apply(self, f, axes=None, filter=None, do_integrity_check=False,
  2374. consolidate=True, **kwargs):
  2375. """
  2376. iterate over the blocks, collect and create a new block manager
  2377. Parameters
  2378. ----------
  2379. f : the callable or function name to operate on at the block level
  2380. axes : optional (if not supplied, use self.axes)
  2381. filter : list, if supplied, only call the block if the filter is in
  2382. the block
  2383. do_integrity_check : boolean, default False. Do the block manager
  2384. integrity check
  2385. consolidate: boolean, default True. Join together blocks having same
  2386. dtype
  2387. Returns
  2388. -------
  2389. Block Manager (new object)
  2390. """
  2391. result_blocks = []
  2392. # filter kwarg is used in replace-* family of methods
  2393. if filter is not None:
  2394. filter_locs = set(self.items.get_indexer_for(filter))
  2395. if len(filter_locs) == len(self.items):
  2396. # All items are included, as if there were no filtering
  2397. filter = None
  2398. else:
  2399. kwargs['filter'] = filter_locs
  2400. if consolidate:
  2401. self._consolidate_inplace()
  2402. if f == 'where':
  2403. align_copy = True
  2404. if kwargs.get('align', True):
  2405. align_keys = ['other', 'cond']
  2406. else:
  2407. align_keys = ['cond']
  2408. elif f == 'putmask':
  2409. align_copy = False
  2410. if kwargs.get('align', True):
  2411. align_keys = ['new', 'mask']
  2412. else:
  2413. align_keys = ['mask']
  2414. elif f == 'eval':
  2415. align_copy = False
  2416. align_keys = ['other']
  2417. elif f == 'fillna':
  2418. # fillna internally does putmask, maybe it's better to do this
  2419. # at mgr, not block level?
  2420. align_copy = False
  2421. align_keys = ['value']
  2422. else:
  2423. align_keys = []
  2424. aligned_args = dict((k, kwargs[k])
  2425. for k in align_keys
  2426. if hasattr(kwargs[k], 'reindex_axis'))
  2427. for b in self.blocks:
  2428. if filter is not None:
  2429. if not b.mgr_locs.isin(filter_locs).any():
  2430. result_blocks.append(b)
  2431. continue
  2432. if aligned_args:
  2433. b_items = self.items[b.mgr_locs.indexer]
  2434. for k, obj in aligned_args.items():
  2435. axis = getattr(obj, '_info_axis_number', 0)
  2436. kwargs[k] = obj.reindex_axis(b_items, axis=axis,
  2437. copy=align_copy)
  2438. kwargs['mgr'] = self
  2439. applied = getattr(b, f)(**kwargs)
  2440. result_blocks = _extend_blocks(applied, result_blocks)
  2441. if len(result_blocks) == 0:
  2442. return self.make_empty(axes or self.axes)
  2443. bm = self.__class__(result_blocks, axes or self.axes,
  2444. do_integrity_check=do_integrity_check)
  2445. bm._consolidate_inplace()
  2446. return bm
def reduction(self, f, axis=0, consolidate=True, transposed=False,
              **kwargs):
    """
    Iterate over the blocks, apply a block-level reduction and collect
    the results into a new manager (or a scalar).

    This routine is intended for reduction type operations and
    will do inference on the generated blocks.

    Parameters
    ----------
    f : str
        name of the block method to call; it is looked up with
        ``getattr(block, f)`` and must return an ``(axis, block)`` pair
    axis : int, default 0
        reduction axis, forwarded to the block method
    consolidate : boolean, default True
        Join together blocks having same dtype before reducing
    transposed : boolean, default False
        we are holding transposed data
    **kwargs
        forwarded to the block method; ``mgr`` is always set to ``self``

    Returns
    -------
    A manager of ``self.__class__`` when any result block is 2-dim, a
    bare scalar when the only result is a single 0-dim value, otherwise
    a SingleBlockManager.
    """

    if consolidate:
        self._consolidate_inplace()

    # run the block-level method, keeping the returned axis and block
    # of every original block in parallel lists
    axes, blocks = [], []
    for b in self.blocks:
        kwargs['mgr'] = self
        axe, block = getattr(b, f)(axis=axis, **kwargs)

        axes.append(axe)
        blocks.append(block)

    # note that some DatetimeTZ, Categorical are always ndim==1
    ndim = set([b.ndim for b in blocks])

    if 2 in ndim:

        new_axes = list(self.axes)

        # multiple blocks that are reduced
        if len(blocks) > 1:
            new_axes[1] = axes[0]

            # reset the placement to the original
            for b, sb in zip(blocks, self.blocks):
                b.mgr_locs = sb.mgr_locs

        else:
            new_axes[axis] = Index(np.concatenate(
                [ax.values for ax in axes]))

        if transposed:
            # flip the axes back and transpose each block's values
            new_axes = new_axes[::-1]
            blocks = [b.make_block(b.values.T,
                                   placement=np.arange(b.shape[1])
                                   ) for b in blocks]

        return self.__class__(blocks, new_axes)

    # 0 ndim
    if 0 in ndim and 1 not in ndim:
        values = np.array([b.values for b in blocks])
        if len(values) == 1:
            # a single 0-dim result is returned as a plain scalar
            return values.item()
        blocks = [make_block(values, ndim=1)]
        axes = Index([ax[0] for ax in axes])

    # single block
    values = _concat._concat_compat([b.values for b in blocks])

    # compute the orderings of our original data
    if len(self.blocks) > 1:

        # indexer[j] is the position, in block-iteration order, of the
        # value whose manager location is j
        indexer = np.empty(len(self.axes[0]), dtype=np.intp)
        i = 0
        for b in self.blocks:
            for j in b.mgr_locs:
                indexer[j] = i
                i = i + 1

        values = values.take(indexer)

    return SingleBlockManager(
        [make_block(values,
                    ndim=1,
                    placement=np.arange(len(values)))],
        axes[0])
  2515. def isnull(self, **kwargs):
  2516. return self.apply('apply', **kwargs)
  2517. def where(self, **kwargs):
  2518. return self.apply('where', **kwargs)
  2519. def eval(self, **kwargs):
  2520. return self.apply('eval', **kwargs)
  2521. def quantile(self, **kwargs):
  2522. return self.reduction('quantile', **kwargs)
  2523. def setitem(self, **kwargs):
  2524. return self.apply('setitem', **kwargs)
  2525. def putmask(self, **kwargs):
  2526. return self.apply('putmask', **kwargs)
  2527. def diff(self, **kwargs):
  2528. return self.apply('diff', **kwargs)
  2529. def interpolate(self, **kwargs):
  2530. return self.apply('interpolate', **kwargs)
  2531. def shift(self, **kwargs):
  2532. return self.apply('shift', **kwargs)
  2533. def fillna(self, **kwargs):
  2534. return self.apply('fillna', **kwargs)
  2535. def downcast(self, **kwargs):
  2536. return self.apply('downcast', **kwargs)
  2537. def astype(self, dtype, **kwargs):
  2538. return self.apply('astype', dtype=dtype, **kwargs)
  2539. def convert(self, **kwargs):
  2540. return self.apply('convert', **kwargs)
  2541. def replace(self, **kwargs):
  2542. return self.apply('replace', **kwargs)
  2543. def replace_list(self, src_list, dest_list, inplace=False, regex=False,
  2544. mgr=None):
  2545. """ do a list replace """
  2546. if mgr is None:
  2547. mgr = self
  2548. # figure out our mask a-priori to avoid repeated replacements
  2549. values = self.as_matrix()
  2550. def comp(s):
  2551. if isnull(s):
  2552. return isnull(values)
  2553. return _possibly_compare(values, getattr(s, 'asm8', s),
  2554. operator.eq)
  2555. masks = [comp(s) for i, s in enumerate(src_list)]
  2556. result_blocks = []
  2557. for blk in self.blocks:
  2558. # its possible to get multiple result blocks here
  2559. # replace ALWAYS will return a list
  2560. rb = [blk if inplace else blk.copy()]
  2561. for i, (s, d) in enumerate(zip(src_list, dest_list)):
  2562. new_rb = []
  2563. for b in rb:
  2564. if b.dtype == np.object_:
  2565. result = b.replace(s, d, inplace=inplace, regex=regex,
  2566. mgr=mgr)
  2567. new_rb = _extend_blocks(result, new_rb)
  2568. else:
  2569. # get our mask for this element, sized to this
  2570. # particular block
  2571. m = masks[i][b.mgr_locs.indexer]
  2572. if m.any():
  2573. new_rb.extend(b.putmask(m, d, inplace=True))
  2574. else:
  2575. new_rb.append(b)
  2576. rb = new_rb
  2577. result_blocks.extend(rb)
  2578. bm = self.__class__(result_blocks, self.axes)
  2579. bm._consolidate_inplace()
  2580. return bm
  2581. def reshape_nd(self, axes, **kwargs):
  2582. """ a 2d-nd reshape operation on a BlockManager """
  2583. return self.apply('reshape_nd', axes=axes, **kwargs)
  2584. def is_consolidated(self):
  2585. """
  2586. Return True if more than one block with the same dtype
  2587. """
  2588. if not self._known_consolidated:
  2589. self._consolidate_check()
  2590. return self._is_consolidated
  2591. def _consolidate_check(self):
  2592. ftypes = [blk.ftype for blk in self.blocks]
  2593. self._is_consolidated = len(ftypes) == len(set(ftypes))
  2594. self._known_consolidated = True
  2595. @property
  2596. def is_mixed_type(self):
  2597. # Warning, consolidation needs to get checked upstairs
  2598. self._consolidate_inplace()
  2599. return len(self.blocks) > 1
  2600. @property
  2601. def is_numeric_mixed_type(self):
  2602. # Warning, consolidation needs to get checked upstairs
  2603. self._consolidate_inplace()
  2604. return all([block.is_numeric for block in self.blocks])
  2605. @property
  2606. def is_datelike_mixed_type(self):
  2607. # Warning, consolidation needs to get checked upstairs
  2608. self._consolidate_inplace()
  2609. return any([block.is_datelike for block in self.blocks])
  2610. @property
  2611. def is_view(self):
  2612. """ return a boolean if we are a single block and are a view """
  2613. if len(self.blocks) == 1:
  2614. return self.blocks[0].is_view
  2615. # It is technically possible to figure out which blocks are views
  2616. # e.g. [ b.values.base is not None for b in self.blocks ]
  2617. # but then we have the case of possibly some blocks being a view
  2618. # and some blocks not. setting in theory is possible on the non-view
  2619. # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
  2620. # complicated
  2621. return False
  2622. def get_bool_data(self, copy=False):
  2623. """
  2624. Parameters
  2625. ----------
  2626. copy : boolean, default False
  2627. Whether to copy the blocks
  2628. """
  2629. self._consolidate_inplace()
  2630. return self.combine([b for b in self.blocks if b.is_bool], copy)
  2631. def get_numeric_data(self, copy=False):
  2632. """
  2633. Parameters
  2634. ----------
  2635. copy : boolean, default False
  2636. Whether to copy the blocks
  2637. """
  2638. self._consolidate_inplace()
  2639. return self.combine([b for b in self.blocks if b.is_numeric], copy)
  2640. def combine(self, blocks, copy=True):
  2641. """ return a new manager with the blocks """
  2642. if len(blocks) == 0:
  2643. return self.make_empty()
  2644. # FIXME: optimization potential
  2645. indexer = np.sort(np.concatenate([b.mgr_locs.as_array
  2646. for b in blocks]))
  2647. inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])
  2648. new_blocks = []
  2649. for b in blocks:
  2650. b = b.copy(deep=copy)
  2651. b.mgr_locs = algos.take_1d(inv_indexer, b.mgr_locs.as_array,
  2652. axis=0, allow_fill=False)
  2653. new_blocks.append(b)
  2654. axes = list(self.axes)
  2655. axes[0] = self.items.take(indexer)
  2656. return self.__class__(new_blocks, axes, do_integrity_check=False)
  2657. def get_slice(self, slobj, axis=0):
  2658. if axis >= self.ndim:
  2659. raise IndexError("Requested axis not found in manager")
  2660. if axis == 0:
  2661. new_blocks = self._slice_take_blocks_ax0(slobj)
  2662. else:
  2663. slicer = [slice(None)] * (axis + 1)
  2664. slicer[axis] = slobj
  2665. slicer = tuple(slicer)
  2666. new_blocks = [blk.getitem_block(slicer) for blk in self.blocks]
  2667. new_axes = list(self.axes)
  2668. new_axes[axis] = new_axes[axis][slobj]
  2669. bm = self.__class__(new_blocks, new_axes, do_integrity_check=False,
  2670. fastpath=True)
  2671. bm._consolidate_inplace()
  2672. return bm
  2673. def __contains__(self, item):
  2674. return item in self.items
  2675. @property
  2676. def nblocks(self):
  2677. return len(self.blocks)
  2678. def copy(self, deep=True, mgr=None):
  2679. """
  2680. Make deep or shallow copy of BlockManager
  2681. Parameters
  2682. ----------
  2683. deep : boolean o rstring, default True
  2684. If False, return shallow copy (do not copy data)
  2685. If 'all', copy data and a deep copy of the index
  2686. Returns
  2687. -------
  2688. copy : BlockManager
  2689. """
  2690. # this preserves the notion of view copying of axes
  2691. if deep:
  2692. if deep == 'all':
  2693. copy = lambda ax: ax.copy(deep=True)
  2694. else:
  2695. copy = lambda ax: ax.view()
  2696. new_axes = [copy(ax) for ax in self.axes]
  2697. else:
  2698. new_axes = list(self.axes)
  2699. return self.apply('copy', axes=new_axes, deep=deep,
  2700. do_integrity_check=False)
  2701. def as_matrix(self, items=None):
  2702. if len(self.blocks) == 0:
  2703. return np.empty(self.shape, dtype=float)
  2704. if items is not None:
  2705. mgr = self.reindex_axis(items, axis=0)
  2706. else:
  2707. mgr = self
  2708. if self._is_single_block or not self.is_mixed_type:
  2709. return mgr.blocks[0].get_values()
  2710. else:
  2711. return mgr._interleave()
  2712. def _interleave(self):
  2713. """
  2714. Return ndarray from blocks with specified item order
  2715. Items must be contained in the blocks
  2716. """
  2717. dtype = _interleaved_dtype(self.blocks)
  2718. result = np.empty(self.shape, dtype=dtype)
  2719. if result.shape[0] == 0:
  2720. # Workaround for numpy 1.7 bug:
  2721. #
  2722. # >>> a = np.empty((0,10))
  2723. # >>> a[slice(0,0)]
  2724. # array([], shape=(0, 10), dtype=float64)
  2725. # >>> a[[]]
  2726. # Traceback (most recent call last):
  2727. # File "<stdin>", line 1, in <module>
  2728. # IndexError: index 0 is out of bounds for axis 0 with size 0
  2729. return result
  2730. itemmask = np.zeros(self.shape[0])
  2731. for blk in self.blocks:
  2732. rl = blk.mgr_locs
  2733. result[rl.indexer] = blk.get_values(dtype)
  2734. itemmask[rl.indexer] = 1
  2735. if not itemmask.all():
  2736. raise AssertionError('Some items were not contained in blocks')
  2737. return result
  2738. def xs(self, key, axis=1, copy=True, takeable=False):
  2739. if axis < 1:
  2740. raise AssertionError('Can only take xs across axis >= 1, got %d' %
  2741. axis)
  2742. # take by position
  2743. if takeable:
  2744. loc = key
  2745. else:
  2746. loc = self.axes[axis].get_loc(key)
  2747. slicer = [slice(None, None) for _ in range(self.ndim)]
  2748. slicer[axis] = loc
  2749. slicer = tuple(slicer)
  2750. new_axes = list(self.axes)
  2751. # could be an array indexer!
  2752. if isinstance(loc, (slice, np.ndarray)):
  2753. new_axes[axis] = new_axes[axis][loc]
  2754. else:
  2755. new_axes.pop(axis)
  2756. new_blocks = []
  2757. if len(self.blocks) > 1:
  2758. # we must copy here as we are mixed type
  2759. for blk in self.blocks:
  2760. newb = make_block(values=blk.values[slicer],
  2761. klass=blk.__class__, fastpath=True,
  2762. placement=blk.mgr_locs)
  2763. new_blocks.append(newb)
  2764. elif len(self.blocks) == 1:
  2765. block = self.blocks[0]
  2766. vals = block.values[slicer]
  2767. if copy:
  2768. vals = vals.copy()
  2769. new_blocks = [make_block(values=vals,
  2770. placement=block.mgr_locs,
  2771. klass=block.__class__,
  2772. fastpath=True, )]
  2773. return self.__class__(new_blocks, new_axes)
  2774. def fast_xs(self, loc):
  2775. """
  2776. get a cross sectional for a given location in the
  2777. items ; handle dups
  2778. return the result, is *could* be a view in the case of a
  2779. single block
  2780. """
  2781. if len(self.blocks) == 1:
  2782. return self.blocks[0].iget((slice(None), loc))
  2783. items = self.items
  2784. # non-unique (GH4726)
  2785. if not items.is_unique:
  2786. result = self._interleave()
  2787. if self.ndim == 2:
  2788. result = result.T
  2789. return result[loc]
  2790. # unique
  2791. dtype = _interleaved_dtype(self.blocks)
  2792. n = len(items)
  2793. result = np.empty(n, dtype=dtype)
  2794. for blk in self.blocks:
  2795. # Such assignment may incorrectly coerce NaT to None
  2796. # result[blk.mgr_locs] = blk._slice((slice(None), loc))
  2797. for i, rl in enumerate(blk.mgr_locs):
  2798. result[rl] = blk._try_coerce_result(blk.iget((i, loc)))
  2799. return result
  2800. def consolidate(self):
  2801. """
  2802. Join together blocks having same dtype
  2803. Returns
  2804. -------
  2805. y : BlockManager
  2806. """
  2807. if self.is_consolidated():
  2808. return self
  2809. bm = self.__class__(self.blocks, self.axes)
  2810. bm._is_consolidated = False
  2811. bm._consolidate_inplace()
  2812. return bm
  2813. def _consolidate_inplace(self):
  2814. if not self.is_consolidated():
  2815. self.blocks = tuple(_consolidate(self.blocks))
  2816. self._is_consolidated = True
  2817. self._known_consolidated = True
  2818. self._rebuild_blknos_and_blklocs()
def get(self, item, fastpath=True):
    """
    Return values for selected item (ndarray or BlockManager).

    Parameters
    ----------
    item : label looked up in ``self.items``
    fastpath : boolean, default True
        forwarded to ``iget`` when the items are unique

    Raises
    ------
    ValueError
        items are unique, ``item`` is null and the number of null labels
        in ``self.items`` is not exactly one
    TypeError
        items are not unique and ``item`` is null
    """
    if self.items.is_unique:

        if not isnull(item):
            loc = self.items.get_loc(item)
        else:
            # positions of all null labels in the items axis
            indexer = np.arange(len(self.items))[isnull(self.items)]

            # allow a single nan location indexer
            # NOTE(review): if is_scalar(indexer) were ever true, ``loc``
            # would be unbound below -- presumably unreachable because
            # indexer comes from boolean-mask indexing; confirm.
            if not is_scalar(indexer):
                if len(indexer) == 1:
                    loc = indexer.item()
                else:
                    raise ValueError("cannot label index with a null key")

        return self.iget(loc, fastpath=fastpath)
    else:

        if isnull(item):
            raise TypeError("cannot label index with a null key")

        # duplicated labels: select every matching position as a manager
        indexer = self.items.get_indexer_for([item])
        return self.reindex_indexer(new_axis=self.items[indexer],
                                    indexer=indexer, axis=0,
                                    allow_dups=True)
  2842. def iget(self, i, fastpath=True):
  2843. """
  2844. Return the data as a SingleBlockManager if fastpath=True and possible
  2845. Otherwise return as a ndarray
  2846. """
  2847. block = self.blocks[self._blknos[i]]
  2848. values = block.iget(self._blklocs[i])
  2849. if not fastpath or not block._box_to_block_values or values.ndim != 1:
  2850. return values
  2851. # fastpath shortcut for select a single-dim from a 2-dim BM
  2852. return SingleBlockManager(
  2853. [block.make_block_same_class(values,
  2854. placement=slice(0, len(values)),
  2855. ndim=1, fastpath=True)],
  2856. self.axes[1])
  2857. def get_scalar(self, tup):
  2858. """
  2859. Retrieve single item
  2860. """
  2861. full_loc = list(ax.get_loc(x) for ax, x in zip(self.axes, tup))
  2862. blk = self.blocks[self._blknos[full_loc[0]]]
  2863. values = blk.values
  2864. # FIXME: this may return non-upcasted types?
  2865. if values.ndim == 1:
  2866. return values[full_loc[1]]
  2867. full_loc[0] = self._blklocs[full_loc[0]]
  2868. return values[tuple(full_loc)]
def delete(self, item):
    """
    Delete selected item (items if non-unique) in-place.

    Blocks whose every location is deleted are dropped; partially
    affected blocks have the rows removed and all later manager
    locations are shifted down accordingly.
    """
    indexer = self.items.get_loc(item)

    # boolean mask over the items axis of what is being removed
    is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
    is_deleted[indexer] = True
    # for each location, minus the number of deleted locations up to and
    # including it -- the shift to apply to surviving locations
    ref_loc_offset = -is_deleted.cumsum()

    is_blk_deleted = [False] * len(self.blocks)

    # first affected position; blocks placed entirely before it are
    # not visited by the loop below
    if isinstance(indexer, int):
        affected_start = indexer
    else:
        affected_start = is_deleted.nonzero()[0][0]

    for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]):
        blk = self.blocks[blkno]
        bml = blk.mgr_locs
        blk_del = is_deleted[bml.indexer].nonzero()[0]

        if len(blk_del) == len(bml):
            # every location of this block is deleted: drop it below
            is_blk_deleted[blkno] = True
            continue
        elif len(blk_del) != 0:
            blk.delete(blk_del)
            # re-read: blk.delete changed the block's placement
            bml = blk.mgr_locs

        blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer])

    # FIXME: use Index.delete as soon as it uses fastpath=True
    self.axes[0] = self.items[~is_deleted]
    self.blocks = tuple(b for blkno, b in enumerate(self.blocks)
                        if not is_blk_deleted[blkno])
    # invalidate the cached shape and rebuild the location maps
    self._shape = None
    self._rebuild_blknos_and_blklocs()
  2899. def set(self, item, value, check=False):
  2900. """
  2901. Set new item in-place. Does not consolidate. Adds new Block if not
  2902. contained in the current set of items
  2903. if check, then validate that we are not setting the same data in-place
  2904. """
  2905. # FIXME: refactor, clearly separate broadcasting & zip-like assignment
  2906. # can prob also fix the various if tests for sparse/categorical
  2907. value_is_extension_type = is_extension_type(value)
  2908. # categorical/spares/datetimetz
  2909. if value_is_extension_type:
  2910. def value_getitem(placement):
  2911. return value
  2912. else:
  2913. if value.ndim == self.ndim - 1:
  2914. value = _safe_reshape(value, (1,) + value.shape)
  2915. def value_getitem(placement):
  2916. return value
  2917. else:
  2918. def value_getitem(placement):
  2919. return value[placement.indexer]
  2920. if value.shape[1:] != self.shape[1:]:
  2921. raise AssertionError('Shape of new values must be compatible '
  2922. 'with manager shape')
  2923. try:
  2924. loc = self.items.get_loc(item)
  2925. except KeyError:
  2926. # This item wasn't present, just insert at end
  2927. self.insert(len(self.items), item, value)
  2928. return
  2929. if isinstance(loc, int):
  2930. loc = [loc]
  2931. blknos = self._blknos[loc]
  2932. blklocs = self._blklocs[loc].copy()
  2933. unfit_mgr_locs = []
  2934. unfit_val_locs = []
  2935. removed_blknos = []
  2936. for blkno, val_locs in _get_blkno_placements(blknos, len(self.blocks),
  2937. group=True):
  2938. blk = self.blocks[blkno]
  2939. blk_locs = blklocs[val_locs.indexer]
  2940. if blk.should_store(value):
  2941. blk.set(blk_locs, value_getitem(val_locs), check=check)
  2942. else:
  2943. unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
  2944. unfit_val_locs.append(val_locs)
  2945. # If all block items are unfit, schedule the block for removal.
  2946. if len(val_locs) == len(blk.mgr_locs):
  2947. removed_blknos.append(blkno)
  2948. else:
  2949. self._blklocs[blk.mgr_locs.indexer] = -1
  2950. blk.delete(blk_locs)
  2951. self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk))
  2952. if len(removed_blknos):
  2953. # Remove blocks & update blknos accordingly
  2954. is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
  2955. is_deleted[removed_blknos] = True
  2956. new_blknos = np.empty(self.nblocks, dtype=np.int64)
  2957. new_blknos.fill(-1)
  2958. new_blknos[~is_deleted] = np.arange(self.nblocks -
  2959. len(removed_blknos))
  2960. self._blknos = algos.take_1d(new_blknos, self._blknos, axis=0,
  2961. allow_fill=False)
  2962. self.blocks = tuple(blk for i, blk in enumerate(self.blocks)
  2963. if i not in set(removed_blknos))
  2964. if unfit_val_locs:
  2965. unfit_mgr_locs = np.concatenate(unfit_mgr_locs)
  2966. unfit_count = len(unfit_mgr_locs)
  2967. new_blocks = []
  2968. if value_is_extension_type:
  2969. # This code (ab-)uses the fact that sparse blocks contain only
  2970. # one item.
  2971. new_blocks.extend(
  2972. make_block(values=value.copy(), ndim=self.ndim,
  2973. placement=slice(mgr_loc, mgr_loc + 1))
  2974. for mgr_loc in unfit_mgr_locs)
  2975. self._blknos[unfit_mgr_locs] = (np.arange(unfit_count) +
  2976. len(self.blocks))
  2977. self._blklocs[unfit_mgr_locs] = 0
  2978. else:
  2979. # unfit_val_locs contains BlockPlacement objects
  2980. unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])
  2981. new_blocks.append(
  2982. make_block(values=value_getitem(unfit_val_items),
  2983. ndim=self.ndim, placement=unfit_mgr_locs))
  2984. self._blknos[unfit_mgr_locs] = len(self.blocks)
  2985. self._blklocs[unfit_mgr_locs] = np.arange(unfit_count)
  2986. self.blocks += tuple(new_blocks)
  2987. # Newly created block's dtype may already be present.
  2988. self._known_consolidated = False
  2989. def insert(self, loc, item, value, allow_duplicates=False):
  2990. """
  2991. Insert item at selected position.
  2992. Parameters
  2993. ----------
  2994. loc : int
  2995. item : hashable
  2996. value : array_like
  2997. allow_duplicates: bool
  2998. If False, trying to insert non-unique item will raise
  2999. """
  3000. if not allow_duplicates and item in self.items:
  3001. # Should this be a different kind of error??
  3002. raise ValueError('cannot insert %s, already exists' % item)
  3003. if not isinstance(loc, int):
  3004. raise TypeError("loc must be int")
  3005. # insert to the axis; this could possibly raise a TypeError
  3006. new_axis = self.items.insert(loc, item)
  3007. block = make_block(values=value, ndim=self.ndim,
  3008. placement=slice(loc, loc + 1))
  3009. for blkno, count in _fast_count_smallints(self._blknos[loc:]):
  3010. blk = self.blocks[blkno]
  3011. if count == len(blk.mgr_locs):
  3012. blk.mgr_locs = blk.mgr_locs.add(1)
  3013. else:
  3014. new_mgr_locs = blk.mgr_locs.as_array.copy()
  3015. new_mgr_locs[new_mgr_locs >= loc] += 1
  3016. blk.mgr_locs = new_mgr_locs
  3017. if loc == self._blklocs.shape[0]:
  3018. # np.append is a lot faster (at least in numpy 1.7.1), let's use it
  3019. # if we can.
  3020. self._blklocs = np.append(self._blklocs, 0)
  3021. self._blknos = np.append(self._blknos, len(self.blocks))
  3022. else:
  3023. self._blklocs = np.insert(self._blklocs, loc, 0)
  3024. self._blknos = np.insert(self._blknos, loc, len(self.blocks))
  3025. self.axes[0] = new_axis
  3026. self.blocks += (block,)
  3027. self._shape = None
  3028. self._known_consolidated = False
  3029. if len(self.blocks) > 100:
  3030. self._consolidate_inplace()
  3031. def reindex_axis(self, new_index, axis, method=None, limit=None,
  3032. fill_value=None, copy=True):
  3033. """
  3034. Conform block manager to new index.
  3035. """
  3036. new_index = _ensure_index(new_index)
  3037. new_index, indexer = self.axes[axis].reindex(new_index, method=method,
  3038. limit=limit)
  3039. return self.reindex_indexer(new_index, indexer, axis=axis,
  3040. fill_value=fill_value, copy=copy)
  3041. def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
  3042. allow_dups=False, copy=True):
  3043. """
  3044. Parameters
  3045. ----------
  3046. new_axis : Index
  3047. indexer : ndarray of int64 or None
  3048. axis : int
  3049. fill_value : object
  3050. allow_dups : bool
  3051. pandas-indexer with -1's only.
  3052. """
  3053. if indexer is None:
  3054. if new_axis is self.axes[axis] and not copy:
  3055. return self
  3056. result = self.copy(deep=copy)
  3057. result.axes = list(self.axes)
  3058. result.axes[axis] = new_axis
  3059. return result
  3060. self._consolidate_inplace()
  3061. # some axes don't allow reindexing with dups
  3062. if not allow_dups:
  3063. self.axes[axis]._can_reindex(indexer)
  3064. if axis >= self.ndim:
  3065. raise IndexError("Requested axis not found in manager")
  3066. if axis == 0:
  3067. new_blocks = self._slice_take_blocks_ax0(indexer,
  3068. fill_tuple=(fill_value,))
  3069. else:
  3070. new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
  3071. fill_value if fill_value is not None else blk.fill_value,))
  3072. for blk in self.blocks]
  3073. new_axes = list(self.axes)
  3074. new_axes[axis] = new_axis
  3075. return self.__class__(new_blocks, new_axes)
def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
    """
    Slice/take blocks along axis=0.

    Overloaded for SingleBlock

    Parameters
    ----------
    slice_or_indexer : slice, mask or integer indexer over the items axis
    fill_tuple : tuple or None
        when not None, filling is allowed and ``fill_tuple[0]`` is the
        fill value used for positions marked -1

    Returns
    -------
    new_blocks : list of Block
    """

    allow_fill = fill_tuple is not None

    sl_type, slobj, sllen = _preprocess_slice_or_indexer(
        slice_or_indexer, self.shape[0], allow_fill=allow_fill)

    if self._is_single_block:
        blk = self.blocks[0]

        if sl_type in ('slice', 'mask'):
            return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
        elif not allow_fill or self.ndim == 1:
            if allow_fill and fill_tuple[0] is None:
                # no explicit fill value: use the one matching the dtype
                _, fill_value = _maybe_promote(blk.dtype)
                fill_tuple = (fill_value, )

            return [blk.take_nd(slobj, axis=0,
                                new_mgr_locs=slice(0, sllen),
                                fill_tuple=fill_tuple)]
        # otherwise fall through to the general path below

    # block number / in-block location for every requested position
    if sl_type in ('slice', 'mask'):
        blknos = self._blknos[slobj]
        blklocs = self._blklocs[slobj]
    else:
        blknos = algos.take_1d(self._blknos, slobj, fill_value=-1,
                               allow_fill=allow_fill)
        blklocs = algos.take_1d(self._blklocs, slobj, fill_value=-1,
                                allow_fill=allow_fill)

    # When filling blknos, make sure blknos is updated before appending to
    # blocks list, that way new blkno is exactly len(blocks).
    #
    # FIXME: mgr_groupby_blknos must return mgr_locs in ascending order,
    # pytables serialization will break otherwise.
    blocks = []
    for blkno, mgr_locs in _get_blkno_placements(blknos, len(self.blocks),
                                                 group=True):
        if blkno == -1:
            # If we've got here, fill_tuple was not None.
            fill_value = fill_tuple[0]

            blocks.append(self._make_na_block(placement=mgr_locs,
                                              fill_value=fill_value))
        else:
            blk = self.blocks[blkno]

            # Otherwise, slicing along items axis is necessary.
            if not blk._can_consolidate:
                # A non-consolidatable block, it's easy, because there's
                # only one item and each mgr loc is a copy of that single
                # item.
                for mgr_loc in mgr_locs:
                    newblk = blk.copy(deep=True)
                    newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
                    blocks.append(newblk)

            else:
                blocks.append(blk.take_nd(blklocs[mgr_locs.indexer],
                                          axis=0, new_mgr_locs=mgr_locs,
                                          fill_tuple=None))

    return blocks
  3135. def _make_na_block(self, placement, fill_value=None):
  3136. # TODO: infer dtypes other than float64 from fill_value
  3137. if fill_value is None:
  3138. fill_value = np.nan
  3139. block_shape = list(self.shape)
  3140. block_shape[0] = len(placement)
  3141. dtype, fill_value = _infer_dtype_from_scalar(fill_value)
  3142. block_values = np.empty(block_shape, dtype=dtype)
  3143. block_values.fill(fill_value)
  3144. return make_block(block_values, placement=placement)
  3145. def take(self, indexer, axis=1, verify=True, convert=True):
  3146. """
  3147. Take items along any axis.
  3148. """
  3149. self._consolidate_inplace()
  3150. indexer = (np.arange(indexer.start, indexer.stop, indexer.step,
  3151. dtype='int64')
  3152. if isinstance(indexer, slice)
  3153. else np.asanyarray(indexer, dtype='int64'))
  3154. n = self.shape[axis]
  3155. if convert:
  3156. indexer = maybe_convert_indices(indexer, n)
  3157. if verify:
  3158. if ((indexer == -1) | (indexer >= n)).any():
  3159. raise Exception('Indices must be nonzero and less than '
  3160. 'the axis length')
  3161. new_labels = self.axes[axis].take(indexer)
  3162. return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
  3163. axis=axis, allow_dups=True)
  3164. def merge(self, other, lsuffix='', rsuffix=''):
  3165. if not self._is_indexed_like(other):
  3166. raise AssertionError('Must have same axes to merge managers')
  3167. l, r = items_overlap_with_suffix(left=self.items, lsuffix=lsuffix,
  3168. right=other.items, rsuffix=rsuffix)
  3169. new_items = _concat_indexes([l, r])
  3170. new_blocks = [blk.copy(deep=False) for blk in self.blocks]
  3171. offset = self.shape[0]
  3172. for blk in other.blocks:
  3173. blk = blk.copy(deep=False)
  3174. blk.mgr_locs = blk.mgr_locs.add(offset)
  3175. new_blocks.append(blk)
  3176. new_axes = list(self.axes)
  3177. new_axes[0] = new_items
  3178. return self.__class__(_consolidate(new_blocks), new_axes)
  3179. def _is_indexed_like(self, other):
  3180. """
  3181. Check all axes except items
  3182. """
  3183. if self.ndim != other.ndim:
  3184. raise AssertionError('Number of dimensions must agree '
  3185. 'got %d and %d' % (self.ndim, other.ndim))
  3186. for ax, oax in zip(self.axes[1:], other.axes[1:]):
  3187. if not ax.equals(oax):
  3188. return False
  3189. return True
  3190. def equals(self, other):
  3191. self_axes, other_axes = self.axes, other.axes
  3192. if len(self_axes) != len(other_axes):
  3193. return False
  3194. if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
  3195. return False
  3196. self._consolidate_inplace()
  3197. other._consolidate_inplace()
  3198. if len(self.blocks) != len(other.blocks):
  3199. return False
  3200. # canonicalize block order, using a tuple combining the type
  3201. # name and then mgr_locs because there might be unconsolidated
  3202. # blocks (say, Categorical) which can only be distinguished by
  3203. # the iteration order
  3204. def canonicalize(block):
  3205. return (block.dtype.name, block.mgr_locs.as_array.tolist())
  3206. self_blocks = sorted(self.blocks, key=canonicalize)
  3207. other_blocks = sorted(other.blocks, key=canonicalize)
  3208. return all(block.equals(oblock)
  3209. for block, oblock in zip(self_blocks, other_blocks))
  3210. class SingleBlockManager(BlockManager):
  3211. """ manage a single block with """
  3212. ndim = 1
  3213. _is_consolidated = True
  3214. _known_consolidated = True
  3215. __slots__ = ()
def __init__(self, block, axis, do_integrity_check=False, fastpath=False):
    """
    Build a manager around exactly one block and one axis.

    Parameters
    ----------
    block : Block, list of Block, or raw values
        anything that is not a Block after list handling is wrapped with
        ``make_block``
    axis : Index or list holding a single Index
    do_integrity_check : boolean, default False
        not used in this constructor
    fastpath : boolean, default False
        when true the axis is stored as given and multiple blocks are
        rejected instead of being consolidated

    Raises
    ------
    ValueError
        more than one axis is passed, or the block list cannot be reduced
        to a single block
    """

    if isinstance(axis, list):
        if len(axis) != 1:
            raise ValueError("cannot create SingleBlockManager with more "
                             "than 1 axis")
        axis = axis[0]

    # passed from constructor, single block, single axis
    if fastpath:
        self.axes = [axis]
        if isinstance(block, list):

            # empty block
            if len(block) == 0:
                block = [np.array([])]
            elif len(block) != 1:
                raise ValueError('Cannot create SingleBlockManager with '
                                 'more than 1 block')
            block = block[0]
    else:
        self.axes = [_ensure_index(axis)]

        # create the block here
        if isinstance(block, list):

            # provide consolidation to the interleaved_dtype
            if len(block) > 1:
                dtype = _interleaved_dtype(block)
                block = [b.astype(dtype) for b in block]
                block = _consolidate(block)

            if len(block) != 1:
                raise ValueError('Cannot create SingleBlockManager with '
                                 'more than 1 block')
            block = block[0]

    # raw values: wrap them in a 1-dim block spanning the whole axis
    if not isinstance(block, Block):
        block = make_block(block, placement=slice(0, len(axis)), ndim=1,
                           fastpath=True)

    self.blocks = [block]
  3250. def _post_setstate(self):
  3251. pass
  3252. @property
  3253. def _block(self):
  3254. return self.blocks[0]
  3255. @property
  3256. def _values(self):
  3257. return self._block.values
  3258. @property
  3259. def _blknos(self):
  3260. """ compat with BlockManager """
  3261. return None
  3262. @property
  3263. def _blklocs(self):
  3264. """ compat with BlockManager """
  3265. return None
  3266. def reindex(self, new_axis, indexer=None, method=None, fill_value=None,
  3267. limit=None, copy=True):
  3268. # if we are the same and don't copy, just return
  3269. if self.index.equals(new_axis):
  3270. if copy:
  3271. return self.copy(deep=True)
  3272. else:
  3273. return self
  3274. values = self._block.get_values()
  3275. if indexer is None:
  3276. indexer = self.items.get_indexer_for(new_axis)
  3277. if fill_value is None:
  3278. fill_value = np.nan
  3279. new_values = algos.take_1d(values, indexer, fill_value=fill_value)
  3280. # fill if needed
  3281. if method is not None or limit is not None:
  3282. new_values = missing.interpolate_2d(new_values,
  3283. method=method,
  3284. limit=limit,
  3285. fill_value=fill_value)
  3286. if self._block.is_sparse:
  3287. make_block = self._block.make_block_same_class
  3288. block = make_block(new_values, copy=copy,
  3289. placement=slice(0, len(new_axis)))
  3290. mgr = SingleBlockManager(block, new_axis)
  3291. mgr._consolidate_inplace()
  3292. return mgr
  3293. def get_slice(self, slobj, axis=0):
  3294. if axis >= self.ndim:
  3295. raise IndexError("Requested axis not found in manager")
  3296. return self.__class__(self._block._slice(slobj),
  3297. self.index[slobj], fastpath=True)
  3298. @property
  3299. def index(self):
  3300. return self.axes[0]
  3301. def convert(self, **kwargs):
  3302. """ convert the whole block as one """
  3303. kwargs['by_item'] = False
  3304. return self.apply('convert', **kwargs)
  3305. @property
  3306. def dtype(self):
  3307. return self._block.dtype
  3308. @property
  3309. def array_dtype(self):
  3310. return self._block.array_dtype
  3311. @property
  3312. def ftype(self):
  3313. return self._block.ftype
  3314. def get_dtype_counts(self):
  3315. return {self.dtype.name: 1}
  3316. def get_ftype_counts(self):
  3317. return {self.ftype: 1}
  3318. def get_dtypes(self):
  3319. return np.array([self._block.dtype])
  3320. def get_ftypes(self):
  3321. return np.array([self._block.ftype])
  3322. def external_values(self):
  3323. return self._block.external_values()
  3324. def internal_values(self):
  3325. return self._block.internal_values()
  3326. def get_values(self):
  3327. """ return a dense type view """
  3328. return np.array(self._block.to_dense(), copy=False)
  3329. @property
  3330. def asobject(self):
  3331. """
  3332. return a object dtype array. datetime/timedelta like values are boxed
  3333. to Timestamp/Timedelta instances.
  3334. """
  3335. return self._block.get_values(dtype=object)
  3336. @property
  3337. def itemsize(self):
  3338. return self._block.values.itemsize
  3339. @property
  3340. def _can_hold_na(self):
  3341. return self._block._can_hold_na
  3342. def is_consolidated(self):
  3343. return True
  3344. def _consolidate_check(self):
  3345. pass
  3346. def _consolidate_inplace(self):
  3347. pass
  3348. def delete(self, item):
  3349. """
  3350. Delete single item from SingleBlockManager.
  3351. Ensures that self.blocks doesn't become empty.
  3352. """
  3353. loc = self.items.get_loc(item)
  3354. self._block.delete(loc)
  3355. self.axes[0] = self.axes[0].delete(loc)
  3356. def fast_xs(self, loc):
  3357. """
  3358. fast path for getting a cross-section
  3359. return a view of the data
  3360. """
  3361. return self._block.values[loc]
  3362. def construction_error(tot_items, block_shape, axes, e=None):
  3363. """ raise a helpful message about our construction """
  3364. passed = tuple(map(int, [tot_items] + list(block_shape)))
  3365. implied = tuple(map(int, [len(ax) for ax in axes]))
  3366. if passed == implied and e is not None:
  3367. raise e
  3368. if block_shape[0] == 0:
  3369. raise ValueError("Empty data passed with indices specified.")
  3370. raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
  3371. passed, implied))
  3372. def create_block_manager_from_blocks(blocks, axes):
  3373. try:
  3374. if len(blocks) == 1 and not isinstance(blocks[0], Block):
  3375. # if blocks[0] is of length 0, return empty blocks
  3376. if not len(blocks[0]):
  3377. blocks = []
  3378. else:
  3379. # It's OK if a single block is passed as values, its placement
  3380. # is basically "all items", but if there're many, don't bother
  3381. # converting, it's an error anyway.
  3382. blocks = [make_block(values=blocks[0],
  3383. placement=slice(0, len(axes[0])))]
  3384. mgr = BlockManager(blocks, axes)
  3385. mgr._consolidate_inplace()
  3386. return mgr
  3387. except (ValueError) as e:
  3388. blocks = [getattr(b, 'values', b) for b in blocks]
  3389. tot_items = sum(b.shape[0] for b in blocks)
  3390. construction_error(tot_items, blocks[0].shape[1:], axes, e)
  3391. def create_block_manager_from_arrays(arrays, names, axes):
  3392. try:
  3393. blocks = form_blocks(arrays, names, axes)
  3394. mgr = BlockManager(blocks, axes)
  3395. mgr._consolidate_inplace()
  3396. return mgr
  3397. except ValueError as e:
  3398. construction_error(len(arrays), arrays[0].shape, axes, e)
  3399. def form_blocks(arrays, names, axes):
  3400. # put "leftover" items in float bucket, where else?
  3401. # generalize?
  3402. float_items = []
  3403. complex_items = []
  3404. int_items = []
  3405. bool_items = []
  3406. object_items = []
  3407. sparse_items = []
  3408. datetime_items = []
  3409. datetime_tz_items = []
  3410. cat_items = []
  3411. extra_locs = []
  3412. names_idx = Index(names)
  3413. if names_idx.equals(axes[0]):
  3414. names_indexer = np.arange(len(names_idx))
  3415. else:
  3416. assert names_idx.intersection(axes[0]).is_unique
  3417. names_indexer = names_idx.get_indexer_for(axes[0])
  3418. for i, name_idx in enumerate(names_indexer):
  3419. if name_idx == -1:
  3420. extra_locs.append(i)
  3421. continue
  3422. k = names[name_idx]
  3423. v = arrays[name_idx]
  3424. if is_sparse(v):
  3425. sparse_items.append((i, k, v))
  3426. elif issubclass(v.dtype.type, np.floating):
  3427. float_items.append((i, k, v))
  3428. elif issubclass(v.dtype.type, np.complexfloating):
  3429. complex_items.append((i, k, v))
  3430. elif issubclass(v.dtype.type, np.datetime64):
  3431. if v.dtype != _NS_DTYPE:
  3432. v = tslib.cast_to_nanoseconds(v)
  3433. if is_datetimetz(v):
  3434. datetime_tz_items.append((i, k, v))
  3435. else:
  3436. datetime_items.append((i, k, v))
  3437. elif is_datetimetz(v):
  3438. datetime_tz_items.append((i, k, v))
  3439. elif issubclass(v.dtype.type, np.integer):
  3440. if v.dtype == np.uint64:
  3441. # HACK #2355 definite overflow
  3442. if (v > 2**63 - 1).any():
  3443. object_items.append((i, k, v))
  3444. continue
  3445. int_items.append((i, k, v))
  3446. elif v.dtype == np.bool_:
  3447. bool_items.append((i, k, v))
  3448. elif is_categorical(v):
  3449. cat_items.append((i, k, v))
  3450. else:
  3451. object_items.append((i, k, v))
  3452. blocks = []
  3453. if len(float_items):
  3454. float_blocks = _multi_blockify(float_items)
  3455. blocks.extend(float_blocks)
  3456. if len(complex_items):
  3457. complex_blocks = _multi_blockify(complex_items)
  3458. blocks.extend(complex_blocks)
  3459. if len(int_items):
  3460. int_blocks = _multi_blockify(int_items)
  3461. blocks.extend(int_blocks)
  3462. if len(datetime_items):
  3463. datetime_blocks = _simple_blockify(datetime_items, _NS_DTYPE)
  3464. blocks.extend(datetime_blocks)
  3465. if len(datetime_tz_items):
  3466. dttz_blocks = [make_block(array,
  3467. klass=DatetimeTZBlock,
  3468. fastpath=True,
  3469. placement=[i], )
  3470. for i, _, array in datetime_tz_items]
  3471. blocks.extend(dttz_blocks)
  3472. if len(bool_items):
  3473. bool_blocks = _simple_blockify(bool_items, np.bool_)
  3474. blocks.extend(bool_blocks)
  3475. if len(object_items) > 0:
  3476. object_blocks = _simple_blockify(object_items, np.object_)
  3477. blocks.extend(object_blocks)
  3478. if len(sparse_items) > 0:
  3479. sparse_blocks = _sparse_blockify(sparse_items)
  3480. blocks.extend(sparse_blocks)
  3481. if len(cat_items) > 0:
  3482. cat_blocks = [make_block(array, klass=CategoricalBlock, fastpath=True,
  3483. placement=[i])
  3484. for i, _, array in cat_items]
  3485. blocks.extend(cat_blocks)
  3486. if len(extra_locs):
  3487. shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])
  3488. # empty items -> dtype object
  3489. block_values = np.empty(shape, dtype=object)
  3490. block_values.fill(np.nan)
  3491. na_block = make_block(block_values, placement=extra_locs)
  3492. blocks.append(na_block)
  3493. return blocks
  3494. def _simple_blockify(tuples, dtype):
  3495. """ return a single array of a block that has a single dtype; if dtype is
  3496. not None, coerce to this dtype
  3497. """
  3498. values, placement = _stack_arrays(tuples, dtype)
  3499. # CHECK DTYPE?
  3500. if dtype is not None and values.dtype != dtype: # pragma: no cover
  3501. values = values.astype(dtype)
  3502. block = make_block(values, placement=placement)
  3503. return [block]
  3504. def _multi_blockify(tuples, dtype=None):
  3505. """ return an array of blocks that potentially have different dtypes """
  3506. # group by dtype
  3507. grouper = itertools.groupby(tuples, lambda x: x[2].dtype)
  3508. new_blocks = []
  3509. for dtype, tup_block in grouper:
  3510. values, placement = _stack_arrays(list(tup_block), dtype)
  3511. block = make_block(values, placement=placement)
  3512. new_blocks.append(block)
  3513. return new_blocks
  3514. def _sparse_blockify(tuples, dtype=None):
  3515. """ return an array of blocks that potentially have different dtypes (and
  3516. are sparse)
  3517. """
  3518. new_blocks = []
  3519. for i, names, array in tuples:
  3520. array = _maybe_to_sparse(array)
  3521. block = make_block(array, klass=SparseBlock, fastpath=True,
  3522. placement=[i])
  3523. new_blocks.append(block)
  3524. return new_blocks
  3525. def _stack_arrays(tuples, dtype):
  3526. # fml
  3527. def _asarray_compat(x):
  3528. if isinstance(x, ABCSeries):
  3529. return x._values
  3530. else:
  3531. return np.asarray(x)
  3532. def _shape_compat(x):
  3533. if isinstance(x, ABCSeries):
  3534. return len(x),
  3535. else:
  3536. return x.shape
  3537. placement, names, arrays = zip(*tuples)
  3538. first = arrays[0]
  3539. shape = (len(arrays),) + _shape_compat(first)
  3540. stacked = np.empty(shape, dtype=dtype)
  3541. for i, arr in enumerate(arrays):
  3542. stacked[i] = _asarray_compat(arr)
  3543. return stacked, placement
  3544. def _interleaved_dtype(blocks):
  3545. if not len(blocks):
  3546. return None
  3547. counts = defaultdict(list)
  3548. for x in blocks:
  3549. counts[type(x)].append(x)
  3550. have_int = len(counts[IntBlock]) > 0
  3551. have_bool = len(counts[BoolBlock]) > 0
  3552. have_object = len(counts[ObjectBlock]) > 0
  3553. have_float = len(counts[FloatBlock]) > 0
  3554. have_complex = len(counts[ComplexBlock]) > 0
  3555. have_dt64 = len(counts[DatetimeBlock]) > 0
  3556. have_dt64_tz = len(counts[DatetimeTZBlock]) > 0
  3557. have_td64 = len(counts[TimeDeltaBlock]) > 0
  3558. have_cat = len(counts[CategoricalBlock]) > 0
  3559. # TODO: have_sparse is not used
  3560. have_sparse = len(counts[SparseBlock]) > 0 # noqa
  3561. have_numeric = have_float or have_complex or have_int
  3562. has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat
  3563. if (have_object or
  3564. (have_bool and
  3565. (have_numeric or have_dt64 or have_dt64_tz or have_td64)) or
  3566. (have_numeric and has_non_numeric) or have_cat or have_dt64 or
  3567. have_dt64_tz or have_td64):
  3568. return np.dtype(object)
  3569. elif have_bool:
  3570. return np.dtype(bool)
  3571. elif have_int and not have_float and not have_complex:
  3572. # if we are mixing unsigned and signed, then return
  3573. # the next biggest int type (if we can)
  3574. lcd = _find_common_type([b.dtype for b in counts[IntBlock]])
  3575. kinds = set([i.dtype.kind for i in counts[IntBlock]])
  3576. if len(kinds) == 1:
  3577. return lcd
  3578. if lcd == 'uint64' or lcd == 'int64':
  3579. return np.dtype('int64')
  3580. # return 1 bigger on the itemsize if unsinged
  3581. if lcd.kind == 'u':
  3582. return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
  3583. return lcd
  3584. elif have_complex:
  3585. return np.dtype('c16')
  3586. else:
  3587. introspection_blks = counts[FloatBlock] + counts[SparseBlock]
  3588. return _find_common_type([b.dtype for b in introspection_blks])
  3589. def _consolidate(blocks):
  3590. """
  3591. Merge blocks having same dtype, exclude non-consolidating blocks
  3592. """
  3593. # sort by _can_consolidate, dtype
  3594. gkey = lambda x: x._consolidate_key
  3595. grouper = itertools.groupby(sorted(blocks, key=gkey), gkey)
  3596. new_blocks = []
  3597. for (_can_consolidate, dtype), group_blocks in grouper:
  3598. merged_blocks = _merge_blocks(list(group_blocks), dtype=dtype,
  3599. _can_consolidate=_can_consolidate)
  3600. new_blocks = _extend_blocks(merged_blocks, new_blocks)
  3601. return new_blocks
  3602. def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
  3603. if len(blocks) == 1:
  3604. return blocks[0]
  3605. if _can_consolidate:
  3606. if dtype is None:
  3607. if len(set([b.dtype for b in blocks])) != 1:
  3608. raise AssertionError("_merge_blocks are invalid!")
  3609. dtype = blocks[0].dtype
  3610. # FIXME: optimization potential in case all mgrs contain slices and
  3611. # combination of those slices is a slice, too.
  3612. new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
  3613. new_values = _vstack([b.values for b in blocks], dtype)
  3614. argsort = np.argsort(new_mgr_locs)
  3615. new_values = new_values[argsort]
  3616. new_mgr_locs = new_mgr_locs[argsort]
  3617. return make_block(new_values, fastpath=True, placement=new_mgr_locs)
  3618. # no merge
  3619. return blocks
  3620. def _extend_blocks(result, blocks=None):
  3621. """ return a new extended blocks, givin the result """
  3622. if blocks is None:
  3623. blocks = []
  3624. if isinstance(result, list):
  3625. for r in result:
  3626. if isinstance(r, list):
  3627. blocks.extend(r)
  3628. else:
  3629. blocks.append(r)
  3630. elif isinstance(result, BlockManager):
  3631. blocks.extend(result.blocks)
  3632. else:
  3633. blocks.append(result)
  3634. return blocks
  3635. def _block_shape(values, ndim=1, shape=None):
  3636. """ guarantee the shape of the values to be at least 1 d """
  3637. if values.ndim < ndim:
  3638. if shape is None:
  3639. shape = values.shape
  3640. values = values.reshape(tuple((1, ) + shape))
  3641. return values
  3642. def _vstack(to_stack, dtype):
  3643. # work around NumPy 1.6 bug
  3644. if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
  3645. new_values = np.vstack([x.view('i8') for x in to_stack])
  3646. return new_values.view(dtype)
  3647. else:
  3648. return np.vstack(to_stack)
  3649. def _possibly_compare(a, b, op):
  3650. is_a_array = isinstance(a, np.ndarray)
  3651. is_b_array = isinstance(b, np.ndarray)
  3652. # numpy deprecation warning to have i8 vs integer comparisions
  3653. if is_datetimelike_v_numeric(a, b):
  3654. result = False
  3655. # numpy deprecation warning if comparing numeric vs string-like
  3656. elif is_numeric_v_string_like(a, b):
  3657. result = False
  3658. else:
  3659. result = op(a, b)
  3660. if is_scalar(result) and (is_a_array or is_b_array):
  3661. type_names = [type(a).__name__, type(b).__name__]
  3662. if is_a_array:
  3663. type_names[0] = 'ndarray(dtype=%s)' % a.dtype
  3664. if is_b_array:
  3665. type_names[1] = 'ndarray(dtype=%s)' % b.dtype
  3666. raise TypeError("Cannot compare types %r and %r" % tuple(type_names))
  3667. return result
  3668. def _concat_indexes(indexes):
  3669. return indexes[0].append(indexes[1:])
  3670. def _block2d_to_blocknd(values, placement, shape, labels, ref_items):
  3671. """ pivot to the labels shape """
  3672. from pandas.core.internals import make_block
  3673. panel_shape = (len(placement),) + shape
  3674. # TODO: lexsort depth needs to be 2!!
  3675. # Create observation selection vector using major and minor
  3676. # labels, for converting to panel format.
  3677. selector = _factor_indexer(shape[1:], labels)
  3678. mask = np.zeros(np.prod(shape), dtype=bool)
  3679. mask.put(selector, True)
  3680. if mask.all():
  3681. pvalues = np.empty(panel_shape, dtype=values.dtype)
  3682. else:
  3683. dtype, fill_value = _maybe_promote(values.dtype)
  3684. pvalues = np.empty(panel_shape, dtype=dtype)
  3685. pvalues.fill(fill_value)
  3686. values = values
  3687. for i in range(len(placement)):
  3688. pvalues[i].flat[mask] = values[:, i]
  3689. return make_block(pvalues, placement=placement)
  3690. def _factor_indexer(shape, labels):
  3691. """
  3692. given a tuple of shape and a list of Categorical labels, return the
  3693. expanded label indexer
  3694. """
  3695. mult = np.array(shape)[::-1].cumprod()[::-1]
  3696. return _ensure_platform_int(
  3697. np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T)
  3698. def _get_blkno_placements(blknos, blk_count, group=True):
  3699. """
  3700. Parameters
  3701. ----------
  3702. blknos : array of int64
  3703. blk_count : int
  3704. group : bool
  3705. Returns
  3706. -------
  3707. iterator
  3708. yield (BlockPlacement, blkno)
  3709. """
  3710. blknos = _ensure_int64(blknos)
  3711. # FIXME: blk_count is unused, but it may avoid the use of dicts in cython
  3712. for blkno, indexer in lib.get_blkno_indexers(blknos, group):
  3713. yield blkno, BlockPlacement(indexer)
  3714. def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
  3715. """
  3716. If two indices overlap, add suffixes to overlapping entries.
  3717. If corresponding suffix is empty, the entry is simply converted to string.
  3718. """
  3719. to_rename = left.intersection(right)
  3720. if len(to_rename) == 0:
  3721. return left, right
  3722. else:
  3723. if not lsuffix and not rsuffix:
  3724. raise ValueError('columns overlap but no suffix specified: %s' %
  3725. to_rename)
  3726. def lrenamer(x):
  3727. if x in to_rename:
  3728. return '%s%s' % (x, lsuffix)
  3729. return x
  3730. def rrenamer(x):
  3731. if x in to_rename:
  3732. return '%s%s' % (x, rsuffix)
  3733. return x
  3734. return (_transform_index(left, lrenamer),
  3735. _transform_index(right, rrenamer))
  3736. def _safe_reshape(arr, new_shape):
  3737. """
  3738. If possible, reshape `arr` to have shape `new_shape`,
  3739. with a couple of exceptions (see gh-13012):
  3740. 1) If `arr` is a Categorical or Index, `arr` will be
  3741. returned as is.
  3742. 2) If `arr` is a Series, the `_values` attribute will
  3743. be reshaped and returned.
  3744. Parameters
  3745. ----------
  3746. arr : array-like, object to be reshaped
  3747. new_shape : int or tuple of ints, the new shape
  3748. """
  3749. if isinstance(arr, ABCSeries):
  3750. arr = arr._values
  3751. if not isinstance(arr, Categorical):
  3752. arr = arr.reshape(new_shape)
  3753. return arr
  3754. def _transform_index(index, func):
  3755. """
  3756. Apply function to all values found in index.
  3757. This includes transforming multiindex entries separately.
  3758. """
  3759. if isinstance(index, MultiIndex):
  3760. items = [tuple(func(y) for y in x) for x in index]
  3761. return MultiIndex.from_tuples(items, names=index.names)
  3762. else:
  3763. items = [func(x) for x in index]
  3764. return Index(items, name=index.name)
  3765. def _putmask_smart(v, m, n):
  3766. """
  3767. Return a new block, try to preserve dtype if possible.
  3768. Parameters
  3769. ----------
  3770. v : `values`, updated in-place (array like)
  3771. m : `mask`, applies to both sides (array like)
  3772. n : `new values` either scalar or an array like aligned with `values`
  3773. """
  3774. # n should be the length of the mask or a scalar here
  3775. if not is_list_like(n):
  3776. n = np.array([n] * len(m))
  3777. elif isinstance(n, np.ndarray) and n.ndim == 0: # numpy scalar
  3778. n = np.repeat(np.array(n, ndmin=1), len(m))
  3779. # see if we are only masking values that if putted
  3780. # will work in the current dtype
  3781. try:
  3782. nn = n[m]
  3783. # make sure that we have a nullable type
  3784. # if we have nulls
  3785. if not _is_na_compat(v, nn[0]):
  3786. raise ValueError
  3787. nn_at = nn.astype(v.dtype)
  3788. # avoid invalid dtype comparisons
  3789. if not is_numeric_v_string_like(nn, nn_at):
  3790. comp = (nn == nn_at)
  3791. if is_list_like(comp) and comp.all():
  3792. nv = v.copy()
  3793. nv[m] = nn_at
  3794. return nv
  3795. except (ValueError, IndexError, TypeError):
  3796. pass
  3797. # change the dtype
  3798. dtype, _ = _maybe_promote(n.dtype)
  3799. nv = v.astype(dtype)
  3800. try:
  3801. nv[m] = n[m]
  3802. except ValueError:
  3803. idx, = np.where(np.squeeze(m))
  3804. for mask_index, new_val in zip(idx, n[m]):
  3805. nv[mask_index] = new_val
  3806. return nv
  3807. def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
  3808. """
  3809. Concatenate block managers into one.
  3810. Parameters
  3811. ----------
  3812. mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
  3813. axes : list of Index
  3814. concat_axis : int
  3815. copy : bool
  3816. """
  3817. concat_plan = combine_concat_plans(
  3818. [get_mgr_concatenation_plan(mgr, indexers)
  3819. for mgr, indexers in mgrs_indexers], concat_axis)
  3820. blocks = [make_block(concatenate_join_units(join_units, concat_axis,
  3821. copy=copy),
  3822. placement=placement)
  3823. for placement, join_units in concat_plan]
  3824. return BlockManager(blocks, axes)
  3825. def get_empty_dtype_and_na(join_units):
  3826. """
  3827. Return dtype and N/A values to use when concatenating specified units.
  3828. Returned N/A value may be None which means there was no casting involved.
  3829. Returns
  3830. -------
  3831. dtype
  3832. na
  3833. """
  3834. if len(join_units) == 1:
  3835. blk = join_units[0].block
  3836. if blk is None:
  3837. return np.float64, np.nan
  3838. has_none_blocks = False
  3839. dtypes = [None] * len(join_units)
  3840. for i, unit in enumerate(join_units):
  3841. if unit.block is None:
  3842. has_none_blocks = True
  3843. else:
  3844. dtypes[i] = unit.dtype
  3845. upcast_classes = defaultdict(list)
  3846. null_upcast_classes = defaultdict(list)
  3847. for dtype, unit in zip(dtypes, join_units):
  3848. if dtype is None:
  3849. continue
  3850. if is_categorical_dtype(dtype):
  3851. upcast_cls = 'category'
  3852. elif is_datetimetz(dtype):
  3853. upcast_cls = 'datetimetz'
  3854. elif issubclass(dtype.type, np.bool_):
  3855. upcast_cls = 'bool'
  3856. elif issubclass(dtype.type, np.object_):
  3857. upcast_cls = 'object'
  3858. elif is_datetime64_dtype(dtype):
  3859. upcast_cls = 'datetime'
  3860. elif is_timedelta64_dtype(dtype):
  3861. upcast_cls = 'timedelta'
  3862. else:
  3863. upcast_cls = 'float'
  3864. # Null blocks should not influence upcast class selection, unless there
  3865. # are only null blocks, when same upcasting rules must be applied to
  3866. # null upcast classes.
  3867. if unit.is_null:
  3868. null_upcast_classes[upcast_cls].append(dtype)
  3869. else:
  3870. upcast_classes[upcast_cls].append(dtype)
  3871. if not upcast_classes:
  3872. upcast_classes = null_upcast_classes
  3873. # create the result
  3874. if 'object' in upcast_classes:
  3875. return np.dtype(np.object_), np.nan
  3876. elif 'bool' in upcast_classes:
  3877. if has_none_blocks:
  3878. return np.dtype(np.object_), np.nan
  3879. else:
  3880. return np.dtype(np.bool_), None
  3881. elif 'category' in upcast_classes:
  3882. return np.dtype(np.object_), np.nan
  3883. elif 'float' in upcast_classes:
  3884. return np.dtype(np.float64), np.nan
  3885. elif 'datetimetz' in upcast_classes:
  3886. dtype = upcast_classes['datetimetz']
  3887. return dtype[0], tslib.iNaT
  3888. elif 'datetime' in upcast_classes:
  3889. return np.dtype('M8[ns]'), tslib.iNaT
  3890. elif 'timedelta' in upcast_classes:
  3891. return np.dtype('m8[ns]'), tslib.iNaT
  3892. else: # pragma
  3893. raise AssertionError("invalid dtype determination in get_concat_dtype")
  3894. def concatenate_join_units(join_units, concat_axis, copy):
  3895. """
  3896. Concatenate values from several join units along selected axis.
  3897. """
  3898. if concat_axis == 0 and len(join_units) > 1:
  3899. # Concatenating join units along ax0 is handled in _merge_blocks.
  3900. raise AssertionError("Concatenating join units along axis0")
  3901. empty_dtype, upcasted_na = get_empty_dtype_and_na(join_units)
  3902. to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,
  3903. upcasted_na=upcasted_na)
  3904. for ju in join_units]
  3905. if len(to_concat) == 1:
  3906. # Only one block, nothing to concatenate.
  3907. concat_values = to_concat[0]
  3908. if copy and concat_values.base is not None:
  3909. concat_values = concat_values.copy()
  3910. else:
  3911. concat_values = _concat._concat_compat(to_concat, axis=concat_axis)
  3912. return concat_values
  3913. def get_mgr_concatenation_plan(mgr, indexers):
  3914. """
  3915. Construct concatenation plan for given block manager and indexers.
  3916. Parameters
  3917. ----------
  3918. mgr : BlockManager
  3919. indexers : dict of {axis: indexer}
  3920. Returns
  3921. -------
  3922. plan : list of (BlockPlacement, JoinUnit) tuples
  3923. """
  3924. # Calculate post-reindex shape , save for item axis which will be separate
  3925. # for each block anyway.
  3926. mgr_shape = list(mgr.shape)
  3927. for ax, indexer in indexers.items():
  3928. mgr_shape[ax] = len(indexer)
  3929. mgr_shape = tuple(mgr_shape)
  3930. if 0 in indexers:
  3931. ax0_indexer = indexers.pop(0)
  3932. blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1)
  3933. blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1)
  3934. else:
  3935. if mgr._is_single_block:
  3936. blk = mgr.blocks[0]
  3937. return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]
  3938. ax0_indexer = None
  3939. blknos = mgr._blknos
  3940. blklocs = mgr._blklocs
  3941. plan = []
  3942. for blkno, placements in _get_blkno_placements(blknos, len(mgr.blocks),
  3943. group=False):
  3944. assert placements.is_slice_like
  3945. join_unit_indexers = indexers.copy()
  3946. shape = list(mgr_shape)
  3947. shape[0] = len(placements)
  3948. shape = tuple(shape)
  3949. if blkno == -1:
  3950. unit = JoinUnit(None, shape)
  3951. else:
  3952. blk = mgr.blocks[blkno]
  3953. ax0_blk_indexer = blklocs[placements.indexer]
  3954. unit_no_ax0_reindexing = (len(placements) == len(blk.mgr_locs) and
  3955. # Fastpath detection of join unit not
  3956. # needing to reindex its block: no ax0
  3957. # reindexing took place and block
  3958. # placement was sequential before.
  3959. ((ax0_indexer is None and
  3960. blk.mgr_locs.is_slice_like and
  3961. blk.mgr_locs.as_slice.step == 1) or
  3962. # Slow-ish detection: all indexer locs
  3963. # are sequential (and length match is
  3964. # checked above).
  3965. (np.diff(ax0_blk_indexer) == 1).all()))
  3966. # Omit indexer if no item reindexing is required.
  3967. if unit_no_ax0_reindexing:
  3968. join_unit_indexers.pop(0, None)
  3969. else:
  3970. join_unit_indexers[0] = ax0_blk_indexer
  3971. unit = JoinUnit(blk, shape, join_unit_indexers)
  3972. plan.append((placements, unit))
  3973. return plan
  3974. def combine_concat_plans(plans, concat_axis):
  3975. """
  3976. Combine multiple concatenation plans into one.
  3977. existing_plan is updated in-place.
  3978. """
  3979. if len(plans) == 1:
  3980. for p in plans[0]:
  3981. yield p[0], [p[1]]
  3982. elif concat_axis == 0:
  3983. offset = 0
  3984. for plan in plans:
  3985. last_plc = None
  3986. for plc, unit in plan:
  3987. yield plc.add(offset), [unit]
  3988. last_plc = plc
  3989. if last_plc is not None:
  3990. offset += last_plc.as_slice.stop
  3991. else:
  3992. num_ended = [0]
  3993. def _next_or_none(seq):
  3994. retval = next(seq, None)
  3995. if retval is None:
  3996. num_ended[0] += 1
  3997. return retval
  3998. plans = list(map(iter, plans))
  3999. next_items = list(map(_next_or_none, plans))
  4000. while num_ended[0] != len(next_items):
  4001. if num_ended[0] > 0:
  4002. raise ValueError("Plan shapes are not aligned")
  4003. placements, units = zip(*next_items)
  4004. lengths = list(map(len, placements))
  4005. min_len, max_len = min(lengths), max(lengths)
  4006. if min_len == max_len:
  4007. yield placements[0], units
  4008. next_items[:] = map(_next_or_none, plans)
  4009. else:
  4010. yielded_placement = None
  4011. yielded_units = [None] * len(next_items)
  4012. for i, (plc, unit) in enumerate(next_items):
  4013. yielded_units[i] = unit
  4014. if len(plc) > min_len:
  4015. # trim_join_unit updates unit in place, so only
  4016. # placement needs to be sliced to skip min_len.
  4017. next_items[i] = (plc[min_len:],
  4018. trim_join_unit(unit, min_len))
  4019. else:
  4020. yielded_placement = plc
  4021. next_items[i] = _next_or_none(plans[i])
  4022. yield yielded_placement, yielded_units
  4023. def trim_join_unit(join_unit, length):
  4024. """
  4025. Reduce join_unit's shape along item axis to length.
  4026. Extra items that didn't fit are returned as a separate block.
  4027. """
  4028. if 0 not in join_unit.indexers:
  4029. extra_indexers = join_unit.indexers
  4030. if join_unit.block is None:
  4031. extra_block = None
  4032. else:
  4033. extra_block = join_unit.block.getitem_block(slice(length, None))
  4034. join_unit.block = join_unit.block.getitem_block(slice(length))
  4035. else:
  4036. extra_block = join_unit.block
  4037. extra_indexers = copy.copy(join_unit.indexers)
  4038. extra_indexers[0] = extra_indexers[0][length:]
  4039. join_unit.indexers[0] = join_unit.indexers[0][:length]
  4040. extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
  4041. join_unit.shape = (length,) + join_unit.shape[1:]
  4042. return JoinUnit(block=extra_block, indexers=extra_indexers,
  4043. shape=extra_shape)
  4044. class JoinUnit(object):
  4045. def __init__(self, block, shape, indexers=None):
  4046. # Passing shape explicitly is required for cases when block is None.
  4047. if indexers is None:
  4048. indexers = {}
  4049. self.block = block
  4050. self.indexers = indexers
  4051. self.shape = shape
  4052. def __repr__(self):
  4053. return '%s(%r, %s)' % (self.__class__.__name__, self.block,
  4054. self.indexers)
  4055. @cache_readonly
  4056. def needs_filling(self):
  4057. for indexer in self.indexers.values():
  4058. # FIXME: cache results of indexer == -1 checks.
  4059. if (indexer == -1).any():
  4060. return True
  4061. return False
  4062. @cache_readonly
  4063. def dtype(self):
  4064. if self.block is None:
  4065. raise AssertionError("Block is None, no dtype")
  4066. if not self.needs_filling:
  4067. return self.block.dtype
  4068. else:
  4069. return _get_dtype(_maybe_promote(self.block.dtype,
  4070. self.block.fill_value)[0])
  4071. return self._dtype
  4072. @cache_readonly
  4073. def is_null(self):
  4074. if self.block is None:
  4075. return True
  4076. if not self.block._can_hold_na:
  4077. return False
  4078. # Usually it's enough to check but a small fraction of values to see if
  4079. # a block is NOT null, chunks should help in such cases. 1000 value
  4080. # was chosen rather arbitrarily.
  4081. values = self.block.values
  4082. if self.block.is_categorical:
  4083. values_flat = values.categories
  4084. elif self.block.is_sparse:
  4085. # fill_value is not NaN and have holes
  4086. if not values._null_fill_value and values.sp_index.ngaps > 0:
  4087. return False
  4088. values_flat = values.ravel(order='K')
  4089. else:
  4090. values_flat = values.ravel(order='K')
  4091. total_len = values_flat.shape[0]
  4092. chunk_len = max(total_len // 40, 1000)
  4093. for i in range(0, total_len, chunk_len):
  4094. if not isnull(values_flat[i:i + chunk_len]).all():
  4095. return False
  4096. return True
    def get_reindexed_values(self, empty_dtype, upcasted_na):
        """
        Return this unit's values with ``self.indexers`` applied.

        Parameters
        ----------
        empty_dtype : dtype
            Dtype of the array that is allocated when the unit is entirely
            null and is replaced by a freshly filled array.
        upcasted_na : scalar or None
            Fill value to use.  ``None`` means no upcasting is needed: the
            block's own ``fill_value`` and ``get_values()`` are used.

        Returns
        -------
        values
            One of: a new array of shape ``self.shape`` filled with the fill
            value (all-null unit); ``self.block.values`` unchanged (no
            indexers and a block that cannot be consolidated); a view of
            the values (no indexers); or the result of ``algos.take_nd``
            applied along every axis in ``self.indexers``.
        """
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_null:
                # ``getattr`` with a default is used for the block flags in
                # this branch because ``is_null`` is also True when
                # ``self.block`` is None.
                if getattr(self.block, 'is_object', False):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order='K')
                    if len(values) and values[0] is None:
                        fill_value = None

                # datetimetz, categorical and sparse blocks fall through to
                # the code below; every other all-null unit is replaced by a
                # freshly allocated array filled with ``fill_value``.
                if getattr(self.block, 'is_datetimetz', False):
                    pass
                elif getattr(self.block, 'is_categorical', False):
                    pass
                elif getattr(self.block, 'is_sparse', False):
                    pass
                else:
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if not self.indexers:
                if not self.block._can_consolidate:
                    # preserve these for validation in _concat_compat
                    return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.get_values()

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly. This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            # Apply each indexer along its axis in turn, using the chosen
            # fill value.
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax,
                                       fill_value=fill_value)

        return values
  4144. def _fast_count_smallints(arr):
  4145. """Faster version of set(arr) for sequences of small numbers."""
  4146. if len(arr) == 0:
  4147. # Handle empty arr case separately: numpy 1.6 chokes on that.
  4148. return np.empty((0, 2), dtype=arr.dtype)
  4149. else:
  4150. counts = np.bincount(arr.astype(np.int_))
  4151. nz = counts.nonzero()[0]
  4152. return np.c_[nz, counts[nz]]
  4153. def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
  4154. if isinstance(slice_or_indexer, slice):
  4155. return 'slice', slice_or_indexer, lib.slice_len(slice_or_indexer,
  4156. length)
  4157. elif (isinstance(slice_or_indexer, np.ndarray) and
  4158. slice_or_indexer.dtype == np.bool_):
  4159. return 'mask', slice_or_indexer, slice_or_indexer.sum()
  4160. else:
  4161. indexer = np.asanyarray(slice_or_indexer, dtype=np.int64)
  4162. if not allow_fill:
  4163. indexer = maybe_convert_indices(indexer, length)
  4164. return 'fancy', indexer, len(indexer)