PageRenderTime 72ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/core/internals.py

https://github.com/ajcr/pandas
Python | 4069 lines | 3422 code | 325 blank | 322 comment | 311 complexity | ea364a4280ea861ebe94fc70579f020b MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. import copy
  2. import itertools
  3. import re
  4. import operator
  5. from datetime import datetime, timedelta
  6. from collections import defaultdict
  7. import numpy as np
  8. from pandas.core.base import PandasObject
  9. from pandas.core.common import (_possibly_downcast_to_dtype, isnull,
  10. _NS_DTYPE, _TD_DTYPE, ABCSeries, is_list_like,
  11. ABCSparseSeries, _infer_dtype_from_scalar,
  12. _is_null_datelike_scalar,
  13. is_timedelta64_dtype, is_datetime64_dtype,
  14. _possibly_infer_to_datetimelike)
  15. from pandas.core.index import Index, MultiIndex, _ensure_index
  16. from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer)
  17. import pandas.core.common as com
  18. from pandas.sparse.array import _maybe_to_sparse, SparseArray
  19. import pandas.lib as lib
  20. import pandas.tslib as tslib
  21. import pandas.computation.expressions as expressions
  22. from pandas.util.decorators import cache_readonly
  23. from pandas.tslib import Timestamp
  24. from pandas import compat
  25. from pandas.compat import range, map, zip, u
  26. from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
  27. from pandas.lib import BlockPlacement
class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """
    # instance storage: placement (behind the mgr_locs property), the values
    # array, and the number of dimensions
    __slots__ = ['_mgr_locs', 'values', 'ndim']
    # type flags; all False on the base class
    is_numeric = False
    is_float = False
    is_integer = False
    is_complex = False
    is_datetime = False
    is_timedelta = False
    is_bool = False
    is_object = False
    is_sparse = False
    # consulted by fillna / where / _interpolate_with_fill
    _can_hold_na = False
    _downcast_dtype = None
    # first element of _consolidate_key
    _can_consolidate = True
    # NOTE(review): not read anywhere in the visible part of this file
    _verify_integrity = True
    # suffix used by the ftype property
    _ftype = 'dense'
  49. def __init__(self, values, placement, ndim=None, fastpath=False):
  50. if ndim is None:
  51. ndim = values.ndim
  52. elif values.ndim != ndim:
  53. raise ValueError('Wrong number of dimensions')
  54. self.ndim = ndim
  55. self.mgr_locs = placement
  56. self.values = values
  57. if len(self.mgr_locs) != len(self.values):
  58. raise ValueError('Wrong number of items passed %d,'
  59. ' placement implies %d' % (
  60. len(self.values), len(self.mgr_locs)))
  61. @property
  62. def _consolidate_key(self):
  63. return (self._can_consolidate, self.dtype.name)
  64. @property
  65. def _is_single_block(self):
  66. return self.ndim == 1
  67. @property
  68. def is_datelike(self):
  69. """ return True if I am a non-datelike """
  70. return self.is_datetime or self.is_timedelta
  71. @property
  72. def fill_value(self):
  73. return np.nan
  74. @property
  75. def mgr_locs(self):
  76. return self._mgr_locs
  77. def make_block_same_class(self, values, placement, copy=False,
  78. **kwargs):
  79. """
  80. Wrap given values in a block of same type as self.
  81. `kwargs` are used in SparseBlock override.
  82. """
  83. if copy:
  84. values = values.copy()
  85. return make_block(values, placement, klass=self.__class__,
  86. fastpath=True)
  87. @mgr_locs.setter
  88. def mgr_locs(self, new_mgr_locs):
  89. if not isinstance(new_mgr_locs, BlockPlacement):
  90. new_mgr_locs = BlockPlacement(new_mgr_locs)
  91. self._mgr_locs = new_mgr_locs
  92. def __unicode__(self):
  93. # don't want to print out all of the items here
  94. name = com.pprint_thing(self.__class__.__name__)
  95. if self._is_single_block:
  96. result = '%s: %s dtype: %s' % (
  97. name, len(self), self.dtype)
  98. else:
  99. shape = ' x '.join([com.pprint_thing(s) for s in self.shape])
  100. result = '%s: %s, %s, dtype: %s' % (
  101. name, com.pprint_thing(self.mgr_locs.indexer), shape,
  102. self.dtype)
  103. return result
  104. def __len__(self):
  105. return len(self.values)
  106. def __getstate__(self):
  107. return self.mgr_locs.indexer, self.values
  108. def __setstate__(self, state):
  109. self.mgr_locs = BlockPlacement(state[0])
  110. self.values = state[1]
  111. self.ndim = self.values.ndim
  112. def _slice(self, slicer):
  113. """ return a slice of my values """
  114. return self.values[slicer]
  115. def getitem_block(self, slicer, new_mgr_locs=None):
  116. """
  117. Perform __getitem__-like, return result as block.
  118. As of now, only supports slices that preserve dimensionality.
  119. """
  120. if new_mgr_locs is None:
  121. if isinstance(slicer, tuple):
  122. axis0_slicer = slicer[0]
  123. else:
  124. axis0_slicer = slicer
  125. new_mgr_locs = self.mgr_locs[axis0_slicer]
  126. new_values = self._slice(slicer)
  127. if new_values.ndim != self.ndim:
  128. raise ValueError("Only same dim slicing is allowed")
  129. return self.make_block_same_class(new_values, new_mgr_locs)
  130. @property
  131. def shape(self):
  132. return self.values.shape
  133. @property
  134. def itemsize(self):
  135. return self.values.itemsize
  136. @property
  137. def dtype(self):
  138. return self.values.dtype
  139. @property
  140. def ftype(self):
  141. return "%s:%s" % (self.dtype, self._ftype)
  142. def merge(self, other):
  143. return _merge_blocks([self, other])
  144. def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
  145. limit=None, mask_info=None):
  146. """
  147. Reindex using pre-computed indexer information
  148. """
  149. if axis < 1:
  150. raise AssertionError('axis must be at least 1, got %d' % axis)
  151. if fill_value is None:
  152. fill_value = self.fill_value
  153. new_values = com.take_nd(self.values, indexer, axis,
  154. fill_value=fill_value, mask_info=mask_info)
  155. return make_block(new_values,
  156. ndim=self.ndim, fastpath=True,
  157. placement=self.mgr_locs)
  158. def get(self, item):
  159. loc = self.items.get_loc(item)
  160. return self.values[loc]
  161. def iget(self, i):
  162. return self.values[i]
  163. def set(self, locs, values, check=False):
  164. """
  165. Modify Block in-place with new item value
  166. Returns
  167. -------
  168. None
  169. """
  170. self.values[locs] = values
  171. def delete(self, loc):
  172. """
  173. Delete given loc(-s) from block in-place.
  174. """
  175. self.values = np.delete(self.values, loc, 0)
  176. self.mgr_locs = self.mgr_locs.delete(loc)
  177. def apply(self, func, **kwargs):
  178. """ apply the function to my values; return a block if we are not one """
  179. result = func(self.values)
  180. if not isinstance(result, Block):
  181. result = make_block(values=result, placement=self.mgr_locs,)
  182. return result
  183. def fillna(self, value, limit=None, inplace=False, downcast=None):
  184. if not self._can_hold_na:
  185. if inplace:
  186. return [self]
  187. else:
  188. return [self.copy()]
  189. mask = isnull(self.values)
  190. if limit is not None:
  191. if self.ndim > 2:
  192. raise NotImplementedError
  193. mask[mask.cumsum(self.ndim-1)>limit]=False
  194. value = self._try_fill(value)
  195. blocks = self.putmask(mask, value, inplace=inplace)
  196. return self._maybe_downcast(blocks, downcast)
  197. def _maybe_downcast(self, blocks, downcast=None):
  198. # no need to downcast our float
  199. # unless indicated
  200. if downcast is None and self.is_float:
  201. return blocks
  202. elif downcast is None and (self.is_timedelta or self.is_datetime):
  203. return blocks
  204. result_blocks = []
  205. for b in blocks:
  206. result_blocks.extend(b.downcast(downcast))
  207. return result_blocks
  208. def downcast(self, dtypes=None):
  209. """ try to downcast each item to the dict of dtypes if present """
  210. # turn it off completely
  211. if dtypes is False:
  212. return [self]
  213. values = self.values
  214. # single block handling
  215. if self._is_single_block:
  216. # try to cast all non-floats here
  217. if dtypes is None:
  218. dtypes = 'infer'
  219. nv = _possibly_downcast_to_dtype(values, dtypes)
  220. return [make_block(nv, ndim=self.ndim,
  221. fastpath=True, placement=self.mgr_locs)]
  222. # ndim > 1
  223. if dtypes is None:
  224. return [self]
  225. if not (dtypes == 'infer' or isinstance(dtypes, dict)):
  226. raise ValueError("downcast must have a dictionary or 'infer' as "
  227. "its argument")
  228. # item-by-item
  229. # this is expensive as it splits the blocks items-by-item
  230. blocks = []
  231. for i, rl in enumerate(self.mgr_locs):
  232. if dtypes == 'infer':
  233. dtype = 'infer'
  234. else:
  235. raise AssertionError("dtypes as dict is not supported yet")
  236. dtype = dtypes.get(item, self._downcast_dtype)
  237. if dtype is None:
  238. nv = _block_shape(values[i], ndim=self.ndim)
  239. else:
  240. nv = _possibly_downcast_to_dtype(values[i], dtype)
  241. nv = _block_shape(nv, ndim=self.ndim)
  242. blocks.append(make_block(nv,
  243. ndim=self.ndim, fastpath=True,
  244. placement=[rl]))
  245. return blocks
  246. def astype(self, dtype, copy=False, raise_on_error=True, values=None):
  247. return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
  248. values=values)
  249. def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
  250. klass=None):
  251. """
  252. Coerce to the new type (if copy=True, return a new copy)
  253. raise on an except if raise == True
  254. """
  255. dtype = np.dtype(dtype)
  256. if self.dtype == dtype:
  257. if copy:
  258. return self.copy()
  259. return self
  260. try:
  261. # force the copy here
  262. if values is None:
  263. # _astype_nansafe works fine with 1-d only
  264. values = com._astype_nansafe(self.values.ravel(), dtype, copy=True)
  265. values = values.reshape(self.values.shape)
  266. newb = make_block(values,
  267. ndim=self.ndim, placement=self.mgr_locs,
  268. fastpath=True, dtype=dtype, klass=klass)
  269. except:
  270. if raise_on_error is True:
  271. raise
  272. newb = self.copy() if copy else self
  273. if newb.is_numeric and self.is_numeric:
  274. if newb.shape != self.shape:
  275. raise TypeError("cannot set astype for copy = [%s] for dtype "
  276. "(%s [%s]) with smaller itemsize that current "
  277. "(%s [%s])" % (copy, self.dtype.name,
  278. self.itemsize, newb.dtype.name,
  279. newb.itemsize))
  280. return newb
  281. def convert(self, copy=True, **kwargs):
  282. """ attempt to coerce any object types to better types
  283. return a copy of the block (if copy = True)
  284. by definition we are not an ObjectBlock here! """
  285. return [self.copy()] if copy else [self]
  286. def _can_hold_element(self, value):
  287. raise NotImplementedError()
  288. def _try_cast(self, value):
  289. raise NotImplementedError()
    def _try_cast_result(self, result, dtype=None):
        """ try to cast the result to our original type,
        we may have roundtripped thru object in the mean-time

        ``dtype`` defaults to my own dtype.  Integer, bool and datetime
        blocks always go through ``_possibly_downcast_to_dtype``.  A float
        block whose result still has my dtype is returned as is, unless the
        requested dtype is a bool or object type. """
        if dtype is None:
            dtype = self.dtype

        if self.is_integer or self.is_bool or self.is_datetime:
            # fall through to the downcast at the bottom
            pass
        elif self.is_float and result.dtype == self.dtype:

            # protect against a bool/object showing up here
            if isinstance(dtype, compat.string_types) and dtype == 'infer':
                return result
            # from here on ``dtype`` is a scalar type (class), not a dtype
            if not isinstance(dtype, type):
                dtype = dtype.type
            if issubclass(dtype, (np.bool_, np.object_)):
                if issubclass(dtype, np.bool_):
                    if isnull(result).all():
                        return result.astype(np.bool_)
                    else:
                        # go through object so 1/0 can become True/False
                        # while everything else is left in place
                        result = result.astype(np.object_)
                        result[result == 1] = True
                        result[result == 0] = False
                        return result
                else:
                    return result.astype(np.object_)

            return result

        # may need to change the dtype here
        return _possibly_downcast_to_dtype(result, dtype)
  317. def _try_operate(self, values):
  318. """ return a version to operate on as the input """
  319. return values
  320. def _try_coerce_args(self, values, other):
  321. """ provide coercion to our input arguments """
  322. return values, other
  323. def _try_coerce_result(self, result):
  324. """ reverse of try_coerce_args """
  325. return result
  326. def _try_coerce_and_cast_result(self, result, dtype=None):
  327. result = self._try_coerce_result(result)
  328. result = self._try_cast_result(result, dtype=dtype)
  329. return result
  330. def _try_fill(self, value):
  331. return value
  332. def to_native_types(self, slicer=None, na_rep='', **kwargs):
  333. """ convert to our native types format, slicing if desired """
  334. values = self.values
  335. if slicer is not None:
  336. values = values[:, slicer]
  337. values = np.array(values, dtype=object)
  338. mask = isnull(values)
  339. values[mask] = na_rep
  340. return values.tolist()
  341. # block actions ####
  342. def copy(self, deep=True):
  343. values = self.values
  344. if deep:
  345. values = values.copy()
  346. return make_block(values, ndim=self.ndim,
  347. klass=self.__class__, fastpath=True,
  348. placement=self.mgr_locs)
  349. def replace(self, to_replace, value, inplace=False, filter=None,
  350. regex=False):
  351. """ replace the to_replace value with value, possible to create new
  352. blocks here this is just a call to putmask. regex is not used here.
  353. It is used in ObjectBlocks. It is here for API
  354. compatibility."""
  355. mask = com.mask_missing(self.values, to_replace)
  356. if filter is not None:
  357. filtered_out = ~self.mgr_locs.isin(filter)
  358. mask[filtered_out.nonzero()[0]] = False
  359. if not mask.any():
  360. if inplace:
  361. return [self]
  362. return [self.copy()]
  363. return self.putmask(mask, value, inplace=inplace)
    def setitem(self, indexer, value):
        """ set the value inplace; return a new block (of a possibly different
        dtype)

        indexer is a direct slice/positional indexer; value must be a
        compatible shape

        Returns a one-element list of blocks.  ValueError / TypeError from
        the assignment propagate; any other exception is swallowed and
        ``[self]`` is returned.
        """

        # coerce args
        values, value = self._try_coerce_args(self.values, value)
        arr_value = np.array(value)

        # cast the values to a type that can hold nan (if necessary)
        if not self._can_hold_element(value):
            dtype, _ = com._maybe_promote(arr_value.dtype)
            values = values.astype(dtype)

        # 2-d values are transposed for the assignment and transposed back
        # when the result block is built
        transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
        values = transf(values)
        l = len(values)

        # length checking
        # boolean with truth values == len of the value is ok too
        if isinstance(indexer, (np.ndarray, list)):
            if is_list_like(value) and len(indexer) != len(value):
                if not (isinstance(indexer, np.ndarray) and
                        indexer.dtype == np.bool_ and
                        len(indexer[indexer]) == len(value)):
                    raise ValueError("cannot set using a list-like indexer "
                                     "with a different length than the value")

        # slice
        elif isinstance(indexer, slice):

            if is_list_like(value) and l:
                if len(value) != _length_of_indexer(indexer, values):
                    raise ValueError("cannot set using a slice indexer with a "
                                     "different length than the value")

        try:
            # setting a single element for each dim and with a rhs that could be say a list
            # GH 6043
            if arr_value.ndim == 1 and (
                np.isscalar(indexer) or (isinstance(indexer, tuple) and all([ np.isscalar(idx) for idx in indexer ]))):
                values[indexer] = value

            # if we are an exact match (ex-broadcasting),
            # then use the resultant dtype
            elif len(arr_value.shape) and arr_value.shape[0] == values.shape[0] and np.prod(arr_value.shape) == np.prod(values.shape):
                values[indexer] = value
                values = values.astype(arr_value.dtype)

            # set
            else:
                values[indexer] = value

            # coerce and try to infer the dtypes of the result
            if np.isscalar(value):
                dtype, _ = _infer_dtype_from_scalar(value)
            else:
                dtype = 'infer'
            values = self._try_coerce_and_cast_result(values, dtype)
            return [make_block(transf(values),
                               ndim=self.ndim, placement=self.mgr_locs,
                               fastpath=True)]
        except (ValueError, TypeError) as detail:
            raise
        except Exception as detail:
            # NOTE(review): deliberate best-effort? the error is dropped
            # and the block is returned below
            pass

        return [self]
    def putmask(self, mask, new, align=True, inplace=False):
        """ putmask the data to the block; it is possible that we may create a
        new dtype of block

        return the resulting block(s)

        Parameters
        ----------
        mask : the condition to respect
        new : a ndarray/object
        align : boolean, perform alignment on other/cond, default is True
            (not read by this implementation)
        inplace : perform inplace modification, default is False

        Returns
        -------
        a new block(s), the result of the putmask
        """

        new_values = self.values if inplace else self.values.copy()

        # may need to align the new
        if hasattr(new, 'reindex_axis'):
            new = new.values.T

        # may need to align the mask
        if hasattr(mask, 'reindex_axis'):
            mask = mask.values.T

        # if we are passed a scalar None, convert it here
        if not is_list_like(new) and isnull(new):
            new = self.fill_value

        if self._can_hold_element(new):
            new = self._try_cast(new)

            # pseudo-broadcast
            if isinstance(new, np.ndarray) and new.ndim == self.ndim - 1:
                new = np.repeat(new, self.shape[-1]).reshape(self.shape)

            np.putmask(new_values, mask, new)

        # maybe upcast me
        elif mask.any():

            # need to go column by column
            new_blocks = []
            if self.ndim > 1:
                for i, ref_loc in enumerate(self.mgr_locs):
                    m = mask[i]
                    v = new_values[i]

                    # need a new block
                    if m.any():

                        n = new[i] if isinstance(
                            new, np.ndarray) else np.array(new)

                        # type of the new block
                        dtype, _ = com._maybe_promote(n.dtype)

                        # we need to explicitly astype here to make a copy
                        n = n.astype(dtype)

                        nv = _putmask_smart(v, m, n)
                    else:
                        nv = v if inplace else v.copy()

                    # Put back the dimension that was taken from it and make
                    # a block out of the result.
                    block = make_block(values=nv[np.newaxis],
                                       placement=[ref_loc],
                                       fastpath=True)

                    new_blocks.append(block)

            else:
                nv = _putmask_smart(new_values, mask, new)
                new_blocks.append(make_block(values=nv,
                                             placement=self.mgr_locs,
                                             fastpath=True))

            return new_blocks

        # reached when the element fit (values were modified above) or when
        # the mask selected nothing
        if inplace:
            return [self]

        return [make_block(new_values,
                           placement=self.mgr_locs, fastpath=True)]
  488. def interpolate(self, method='pad', axis=0, index=None,
  489. values=None, inplace=False, limit=None,
  490. fill_value=None, coerce=False, downcast=None, **kwargs):
  491. def check_int_bool(self, inplace):
  492. # Only FloatBlocks will contain NaNs.
  493. # timedelta subclasses IntBlock
  494. if (self.is_bool or self.is_integer) and not self.is_timedelta:
  495. if inplace:
  496. return self
  497. else:
  498. return self.copy()
  499. # a fill na type method
  500. try:
  501. m = com._clean_fill_method(method)
  502. except:
  503. m = None
  504. if m is not None:
  505. r = check_int_bool(self, inplace)
  506. if r is not None:
  507. return r
  508. return self._interpolate_with_fill(method=m,
  509. axis=axis,
  510. inplace=inplace,
  511. limit=limit,
  512. fill_value=fill_value,
  513. coerce=coerce,
  514. downcast=downcast)
  515. # try an interp method
  516. try:
  517. m = com._clean_interp_method(method, **kwargs)
  518. except:
  519. m = None
  520. if m is not None:
  521. r = check_int_bool(self, inplace)
  522. if r is not None:
  523. return r
  524. return self._interpolate(method=m,
  525. index=index,
  526. values=values,
  527. axis=axis,
  528. limit=limit,
  529. fill_value=fill_value,
  530. inplace=inplace,
  531. downcast=downcast,
  532. **kwargs)
  533. raise ValueError("invalid method '{0}' to interpolate.".format(method))
  534. def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
  535. limit=None, fill_value=None, coerce=False,
  536. downcast=None):
  537. """ fillna but using the interpolate machinery """
  538. # if we are coercing, then don't force the conversion
  539. # if the block can't hold the type
  540. if coerce:
  541. if not self._can_hold_na:
  542. if inplace:
  543. return [self]
  544. else:
  545. return [self.copy()]
  546. fill_value = self._try_fill(fill_value)
  547. values = self.values if inplace else self.values.copy()
  548. values = self._try_operate(values)
  549. values = com.interpolate_2d(values,
  550. method=method,
  551. axis=axis,
  552. limit=limit,
  553. fill_value=fill_value,
  554. dtype=self.dtype)
  555. values = self._try_coerce_result(values)
  556. blocks = [make_block(values,
  557. ndim=self.ndim, klass=self.__class__,
  558. fastpath=True, placement=self.mgr_locs)]
  559. return self._maybe_downcast(blocks, downcast)
  560. def _interpolate(self, method=None, index=None, values=None,
  561. fill_value=None, axis=0, limit=None,
  562. inplace=False, downcast=None, **kwargs):
  563. """ interpolate using scipy wrappers """
  564. data = self.values if inplace else self.values.copy()
  565. # only deal with floats
  566. if not self.is_float:
  567. if not self.is_integer:
  568. return self
  569. data = data.astype(np.float64)
  570. if fill_value is None:
  571. fill_value = self.fill_value
  572. if method in ('krogh', 'piecewise_polynomial', 'pchip'):
  573. if not index.is_monotonic:
  574. raise ValueError("{0} interpolation requires that the "
  575. "index be monotonic.".format(method))
  576. # process 1-d slices in the axis direction
  577. def func(x):
  578. # process a 1-d slice, returning it
  579. # should the axis argument be handled below in apply_along_axis?
  580. # i.e. not an arg to com.interpolate_1d
  581. return com.interpolate_1d(index, x, method=method, limit=limit,
  582. fill_value=fill_value,
  583. bounds_error=False, **kwargs)
  584. # interp each column independently
  585. interp_values = np.apply_along_axis(func, axis, data)
  586. blocks = [make_block(interp_values,
  587. ndim=self.ndim, klass=self.__class__,
  588. fastpath=True, placement=self.mgr_locs)]
  589. return self._maybe_downcast(blocks, downcast)
  590. def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
  591. """
  592. Take values according to indexer and return them as a block.bb
  593. """
  594. if fill_tuple is None:
  595. fill_value = self.fill_value
  596. new_values = com.take_nd(self.get_values(), indexer, axis=axis,
  597. allow_fill=False)
  598. else:
  599. fill_value = fill_tuple[0]
  600. new_values = com.take_nd(self.get_values(), indexer, axis=axis,
  601. allow_fill=True, fill_value=fill_value)
  602. if new_mgr_locs is None:
  603. if axis == 0:
  604. slc = lib.indexer_as_slice(indexer)
  605. if slc is not None:
  606. new_mgr_locs = self.mgr_locs[slc]
  607. else:
  608. new_mgr_locs = self.mgr_locs[indexer]
  609. else:
  610. new_mgr_locs = self.mgr_locs
  611. if new_values.dtype != self.dtype:
  612. return make_block(new_values, new_mgr_locs)
  613. else:
  614. return self.make_block_same_class(new_values, new_mgr_locs)
  615. def get_values(self, dtype=None):
  616. return self.values
  617. def diff(self, n):
  618. """ return block for the diff of the values """
  619. new_values = com.diff(self.values, n, axis=1)
  620. return [make_block(values=new_values,
  621. ndim=self.ndim, fastpath=True,
  622. placement=self.mgr_locs)]
  623. def shift(self, periods, axis=0):
  624. """ shift the block by periods, possibly upcast """
  625. # convert integer to float if necessary. need to do a lot more than
  626. # that, handle boolean etc also
  627. new_values, fill_value = com._maybe_upcast(self.values)
  628. # make sure array sent to np.roll is c_contiguous
  629. f_ordered = new_values.flags.f_contiguous
  630. if f_ordered:
  631. new_values = new_values.T
  632. axis = new_values.ndim - axis - 1
  633. new_values = np.roll(new_values, periods, axis=axis)
  634. axis_indexer = [ slice(None) ] * self.ndim
  635. if periods > 0:
  636. axis_indexer[axis] = slice(None,periods)
  637. else:
  638. axis_indexer[axis] = slice(periods,None)
  639. new_values[tuple(axis_indexer)] = fill_value
  640. # restore original order
  641. if f_ordered:
  642. new_values = new_values.T
  643. return [make_block(new_values,
  644. ndim=self.ndim, fastpath=True,
  645. placement=self.mgr_locs)]
  646. def eval(self, func, other, raise_on_error=True, try_cast=False):
  647. """
  648. evaluate the block; return result block from the result
  649. Parameters
  650. ----------
  651. func : how to combine self, other
  652. other : a ndarray/object
  653. raise_on_error : if True, raise when I can't perform the function,
  654. False by default (and just return the data that we had coming in)
  655. Returns
  656. -------
  657. a new block, the result of the func
  658. """
  659. values = self.values
  660. if hasattr(other, 'reindex_axis'):
  661. other = other.values
  662. # make sure that we can broadcast
  663. is_transposed = False
  664. if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
  665. if values.ndim != other.ndim:
  666. is_transposed = True
  667. else:
  668. if values.shape == other.shape[::-1]:
  669. is_transposed = True
  670. elif values.shape[0] == other.shape[-1]:
  671. is_transposed = True
  672. else:
  673. # this is a broadcast error heree
  674. raise ValueError("cannot broadcast shape [%s] with block "
  675. "values [%s]" % (values.T.shape,
  676. other.shape))
  677. transf = (lambda x: x.T) if is_transposed else (lambda x: x)
  678. # coerce/transpose the args if needed
  679. values, other = self._try_coerce_args(transf(values), other)
  680. # get the result, may need to transpose the other
  681. def get_result(other):
  682. return self._try_coerce_result(func(values, other))
  683. # error handler if we have an issue operating with the function
  684. def handle_error():
  685. if raise_on_error:
  686. raise TypeError('Could not operate %s with block values %s'
  687. % (repr(other), str(detail)))
  688. else:
  689. # return the values
  690. result = np.empty(values.shape, dtype='O')
  691. result.fill(np.nan)
  692. return result
  693. # get the result
  694. try:
  695. result = get_result(other)
  696. # if we have an invalid shape/broadcast error
  697. # GH4576, so raise instead of allowing to pass through
  698. except ValueError as detail:
  699. raise
  700. except Exception as detail:
  701. result = handle_error()
  702. # technically a broadcast error in numpy can 'work' by returning a
  703. # boolean False
  704. if not isinstance(result, np.ndarray):
  705. if not isinstance(result, np.ndarray):
  706. # differentiate between an invalid ndarray-ndarray comparison
  707. # and an invalid type comparison
  708. if isinstance(values, np.ndarray) and is_list_like(other):
  709. raise ValueError('Invalid broadcasting comparison [%s] '
  710. 'with block values' % repr(other))
  711. raise TypeError('Could not compare [%s] with block values'
  712. % repr(other))
  713. # transpose if needed
  714. result = transf(result)
  715. # try to cast if requested
  716. if try_cast:
  717. result = self._try_cast_result(result)
  718. return [make_block(result, ndim=self.ndim,
  719. fastpath=True, placement=self.mgr_locs)]
    def where(self, other, cond, align=True, raise_on_error=True,
              try_cast=False):
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
        cond  : the condition to respect
        align : boolean, perform alignment on other/cond
            (not read by this implementation)
        raise_on_error : if True (the default), raise when I can't perform
            the function; if False, a float64 array of NaN stands in for the
            result
        try_cast : if True, try to cast the result back to my dtype

        Returns
        -------
        a single block when I can hold NA or am 1-d, otherwise a list of
        blocks
        """

        values = self.values

        # see if we can align other
        if hasattr(other, 'reindex_axis'):
            other = other.values

        # make sure that we can broadcast
        is_transposed = False
        if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
            if values.ndim != other.ndim or values.shape == other.shape[::-1]:

                # if its symmetric are ok, no reshaping needed (GH 7506)
                if (values.shape[0] == np.array(values.shape)).all():
                    pass

                # pseudo broadcast (its a 2d vs 1d say and where needs it in a
                # specific direction)
                elif (other.ndim >= 1 and values.ndim - 1 == other.ndim and
                        values.shape[0] != other.shape[0]):
                    other = _block_shape(other).T
                else:
                    values = values.T
                    is_transposed = True

        # see if we can align cond
        if not hasattr(cond, 'shape'):
            raise ValueError(
                "where must have a condition that is ndarray like")

        if hasattr(cond, 'reindex_axis'):
            cond = cond.values

        # may need to undo transpose of values
        if hasattr(values, 'ndim'):
            if values.ndim != cond.ndim or values.shape == cond.shape[::-1]:
                values = values.T
                is_transposed = not is_transposed

        # our where function
        def func(c, v, o):
            # condition true everywhere: nothing to replace
            if c.ravel().all():
                return v

            v, o = self._try_coerce_args(v, o)
            try:
                return self._try_coerce_result(
                    expressions.where(c, v, o, raise_on_error=True)
                )
            except Exception as detail:
                if raise_on_error:
                    raise TypeError('Could not operate [%s] with block values '
                                    '[%s]' % (repr(o), str(detail)))
                else:
                    # return the values
                    result = np.empty(v.shape, dtype='float64')
                    result.fill(np.nan)
                    return result

        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        result = func(cond, values, other)
        if self._can_hold_na or self.ndim == 1:

            if not isinstance(result, np.ndarray):
                raise TypeError('Could not compare [%s] with block values'
                                % repr(other))

            if is_transposed:
                result = result.T

            # try to cast if requested
            if try_cast:
                result = self._try_cast_result(result)

            return make_block(result,
                              ndim=self.ndim, placement=self.mgr_locs)

        # might need to separate out blocks
        axis = cond.ndim - 1
        cond = cond.swapaxes(axis, 0)
        # True for each slice of cond (along its former last axis) that is
        # entirely True
        mask = np.array([cond[i].all() for i in range(cond.shape[0])],
                        dtype=bool)

        # one block for the fully-True slices, one for the rest
        result_blocks = []
        for m in [mask, ~mask]:
            if m.any():
                r = self._try_cast_result(
                    result.take(m.nonzero()[0], axis=axis))
                result_blocks.append(make_block(r.T,
                                                placement=self.mgr_locs[m]))

        return result_blocks
  810. def equals(self, other):
  811. if self.dtype != other.dtype or self.shape != other.shape: return False
  812. return np.array_equal(self.values, other.values)
  813. class NumericBlock(Block):
  814. __slots__ = ()
  815. is_numeric = True
  816. _can_hold_na = True
  817. class FloatOrComplexBlock(NumericBlock):
  818. __slots__ = ()
  819. def equals(self, other):
  820. if self.dtype != other.dtype or self.shape != other.shape: return False
  821. left, right = self.values, other.values
  822. return ((left == right) | (np.isnan(left) & np.isnan(right))).all()
  823. class FloatBlock(FloatOrComplexBlock):
  824. __slots__ = ()
  825. is_float = True
  826. _downcast_dtype = 'int64'
  827. def _can_hold_element(self, element):
  828. if is_list_like(element):
  829. element = np.array(element)
  830. tipo = element.dtype.type
  831. return issubclass(tipo, (np.floating, np.integer)) and not issubclass(
  832. tipo, (np.datetime64, np.timedelta64))
  833. return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance(
  834. element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64))
  835. def _try_cast(self, element):
  836. try:
  837. return float(element)
  838. except: # pragma: no cover
  839. return element
  840. def to_native_types(self, slicer=None, na_rep='', float_format=None,
  841. **kwargs):
  842. """ convert to our native types format, slicing if desired """
  843. values = self.values
  844. if slicer is not None:
  845. values = values[:, slicer]
  846. values = np.array(values, dtype=object)
  847. mask = isnull(values)
  848. values[mask] = na_rep
  849. if float_format:
  850. imask = (~mask).ravel()
  851. values.flat[imask] = np.array(
  852. [float_format % val for val in values.ravel()[imask]])
  853. return values.tolist()
  854. def should_store(self, value):
  855. # when inserting a column should not coerce integers to floats
  856. # unnecessarily
  857. return (issubclass(value.dtype.type, np.floating) and
  858. value.dtype == self.dtype)
  859. class ComplexBlock(FloatOrComplexBlock):
  860. __slots__ = ()
  861. is_complex = True
  862. def _can_hold_element(self, element):
  863. if is_list_like(element):
  864. element = np.array(element)
  865. return issubclass(element.dtype.type, (np.floating, np.integer, np.complexfloating))
  866. return (isinstance(element, (float, int, complex, np.float_, np.int_)) and
  867. not isinstance(bool, np.bool_))
  868. def _try_cast(self, element):
  869. try:
  870. return complex(element)
  871. except: # pragma: no cover
  872. return element
  873. def should_store(self, value):
  874. return issubclass(value.dtype.type, np.complexfloating)
  875. class IntBlock(NumericBlock):
  876. __slots__ = ()
  877. is_integer = True
  878. _can_hold_na = False
  879. def _can_hold_element(self, element):
  880. if is_list_like(element):
  881. element = np.array(element)
  882. tipo = element.dtype.type
  883. return issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64))
  884. return com.is_integer(element)
  885. def _try_cast(self, element):
  886. try:
  887. return int(element)
  888. except: # pragma: no cover
  889. return element
  890. def should_store(self, value):
  891. return com.is_integer_dtype(value) and value.dtype == self.dtype
  892. class TimeDeltaBlock(IntBlock):
  893. __slots__ = ()
  894. is_timedelta = True
  895. _can_hold_na = True
  896. is_numeric = False
  897. @property
  898. def fill_value(self):
  899. return tslib.iNaT
  900. def _try_fill(self, value):
  901. """ if we are a NaT, return the actual fill value """
  902. if isinstance(value, type(tslib.NaT)) or np.array(isnull(value)).all():
  903. value = tslib.iNaT
  904. elif isinstance(value, np.timedelta64):
  905. pass
  906. elif com.is_integer(value):
  907. # coerce to seconds of timedelta
  908. value = np.timedelta64(int(value * 1e9))
  909. elif isinstance(value, timedelta):
  910. value = np.timedelta64(value)
  911. return value
  912. def _try_coerce_args(self, values, other):
  913. """ provide coercion to our input arguments
  914. we are going to compare vs i8, so coerce to floats
  915. repring NaT with np.nan so nans propagate
  916. values is always ndarray like, other may not be """
  917. def masker(v):
  918. mask = isnull(v)
  919. v = v.view('i8').astype('float64')
  920. v[mask] = np.nan
  921. return v
  922. values = masker(values)
  923. if _is_null_datelike_scalar(other):
  924. other = np.nan
  925. elif isinstance(other, np.timedelta64):
  926. other = _coerce_scalar_to_timedelta_type(other, unit='s').item()
  927. if other == tslib.iNaT:
  928. other = np.nan
  929. else:
  930. other = masker(other)
  931. return values, other
  932. def _try_operate(self, values):
  933. """ return a version to operate on """
  934. return values.view('i8')
  935. def _try_coerce_result(self, result):
  936. """ reverse of try_coerce_args / try_operate """
  937. if isinstance(result, np.ndarray):
  938. mask = isnull(result)
  939. if result.dtype.kind in ['i', 'f', 'O']:
  940. result = result.astype('m8[ns]')
  941. result[mask] = tslib.iNaT
  942. elif isinstance(result, np.integer):
  943. result = np.timedelta64(result)
  944. return result
  945. def should_store(self, value):
  946. return issubclass(value.dtype.type, np.timedelta64)
  947. def to_native_types(self, slicer=None, na_rep=None, **kwargs):
  948. """ convert to our native types format, slicing if desired """
  949. values = self.values
  950. if slicer is not None:
  951. values = values[:, slicer]
  952. mask = isnull(values)
  953. rvalues = np.empty(values.shape, dtype=object)
  954. if na_rep is None:
  955. na_rep = 'NaT'
  956. rvalues[mask] = na_rep
  957. imask = (~mask).ravel()
  958. rvalues.flat[imask] = np.array([lib.repr_timedelta64(val)
  959. for val in values.ravel()[imask]],
  960. dtype=object)
  961. return rvalues.tolist()
  962. class BoolBlock(NumericBlock):
  963. __slots__ = ()
  964. is_bool = True
  965. _can_hold_na = False
  966. def _can_hold_element(self, element):
  967. if is_list_like(element):
  968. element = np.array(element)
  969. return issubclass(element.dtype.type, np.integer)
  970. return isinstance(element, (int, bool))
  971. def _try_cast(self, element):
  972. try:
  973. return bool(element)
  974. except: # pragma: no cover
  975. return element
  976. def should_store(self, value):
  977. return issubclass(value.dtype.type, np.bool_)
  978. def replace(self, to_replace, value, inplace=False, filter=None,
  979. regex=False):
  980. to_replace_values = np.atleast_1d(to_replace)
  981. if not np.can_cast(to_replace_values, bool):
  982. return self
  983. return super(BoolBlock, self).replace(to_replace, value,
  984. inplace=inplace, filter=filter,
  985. regex=regex)
  986. class ObjectBlock(Block):
  987. __slots__ = ()
  988. is_object = True
  989. _can_hold_na = True
  990. def __init__(self, values, ndim=2, fastpath=False,
  991. placement=None):
  992. if issubclass(values.dtype.type, compat.string_types):
  993. values = np.array(values, dtype=object)
  994. super(ObjectBlock, self).__init__(values, ndim=ndim,
  995. fastpath=fastpath,
  996. placement=placement)
  997. @property
  998. def is_bool(self):
  999. """ we can be a bool if we have only bool values but are of type
  1000. object
  1001. """
  1002. return lib.is_bool_array(self.values.ravel())
  1003. def convert(self, convert_dates=True, convert_numeric=True, convert_timedeltas=True,
  1004. copy=True, by_item=True):
  1005. """ attempt to coerce any object types to better types
  1006. return a copy of the block (if copy = True)
  1007. by definition we ARE an ObjectBlock!!!!!
  1008. can return multiple blocks!
  1009. """
  1010. # attempt to create new type blocks
  1011. blocks = []
  1012. if by_item and not self._is_single_block:
  1013. for i, rl in enumerate(self.mgr_locs):
  1014. values = self.iget(i)
  1015. values = com._possibly_convert_objects(
  1016. values.ravel(), convert_dates=convert_dates,
  1017. convert_numeric=convert_numeric,
  1018. convert_timedeltas=convert_timedeltas,
  1019. ).reshape(values.shape)
  1020. values = _block_shape(values, ndim=self.ndim)
  1021. newb = make_block(values,
  1022. ndim=self.ndim, placement=[rl])
  1023. blocks.append(newb)
  1024. else:
  1025. values = com._possibly_convert_objects(
  1026. self.values.ravel(), convert_dates=convert_dates,
  1027. convert_numeric=convert_numeric
  1028. ).reshape(self.values.shape)
  1029. blocks.append(make_block(values,
  1030. ndim=self.ndim, placement=self.mgr_locs))
  1031. return blocks
  1032. def set(self, locs, values, check=False):
  1033. """
  1034. Modify Block in-place with new item value
  1035. Returns
  1036. -------
  1037. None
  1038. """
  1039. # GH6026
  1040. if check:
  1041. try:
  1042. if (self.values[locs] == values).all():
  1043. return
  1044. except:
  1045. pass
  1046. try:
  1047. self.values[locs] = values
  1048. except (ValueError):
  1049. # broadcasting error
  1050. # see GH6171
  1051. new_shape = list(values.shape)
  1052. new_shape[0] = len(self.items)
  1053. self.values = np.empty(tuple(new_shape),dtype=self.dtype)
  1054. self.values.fill(np.nan)
  1055. self.values[locs] = values
  1056. def _maybe_downcast(self, blocks, downcast=None):
  1057. if downcast is not None:
  1058. return blocks
  1059. # split and convert the blocks
  1060. result_blocks = []
  1061. for blk in blocks:
  1062. result_blocks.extend(blk.convert(convert_dates=True,
  1063. convert_numeric=False))
  1064. return result_blocks
  1065. def _can_hold_element(self, element):
  1066. return True
  1067. def _try_cast(self, element):
  1068. return element
  1069. def should_store(self, value):
  1070. return not issubclass(value.dtype.type,
  1071. (np.integer, np.floating, np.complexfloating,
  1072. np.datetime64, np.bool_))
  1073. def replace(self, to_replace, value, inplace=False, filter=None,
  1074. regex=False):
  1075. blk = [self]
  1076. to_rep_is_list = com.is_list_like(to_replace)
  1077. value_is_list = com.is_list_like(value)
  1078. both_lists = to_rep_is_list and value_is_list
  1079. either_list = to_rep_is_list or value_is_list
  1080. if not either_list and com.is_re(to_replace):
  1081. blk[0], = blk[0]._replace_single(to_replace, value,
  1082. inplace=inplace, filter=filter,
  1083. regex=True)
  1084. elif not (either_list or regex):
  1085. blk = super(ObjectBlock, self).replace(to_replace, value,
  1086. inplace=inplace,
  1087. filter=filter, regex=regex)
  1088. elif both_lists:
  1089. for to_rep, v in zip(to_replace, value):
  1090. blk[0], = blk[0]._replace_single(to_rep, v, inplace=inplace,
  1091. filter=filter, regex=regex)
  1092. elif to_rep_is_list and regex:
  1093. for to_rep in to_replace:
  1094. blk[0], = blk[0]._replace_single(to_rep, value,
  1095. inplace=inplace,
  1096. filter=filter, regex=regex)
  1097. else:
  1098. blk[0], = blk[0]._replace_single(to_replace, value,
  1099. inplace=inplace, filter=filter,
  1100. regex=regex)
  1101. return blk
  1102. def _replace_single(self, to_replace, value, inplace=False, filter=None,
  1103. regex=False):
  1104. # to_replace is regex compilable
  1105. to_rep_re = regex and com.is_re_compilable(to_replace)
  1106. # regex is regex compilable
  1107. regex_re = com.is_re_compilable(regex)
  1108. # only one will survive
  1109. if to_rep_re and regex_re:
  1110. raise AssertionError('only one of to_replace and regex can be '
  1111. 'regex compilable')
  1112. # if regex was passed as something that can be a regex (rather than a
  1113. # boolean)
  1114. if regex_re:
  1115. to_replace = regex
  1116. regex = regex_re or to_rep_re
  1117. # try to get the pattern attribute (compiled re) or it's a string
  1118. try:
  1119. pattern = to_replace.pattern
  1120. except AttributeError:
  1121. pattern = to_replace
  1122. # if the pattern is not empty and to_replace is either a string or a
  1123. # regex
  1124. if regex and pattern:
  1125. rx = re.compile(to_replace)
  1126. else:
  1127. # if the thing to replace is not a string or compiled regex call
  1128. # the superclass method -> to_replace is some kind of object
  1129. result = super(ObjectBlock, self).replace(to_replace, value,
  1130. inplace=inplace,
  1131. filter=filter,
  1132. regex=regex)
  1133. if not isinstance(result, list):
  1134. result = [result]
  1135. return result
  1136. new_values = self.values if inplace else self.values.copy()
  1137. # deal with replacing values with objects (strings) that match but
  1138. # whose replacement is not a string (numeric, nan, object)
  1139. if isnull(value) or not isinstance(value, compat.string_types):
  1140. def re_replacer(s):
  1141. try:
  1142. return value if rx.search(s) is not None else s
  1143. except TypeError:
  1144. return s
  1145. else:
  1146. # value is guaranteed to

Large files are truncated, but you can click here to view the full file