/pandas/core/internals.py
Python | 5218 lines | 4262 code | 469 blank | 487 comment | 452 complexity | 791ba33868e7ff745da90d2b43722e0a MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
Large files are truncated, but you can click here to view the full file
- import copy
- import itertools
- import re
- import operator
- from datetime import datetime, timedelta, date
- from collections import defaultdict
- import numpy as np
- from numpy import percentile as _quantile
- from pandas.core.base import PandasObject
- from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
- from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
- _ensure_int64, _ensure_platform_int,
- is_integer,
- is_dtype_equal,
- is_timedelta64_dtype,
- is_datetime64_dtype, is_datetimetz, is_sparse,
- is_categorical, is_categorical_dtype,
- is_integer_dtype,
- is_datetime64tz_dtype,
- is_object_dtype,
- is_datetimelike_v_numeric,
- is_numeric_v_string_like, is_extension_type,
- is_list_like,
- is_re,
- is_re_compilable,
- is_scalar,
- _get_dtype)
- from pandas.types.cast import (_possibly_downcast_to_dtype,
- _maybe_convert_string_to_object,
- _maybe_upcast,
- _maybe_convert_scalar, _maybe_promote,
- _infer_dtype_from_scalar,
- _soft_convert_objects,
- _possibly_convert_objects,
- _astype_nansafe,
- _find_common_type)
- from pandas.types.missing import (isnull, array_equivalent,
- _is_na_compat,
- is_null_datelike_scalar)
- import pandas.types.concat as _concat
- from pandas.types.generic import ABCSeries
- from pandas.core.common import is_null_slice
- import pandas.core.algorithms as algos
- from pandas.core.index import Index, MultiIndex, _ensure_index
- from pandas.core.indexing import maybe_convert_indices, length_of_indexer
- from pandas.core.categorical import Categorical, maybe_to_categorical
- from pandas.tseries.index import DatetimeIndex
- from pandas.formats.printing import pprint_thing
- import pandas.core.missing as missing
- from pandas.sparse.array import _maybe_to_sparse, SparseArray
- import pandas.lib as lib
- import pandas.tslib as tslib
- import pandas.computation.expressions as expressions
- from pandas.util.decorators import cache_readonly
- from pandas.tslib import Timedelta
- from pandas import compat, _np_version_under1p9
- from pandas.compat import range, map, zip, u
- from pandas.lib import BlockPlacement
- class Block(PandasObject):
- """
- Canonical n-dimensional unit of homogeneous dtype contained in a pandas
- data structure
- Index-ignorant; let the container take care of that
- """
- __slots__ = ['_mgr_locs', 'values', 'ndim']
- is_numeric = False
- is_float = False
- is_integer = False
- is_complex = False
- is_datetime = False
- is_datetimetz = False
- is_timedelta = False
- is_bool = False
- is_object = False
- is_categorical = False
- is_sparse = False
- _box_to_block_values = True
- _can_hold_na = False
- _downcast_dtype = None
- _can_consolidate = True
- _verify_integrity = True
- _validate_ndim = True
- _ftype = 'dense'
- _holder = None
- def __init__(self, values, placement, ndim=None, fastpath=False):
- if ndim is None:
- ndim = values.ndim
- elif values.ndim != ndim:
- raise ValueError('Wrong number of dimensions')
- self.ndim = ndim
- self.mgr_locs = placement
- self.values = values
- if ndim and len(self.mgr_locs) != len(self.values):
- raise ValueError('Wrong number of items passed %d, placement '
- 'implies %d' % (len(self.values),
- len(self.mgr_locs)))
- @property
- def _consolidate_key(self):
- return (self._can_consolidate, self.dtype.name)
- @property
- def _is_single_block(self):
- return self.ndim == 1
- @property
- def is_view(self):
- """ return a boolean if I am possibly a view """
- return self.values.base is not None
    @property
    def is_datelike(self):
        """ return True if I am a datetime-like or timedelta block """
        # NOTE: original docstring said "non-datelike", which contradicted
        # the expression below; corrected here.
        return self.is_datetime or self.is_timedelta
- def is_categorical_astype(self, dtype):
- """
- validate that we have a astypeable to categorical,
- returns a boolean if we are a categorical
- """
- if is_categorical_dtype(dtype):
- if dtype == CategoricalDtype():
- return True
- # this is a pd.Categorical, but is not
- # a valid type for astypeing
- raise TypeError("invalid type {0} for astype".format(dtype))
- return False
- def external_values(self, dtype=None):
- """ return an outside world format, currently just the ndarray """
- return self.values
- def internal_values(self, dtype=None):
- """ return an internal format, currently just the ndarray
- this should be the pure internal API format
- """
- return self.values
- def get_values(self, dtype=None):
- """
- return an internal format, currently just the ndarray
- this is often overriden to handle to_dense like operations
- """
- if is_object_dtype(dtype):
- return self.values.astype(object)
- return self.values
- def to_dense(self):
- return self.values.view()
- def to_object_block(self, mgr):
- """ return myself as an object block """
- values = self.get_values(dtype=object)
- return self.make_block(values, klass=ObjectBlock)
- @property
- def _na_value(self):
- return np.nan
- @property
- def fill_value(self):
- return np.nan
- @property
- def mgr_locs(self):
- return self._mgr_locs
- @property
- def array_dtype(self):
- """ the dtype to return if I want to construct this block as an
- array
- """
- return self.dtype
- def make_block(self, values, placement=None, ndim=None, **kwargs):
- """
- Create a new block, with type inference propagate any values that are
- not specified
- """
- if placement is None:
- placement = self.mgr_locs
- if ndim is None:
- ndim = self.ndim
- return make_block(values, placement=placement, ndim=ndim, **kwargs)
- def make_block_scalar(self, values, **kwargs):
- """
- Create a ScalarBlock
- """
- return ScalarBlock(values)
- def make_block_same_class(self, values, placement=None, fastpath=True,
- **kwargs):
- """ Wrap given values in a block of same type as self. """
- if placement is None:
- placement = self.mgr_locs
- return make_block(values, placement=placement, klass=self.__class__,
- fastpath=fastpath, **kwargs)
- @mgr_locs.setter
- def mgr_locs(self, new_mgr_locs):
- if not isinstance(new_mgr_locs, BlockPlacement):
- new_mgr_locs = BlockPlacement(new_mgr_locs)
- self._mgr_locs = new_mgr_locs
- def __unicode__(self):
- # don't want to print out all of the items here
- name = pprint_thing(self.__class__.__name__)
- if self._is_single_block:
- result = '%s: %s dtype: %s' % (name, len(self), self.dtype)
- else:
- shape = ' x '.join([pprint_thing(s) for s in self.shape])
- result = '%s: %s, %s, dtype: %s' % (name, pprint_thing(
- self.mgr_locs.indexer), shape, self.dtype)
- return result
- def __len__(self):
- return len(self.values)
- def __getstate__(self):
- return self.mgr_locs.indexer, self.values
- def __setstate__(self, state):
- self.mgr_locs = BlockPlacement(state[0])
- self.values = state[1]
- self.ndim = self.values.ndim
- def _slice(self, slicer):
- """ return a slice of my values """
- return self.values[slicer]
    def reshape_nd(self, labels, shape, ref_items, mgr=None):
        """
        Return a new block transformed to an n-d block.

        Parameters
        ----------
        labels : list of new axis labels
        shape : new shape
        ref_items : new ref_items
        """
        # delegate to the module-level helper; values are transposed so the
        # items axis comes last, as _block2d_to_blocknd expects
        return _block2d_to_blocknd(values=self.get_values().T,
                                   placement=self.mgr_locs, shape=shape,
                                   labels=labels, ref_items=ref_items)
- def getitem_block(self, slicer, new_mgr_locs=None):
- """
- Perform __getitem__-like, return result as block.
- As of now, only supports slices that preserve dimensionality.
- """
- if new_mgr_locs is None:
- if isinstance(slicer, tuple):
- axis0_slicer = slicer[0]
- else:
- axis0_slicer = slicer
- new_mgr_locs = self.mgr_locs[axis0_slicer]
- new_values = self._slice(slicer)
- if self._validate_ndim and new_values.ndim != self.ndim:
- raise ValueError("Only same dim slicing is allowed")
- return self.make_block_same_class(new_values, new_mgr_locs)
- @property
- def shape(self):
- return self.values.shape
- @property
- def itemsize(self):
- return self.values.itemsize
- @property
- def dtype(self):
- return self.values.dtype
- @property
- def ftype(self):
- return "%s:%s" % (self.dtype, self._ftype)
- def merge(self, other):
- return _merge_blocks([self, other])
    def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
                     limit=None, mask_info=None):
        """
        Reindex using pre-computed indexer information.

        axis must be >= 1; axis-0 (items) reindexing is handled by the
        manager, not the block.
        """
        if axis < 1:
            raise AssertionError('axis must be at least 1, got %d' % axis)
        if fill_value is None:
            fill_value = self.fill_value

        # take with fill; mask_info is an optional precomputed (mask, needs)
        # pair passed straight through to algos.take_nd
        new_values = algos.take_nd(self.values, indexer, axis,
                                   fill_value=fill_value, mask_info=mask_info)
        return self.make_block(new_values, fastpath=True)
    def get(self, item):
        # NOTE(review): ``self.items`` is not defined on Block in this file;
        # this appears to rely on an attribute provided elsewhere (or is a
        # legacy leftover) -- confirm against callers before relying on it.
        loc = self.items.get_loc(item)
        return self.values[loc]
- def iget(self, i):
- return self.values[i]
- def set(self, locs, values, check=False):
- """
- Modify Block in-place with new item value
- Returns
- -------
- None
- """
- self.values[locs] = values
- def delete(self, loc):
- """
- Delete given loc(-s) from block in-place.
- """
- self.values = np.delete(self.values, loc, 0)
- self.mgr_locs = self.mgr_locs.delete(loc)
- def apply(self, func, mgr=None, **kwargs):
- """ apply the function to my values; return a block if we are not
- one
- """
- result = func(self.values, **kwargs)
- if not isinstance(result, Block):
- result = self.make_block(values=_block_shape(result,
- ndim=self.ndim))
- return result
    def fillna(self, value, limit=None, inplace=False, downcast=None,
               mgr=None):
        """ fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        # blocks that cannot hold NaN have nothing to fill
        if not self._can_hold_na:
            if inplace:
                return self
            else:
                return self.copy()

        original_value = value
        mask = isnull(self.values)
        if limit is not None:
            if self.ndim > 2:
                raise NotImplementedError("number of dimensions for 'fillna' "
                                          "is currently limited to 2")
            # cap the number of filled values per row by zeroing the mask
            # beyond `limit` consecutive missing entries (along last axis)
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        # fillna, but if we cannot coerce, then try again as an ObjectBlock
        try:
            values, _, value, _ = self._try_coerce_args(self.values, value)
            blocks = self.putmask(mask, value, inplace=inplace)
            # undo any internal coercion before re-wrapping in blocks
            blocks = [b.make_block(values=self._try_coerce_result(b.values))
                      for b in blocks]
            return self._maybe_downcast(blocks, downcast)
        except (TypeError, ValueError):

            # we can't process the value, but nothing to do
            if not mask.any():
                return self if inplace else self.copy()

            # we cannot coerce the underlying object, so
            # make an ObjectBlock and fill with the original value
            return self.to_object_block(mgr=mgr).fillna(original_value,
                                                        limit=limit,
                                                        inplace=inplace,
                                                        downcast=False)
- def _maybe_downcast(self, blocks, downcast=None):
- # no need to downcast our float
- # unless indicated
- if downcast is None and self.is_float:
- return blocks
- elif downcast is None and (self.is_timedelta or self.is_datetime):
- return blocks
- return _extend_blocks([b.downcast(downcast) for b in blocks])
    def downcast(self, dtypes=None, mgr=None):
        """ try to downcast each item to the dict of dtypes if present """

        # turn it off completely
        if dtypes is False:
            return self

        values = self.values

        # single block handling
        if self._is_single_block:

            # try to cast all non-floats here
            if dtypes is None:
                dtypes = 'infer'

            nv = _possibly_downcast_to_dtype(values, dtypes)
            return self.make_block(nv, fastpath=True)

        # ndim > 1
        if dtypes is None:
            return self

        if not (dtypes == 'infer' or isinstance(dtypes, dict)):
            raise ValueError("downcast must have a dictionary or 'infer' as "
                             "its argument")

        # item-by-item
        # this is expensive as it splits the blocks items-by-item
        blocks = []
        for i, rl in enumerate(self.mgr_locs):

            if dtypes == 'infer':
                dtype = 'infer'
            else:
                # dict-valued `dtypes` is rejected; the line after the raise
                # is intentionally unreachable placeholder code
                raise AssertionError("dtypes as dict is not supported yet")

                # TODO: This either should be completed or removed
                dtype = dtypes.get(item, self._downcast_dtype)  # noqa

            if dtype is None:
                nv = _block_shape(values[i], ndim=self.ndim)
            else:
                nv = _possibly_downcast_to_dtype(values[i], dtype)
                nv = _block_shape(nv, ndim=self.ndim)
            blocks.append(self.make_block(nv, fastpath=True, placement=[rl]))

        return blocks
    def astype(self, dtype, copy=False, raise_on_error=True, values=None,
               **kwargs):
        """Public astype entry point; delegates to ``_astype``."""
        return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
                            values=values, **kwargs)
    def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
                klass=None, mgr=None, **kwargs):
        """
        Coerce to the new type (if copy=True, return a new copy)
        raise on an except if raise == True
        """

        # may need to convert to categorical
        # this is only called for non-categoricals
        if self.is_categorical_astype(dtype):
            return self.make_block(Categorical(self.values, **kwargs))

        # astype processing
        dtype = np.dtype(dtype)
        if self.dtype == dtype:
            # no-op (modulo an explicit copy request)
            if copy:
                return self.copy()
            return self

        if klass is None:
            if dtype == np.object_:
                klass = ObjectBlock
        try:
            # force the copy here
            if values is None:

                if issubclass(dtype.type,
                              (compat.text_type, compat.string_types)):

                    # use native type formatting for datetime/tz/timedelta
                    if self.is_datelike:
                        values = self.to_native_types()

                    # astype formatting
                    else:
                        values = self.values

                else:
                    values = self.get_values(dtype=dtype)

                # _astype_nansafe works fine with 1-d only
                values = _astype_nansafe(values.ravel(), dtype, copy=True)
                values = values.reshape(self.shape)

            newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
                              klass=klass)
        except:
            # swallow only when the caller asked not to raise
            if raise_on_error is True:
                raise
            newb = self.copy() if copy else self

        # sanity-check that a numeric->numeric astype kept the shape
        if newb.is_numeric and self.is_numeric:
            if newb.shape != self.shape:
                raise TypeError("cannot set astype for copy = [%s] for dtype "
                                "(%s [%s]) with smaller itemsize that current "
                                "(%s [%s])" % (copy, self.dtype.name,
                                               self.itemsize, newb.dtype.name,
                                               newb.itemsize))
        return newb
- def convert(self, copy=True, **kwargs):
- """ attempt to coerce any object types to better types return a copy
- of the block (if copy = True) by definition we are not an ObjectBlock
- here!
- """
- return self.copy() if copy else self
- def _can_hold_element(self, value):
- raise NotImplementedError()
- def _try_cast(self, value):
- raise NotImplementedError()
    def _try_cast_result(self, result, dtype=None):
        """ try to cast the result to our original type, we may have
        roundtripped thru object in the mean-time
        """
        if dtype is None:
            dtype = self.dtype

        if self.is_integer or self.is_bool or self.is_datetime:
            # leave as-is; fall through to the generic downcast below
            pass
        elif self.is_float and result.dtype == self.dtype:

            # protect against a bool/object showing up here
            if isinstance(dtype, compat.string_types) and dtype == 'infer':
                return result
            if not isinstance(dtype, type):
                dtype = dtype.type
            if issubclass(dtype, (np.bool_, np.object_)):
                if issubclass(dtype, np.bool_):
                    if isnull(result).all():
                        # all-NaN: a bool target can only be all-NaN -> bool
                        return result.astype(np.bool_)
                    else:
                        # map the 0/1 float results back to False/True
                        result = result.astype(np.object_)
                        result[result == 1] = True
                        result[result == 0] = False
                        return result
                else:
                    return result.astype(np.object_)

            return result

        # may need to change the dtype here
        return _possibly_downcast_to_dtype(result, dtype)
- def _try_operate(self, values):
- """ return a version to operate on as the input """
- return values
- def _try_coerce_args(self, values, other):
- """ provide coercion to our input arguments """
- return values, False, other, False
- def _try_coerce_result(self, result):
- """ reverse of try_coerce_args """
- return result
- def _try_coerce_and_cast_result(self, result, dtype=None):
- result = self._try_coerce_result(result)
- result = self._try_cast_result(result, dtype=dtype)
- return result
- def _try_fill(self, value):
- return value
- def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
- **kwargs):
- """ convert to our native types format, slicing if desired """
- values = self.values
- if slicer is not None:
- values = values[:, slicer]
- mask = isnull(values)
- if not self.is_object and not quoting:
- values = values.astype(str)
- else:
- values = np.array(values, dtype='object')
- values[mask] = na_rep
- return values
- # block actions ####
- def copy(self, deep=True, mgr=None):
- """ copy constructor """
- values = self.values
- if deep:
- values = values.copy()
- return self.make_block_same_class(values)
    def replace(self, to_replace, value, inplace=False, filter=None,
                regex=False, convert=True, mgr=None):
        """ replace the to_replace value with value, possible to create new
        blocks here this is just a call to putmask. regex is not used here.
        It is used in ObjectBlocks.  It is here for API
        compatibility.
        """
        original_to_replace = to_replace
        mask = isnull(self.values)

        # try to replace, if we raise an error, convert to ObjectBlock and
        # retry
        try:
            values, _, to_replace, _ = self._try_coerce_args(self.values,
                                                             to_replace)
            # mask of positions matching to_replace
            mask = missing.mask_missing(values, to_replace)
            if filter is not None:
                # only replace rows whose mgr_locs are in `filter`
                filtered_out = ~self.mgr_locs.isin(filter)
                mask[filtered_out.nonzero()[0]] = False

            blocks = self.putmask(mask, value, inplace=inplace)
            if convert:
                blocks = [b.convert(by_item=True, numeric=False,
                                    copy=not inplace) for b in blocks]
            return blocks
        except (TypeError, ValueError):

            # we can't process the value, but nothing to do
            if not mask.any():
                return self if inplace else self.copy()

            # retry the whole replace on an ObjectBlock view of ourselves
            return self.to_object_block(mgr=mgr).replace(
                to_replace=original_to_replace, value=value, inplace=inplace,
                filter=filter, regex=regex, convert=convert)
- def _replace_single(self, *args, **kwargs):
- """ no-op on a non-ObjectBlock """
- return self if kwargs['inplace'] else self.copy()
    def setitem(self, indexer, value, mgr=None):
        """ set the value inplace; return a new block (of a possibly different
        dtype)

        indexer is a direct slice/positional indexer; value must be a
        compatible shape
        """
        # coerce None values, if appropriate
        if value is None:
            if self.is_numeric:
                value = np.nan

        # coerce args
        values, _, value, _ = self._try_coerce_args(self.values, value)
        arr_value = np.array(value)

        # cast the values to a type that can hold nan (if necessary)
        if not self._can_hold_element(value):
            dtype, _ = _maybe_promote(arr_value.dtype)
            values = values.astype(dtype)

        # 2-d blocks are stored transposed relative to setitem's view
        transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
        values = transf(values)
        l = len(values)

        # length checking
        # boolean with truth values == len of the value is ok too
        if isinstance(indexer, (np.ndarray, list)):
            if is_list_like(value) and len(indexer) != len(value):
                if not (isinstance(indexer, np.ndarray) and
                        indexer.dtype == np.bool_ and
                        len(indexer[indexer]) == len(value)):
                    raise ValueError("cannot set using a list-like indexer "
                                     "with a different length than the value")

        # slice
        elif isinstance(indexer, slice):

            if is_list_like(value) and l:
                if len(value) != length_of_indexer(indexer, values):
                    raise ValueError("cannot set using a slice indexer with a "
                                     "different length than the value")

        try:

            def _is_scalar_indexer(indexer):
                # return True if we are all scalar indexers

                if arr_value.ndim == 1:
                    if not isinstance(indexer, tuple):
                        indexer = tuple([indexer])
                    return all([is_scalar(idx) for idx in indexer])
                return False

            def _is_empty_indexer(indexer):
                # return a boolean if we have an empty indexer

                if arr_value.ndim == 1:
                    if not isinstance(indexer, tuple):
                        indexer = tuple([indexer])
                    return any(isinstance(idx, np.ndarray) and len(idx) == 0
                               for idx in indexer)
                return False

            # empty indexers
            # 8669 (empty)
            if _is_empty_indexer(indexer):
                pass

            # setting a single element for each dim and with a rhs that could
            # be say a list
            # GH 6043
            elif _is_scalar_indexer(indexer):
                values[indexer] = value

            # if we are an exact match (ex-broadcasting),
            # then use the resultant dtype
            elif (len(arr_value.shape) and
                  arr_value.shape[0] == values.shape[0] and
                  np.prod(arr_value.shape) == np.prod(values.shape)):
                values[indexer] = value
                values = values.astype(arr_value.dtype)

            # set
            else:
                values[indexer] = value

            # coerce and try to infer the dtypes of the result
            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                          value.dtype):
                dtype = value.dtype
            elif is_scalar(value):
                dtype, _ = _infer_dtype_from_scalar(value)
            else:
                dtype = 'infer'
            values = self._try_coerce_and_cast_result(values, dtype)
            block = self.make_block(transf(values), fastpath=True)

            # may have to soft convert_objects here
            if block.is_object and not self.is_object:
                block = block.convert(numeric=False)

            return block
        except ValueError:
            raise
        except TypeError:

            # cast to the passed dtype if possible
            # otherwise raise the original error
            try:
                # e.g. we are uint32 and our value is uint64
                # this is for compat with older numpies
                block = self.make_block(transf(values.astype(value.dtype)))
                return block.setitem(indexer=indexer, value=value, mgr=mgr)
            except:
                pass

            raise
        except Exception:
            pass

        # silently-swallowed failure: return self unchanged as a 1-list
        return [self]
    def putmask(self, mask, new, align=True, inplace=False, axis=0,
                transpose=False, mgr=None):
        """ putmask the data to the block; it is possible that we may create a
        new dtype of block

        return the resulting block(s)

        Parameters
        ----------
        mask  : the condition to respect
        new : a ndarray/object
        align : boolean, perform alignment on other/cond, default is True
        inplace : perform inplace modification, default is False
        axis : int
        transpose : boolean
            Set to True if self is stored with axes reversed

        Returns
        -------
        a list of new blocks, the result of the putmask
        """

        new_values = self.values if inplace else self.values.copy()

        # unwrap pandas containers to raw ndarrays
        if hasattr(new, 'reindex_axis'):
            new = new.values

        if hasattr(mask, 'reindex_axis'):
            mask = mask.values

        # if we are passed a scalar None, convert it here
        if not is_list_like(new) and isnull(new) and not self.is_object:
            new = self.fill_value

        if self._can_hold_element(new):
            if transpose:
                new_values = new_values.T

            new = self._try_cast(new)

            # If the default repeat behavior in np.putmask would go in the
            # wrong direction, then explictly repeat and reshape new instead
            if getattr(new, 'ndim', 0) >= 1:
                if self.ndim - 1 == new.ndim and axis == 1:
                    new = np.repeat(
                        new, new_values.shape[-1]).reshape(self.shape)
                new = new.astype(new_values.dtype)

            np.putmask(new_values, mask, new)

        # maybe upcast me
        elif mask.any():
            if transpose:
                mask = mask.T
                if isinstance(new, np.ndarray):
                    new = new.T
                axis = new_values.ndim - axis - 1

            # Pseudo-broadcast
            if getattr(new, 'ndim', 0) >= 1:
                if self.ndim - 1 == new.ndim:
                    new_shape = list(new.shape)
                    new_shape.insert(axis, 1)
                    new = new.reshape(tuple(new_shape))

            # need to go column by column
            new_blocks = []
            if self.ndim > 1:
                for i, ref_loc in enumerate(self.mgr_locs):
                    m = mask[i]
                    v = new_values[i]

                    # need a new block
                    if m.any():
                        if isinstance(new, np.ndarray):
                            n = np.squeeze(new[i % new.shape[0]])
                        else:
                            n = np.array(new)

                        # type of the new block
                        dtype, _ = _maybe_promote(n.dtype)

                        # we need to explicitly astype here to make a copy
                        n = n.astype(dtype)

                        nv = _putmask_smart(v, m, n)
                    else:
                        nv = v if inplace else v.copy()

                    # Put back the dimension that was taken from it and make
                    # a block out of the result.
                    block = self.make_block(values=nv[np.newaxis],
                                            placement=[ref_loc], fastpath=True)

                    new_blocks.append(block)

            else:
                nv = _putmask_smart(new_values, mask, new)
                new_blocks.append(self.make_block(values=nv, fastpath=True))

            return new_blocks

        if inplace:
            return [self]

        if transpose:
            new_values = new_values.T

        return [self.make_block(new_values, fastpath=True)]
    def interpolate(self, method='pad', axis=0, index=None, values=None,
                    inplace=False, limit=None, limit_direction='forward',
                    fill_value=None, coerce=False, downcast=None, mgr=None,
                    **kwargs):
        """Dispatch to fill-style or scipy-style interpolation based on
        whether *method* parses as a fill method or an interp method.
        """

        def check_int_bool(self, inplace):
            # Only FloatBlocks will contain NaNs.
            # timedelta subclasses IntBlock
            if (self.is_bool or self.is_integer) and not self.is_timedelta:
                if inplace:
                    return self
                else:
                    return self.copy()

        # a fill na type method
        try:
            m = missing.clean_fill_method(method)
        except:
            m = None

        if m is not None:
            r = check_int_bool(self, inplace)
            if r is not None:
                return r
            return self._interpolate_with_fill(method=m, axis=axis,
                                               inplace=inplace, limit=limit,
                                               fill_value=fill_value,
                                               coerce=coerce,
                                               downcast=downcast, mgr=mgr)
        # try an interp method
        try:
            m = missing.clean_interp_method(method, **kwargs)
        except:
            m = None

        if m is not None:
            r = check_int_bool(self, inplace)
            if r is not None:
                return r
            return self._interpolate(method=m, index=index, values=values,
                                     axis=axis, limit=limit,
                                     limit_direction=limit_direction,
                                     fill_value=fill_value, inplace=inplace,
                                     downcast=downcast, mgr=mgr, **kwargs)

        raise ValueError("invalid method '{0}' to interpolate.".format(method))
    def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
                               limit=None, fill_value=None, coerce=False,
                               downcast=None, mgr=None):
        """ fillna but using the interpolate machinery """

        # if we are coercing, then don't force the conversion
        # if the block can't hold the type
        if coerce:
            if not self._can_hold_na:
                if inplace:
                    return [self]
                else:
                    return [self.copy()]

        values = self.values if inplace else self.values.copy()
        values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
        values = self._try_operate(values)
        values = missing.interpolate_2d(values, method=method, axis=axis,
                                        limit=limit, fill_value=fill_value,
                                        dtype=self.dtype)
        values = self._try_coerce_result(values)

        blocks = [self.make_block(values, klass=self.__class__, fastpath=True)]
        return self._maybe_downcast(blocks, downcast)
    def _interpolate(self, method=None, index=None, values=None,
                     fill_value=None, axis=0, limit=None,
                     limit_direction='forward', inplace=False, downcast=None,
                     mgr=None, **kwargs):
        """ interpolate using scipy wrappers """

        data = self.values if inplace else self.values.copy()

        # only deal with floats
        if not self.is_float:
            if not self.is_integer:
                return self
            data = data.astype(np.float64)

        if fill_value is None:
            fill_value = self.fill_value

        # these scipy methods require a monotonic index
        if method in ('krogh', 'piecewise_polynomial', 'pchip'):
            if not index.is_monotonic:
                raise ValueError("{0} interpolation requires that the "
                                 "index be monotonic.".format(method))
        # process 1-d slices in the axis direction

        def func(x):

            # process a 1-d slice, returning it
            # should the axis argument be handled below in apply_along_axis?
            # i.e. not an arg to missing.interpolate_1d
            return missing.interpolate_1d(index, x, method=method, limit=limit,
                                          limit_direction=limit_direction,
                                          fill_value=fill_value,
                                          bounds_error=False, **kwargs)

        # interp each column independently
        interp_values = np.apply_along_axis(func, axis, data)

        blocks = [self.make_block(interp_values, klass=self.__class__,
                                  fastpath=True)]
        return self._maybe_downcast(blocks, downcast)
    def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
        """
        Take values according to indexer and return them as a block.
        """

        # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
        # so need to preserve types
        # sparse is treated like an ndarray, but needs .get_values() shaping

        values = self.values
        if self.is_sparse:
            values = self.get_values()

        if fill_tuple is None:
            fill_value = self.fill_value
            new_values = algos.take_nd(values, indexer, axis=axis,
                                       allow_fill=False)
        else:
            fill_value = fill_tuple[0]
            new_values = algos.take_nd(values, indexer, axis=axis,
                                       allow_fill=True, fill_value=fill_value)

        if new_mgr_locs is None:
            if axis == 0:
                # prefer a slice over a fancy indexer when possible
                slc = lib.indexer_as_slice(indexer)
                if slc is not None:
                    new_mgr_locs = self.mgr_locs[slc]
                else:
                    new_mgr_locs = self.mgr_locs[indexer]
            else:
                new_mgr_locs = self.mgr_locs

        # the take may have upcast (e.g. int -> float with NaN fill)
        if not is_dtype_equal(new_values.dtype, self.dtype):
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)
- def diff(self, n, axis=1, mgr=None):
- """ return block for the diff of the values """
- new_values = algos.diff(self.values, n, axis=axis)
- return [self.make_block(values=new_values, fastpath=True)]
    def shift(self, periods, axis=0, mgr=None):
        """ shift the block by periods, possibly upcast """

        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also
        new_values, fill_value = _maybe_upcast(self.values)

        # make sure array sent to np.roll is c_contiguous
        f_ordered = new_values.flags.f_contiguous
        if f_ordered:
            new_values = new_values.T
            axis = new_values.ndim - axis - 1

        if np.prod(new_values.shape):
            new_values = np.roll(new_values, _ensure_platform_int(periods),
                                 axis=axis)

        # blank out the wrapped-around region with the fill value
        axis_indexer = [slice(None)] * self.ndim
        if periods > 0:
            axis_indexer[axis] = slice(None, periods)
        else:
            axis_indexer[axis] = slice(periods, None)
        new_values[tuple(axis_indexer)] = fill_value

        # restore original order
        if f_ordered:
            new_values = new_values.T

        return [self.make_block(new_values, fastpath=True)]
    def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None):
        """
        evaluate the block; return result block from the result

        Parameters
        ----------
        func  : how to combine self, other
        other : a ndarray/object
        raise_on_error : if True, raise when I can't perform the function,
            False by default (and just return the data that we had coming in)
        try_cast : try casting the results to the input type

        Returns
        -------
        a new block, the result of the func
        """
        values = self.values
        if hasattr(other, 'reindex_axis'):
            other = other.values

        # make sure that we can broadcast
        is_transposed = False
        if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
            if values.ndim != other.ndim:
                is_transposed = True
            else:
                if values.shape == other.shape[::-1]:
                    is_transposed = True
                elif values.shape[0] == other.shape[-1]:
                    is_transposed = True
                else:
                    # this is a broadcast error here
                    raise ValueError("cannot broadcast shape [%s] with block "
                                     "values [%s]" % (values.T.shape,
                                                      other.shape))

        transf = (lambda x: x.T) if is_transposed else (lambda x: x)

        # coerce/transpose the args if needed
        values, values_mask, other, other_mask = self._try_coerce_args(
            transf(values), other)

        # get the result, may need to transpose the other
        def get_result(other):

            # avoid numpy warning of comparisons again None
            if other is None:
                result = not func.__name__ == 'eq'

            # avoid numpy warning of elementwise comparisons to object
            elif is_numeric_v_string_like(values, other):
                result = False

            else:
                result = func(values, other)

            # mask if needed
            if isinstance(values_mask, np.ndarray) and values_mask.any():
                result = result.astype('float64', copy=False)
                result[values_mask] = np.nan
            if other_mask is True:
                result = result.astype('float64', copy=False)
                result[:] = np.nan
            elif isinstance(other_mask, np.ndarray) and other_mask.any():
                result = result.astype('float64', copy=False)
                result[other_mask.ravel()] = np.nan

            return self._try_coerce_result(result)

        # error handler if we have an issue operating with the function
        def handle_error():

            if raise_on_error:
                raise TypeError('Could not operate %s with block values %s' %
                                (repr(other), str(detail)))
            else:
                # return the values
                result = np.empty(values.shape, dtype='O')
                result.fill(np.nan)
                return result

        # get the result
        try:
            result = get_result(other)

        # if we have an invalid shape/broadcast error
        # GH4576, so raise instead of allowing to pass through
        except ValueError as detail:
            raise
        except Exception as detail:
            result = handle_error()

        # technically a broadcast error in numpy can 'work' by returning a
        # boolean False
        # NOTE(review): the inner isinstance check duplicates the outer one
        # and is redundant; preserved as-is
        if not isinstance(result, np.ndarray):
            if not isinstance(result, np.ndarray):

                # differentiate between an invalid ndarray-ndarray comparison
                # and an invalid type comparison
                if isinstance(values, np.ndarray) and is_list_like(other):
                    raise ValueError('Invalid broadcasting comparison [%s] '
                                     'with block values' % repr(other))

                raise TypeError('Could not compare [%s] with block values' %
                                repr(other))

        # transpose if needed
        result = transf(result)

        # try to cast if requested
        if try_cast:
            result = self._try_cast_result(result)

        return [self.make_block(result, fastpath=True, )]
    def where(self, other, cond, align=True, raise_on_error=True,
              try_cast=False, axis=0, transpose=False, mgr=None):
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
            values to use where ``cond`` is False
        cond : the condition to respect (must be ndarray-like, i.e. expose
            ``.shape``)
        align : boolean, perform alignment on other/cond
        raise_on_error : if True, raise when I can't perform the function,
            False by default (and just return the data that we had coming in)
        axis : int
        transpose : boolean
            Set to True if self is stored with axes reversed

        Returns
        -------
        a new block(s), the result of the func
        """
        values = self.values
        if transpose:
            values = values.T

        # unwrap pandas objects (anything exposing reindex_axis, e.g.
        # Series/DataFrame) down to their underlying ndarray values
        if hasattr(other, 'reindex_axis'):
            other = other.values

        if hasattr(cond, 'reindex_axis'):
            cond = cond.values

        # If the default broadcasting would go in the wrong direction, then
        # explicitly reshape other instead (append a length-1 trailing axis
        # so numpy broadcasts along the intended axis)
        if getattr(other, 'ndim', 0) >= 1:
            if values.ndim - 1 == other.ndim and axis == 1:
                other = other.reshape(tuple(other.shape + (1, )))

        if not hasattr(cond, 'shape'):
            raise ValueError("where must have a condition that is ndarray "
                             "like")

        other = _maybe_convert_string_to_object(other)
        other = _maybe_convert_scalar(other)

        # our where function
        def func(cond, values, other):
            # fast path: nothing to replace, return values unchanged
            if cond.ravel().all():
                return values

            # coerce values/other into mutually compatible representations
            # before handing off to the expression engine
            values, values_mask, other, other_mask = self._try_coerce_args(
                values, other)

            try:
                return self._try_coerce_result(expressions.where(
                    cond, values, other, raise_on_error=True))
            except Exception as detail:
                if raise_on_error:
                    raise TypeError('Could not operate [%s] with block values '
                                    '[%s]' % (repr(other), str(detail)))
                else:
                    # return the values
                    result = np.empty(values.shape, dtype='float64')
                    result.fill(np.nan)
                    return result

        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        result = func(cond, values, other)
        if self._can_hold_na or self.ndim == 1:

            if transpose:
                result = result.T

            # try to cast if requested
            if try_cast:
                result = self._try_cast_result(result)

            return self.make_block(result)

        # might need to separate out blocks
        # axis = last axis of cond; build a per-location mask saying whether
        # that location kept ALL of its original values
        axis = cond.ndim - 1
        cond = cond.swapaxes(axis, 0)
        mask = np.array([cond[i].all() for i in range(cond.shape[0])],
                        dtype=bool)

        # split the result into (fully-kept, partially-replaced) blocks so
        # each can be cast independently
        result_blocks = []
        for m in [mask, ~mask]:
            if m.any():
                r = self._try_cast_result(result.take(m.nonzero()[0],
                                                      axis=axis))
                result_blocks.append(
                    self.make_block(r.T, placement=self.mgr_locs[m]))

        return result_blocks
- def equals(self, other):
- if self.dtype != other.dtype or self.shape != other.shape:
- return False
- return array_equivalent(self.values, other.values)
    def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
        """
        compute the quantiles of the block's values

        Parameters
        ----------
        qs: a scalar or list of the quantiles to be computed
        interpolation: type of interpolation, default 'linear'
        axis: axis to compute, default 0
        mgr : BlockManager, optional
            used to obtain the result axis when self is 2-d and ``qs`` is
            a scalar

        Returns
        -------
        tuple of (axis, block)
        """
        # numpy < 1.9 np.percentile does not accept the 'interpolation'
        # keyword, so only 'linear' can be honored there
        if _np_version_under1p9:
            if interpolation != 'linear':
                raise ValueError("Interpolation methods other than linear "
                                 "are not supported in numpy < 1.9.")

        kw = {}
        if not _np_version_under1p9:
            kw.update({'interpolation': interpolation})

        values = self.get_values()
        # run values through the block's coercion hook; the mask/other
        # outputs of _try_coerce_args are not needed here
        values, _, _, _ = self._try_coerce_args(values, values)
        mask = isnull(self.values)
        if not lib.isscalar(mask) and mask.any():

            # even though this could be a 2-d mask it appears
            # as a 1-d result
            mask = mask.reshape(values.shape)
            result_shape = tuple([values.shape[0]] + [-1] * (self.ndim - 1))
            # drop the null entries, then restore the block's dimensionality
            values = _block_shape(values[~mask], ndim=self.ndim)
            if self.ndim > 1:
                values = values.reshape(result_shape)

        from pandas import Float64Index
        is_empty = values.shape[axis] == 0
        if is_list_like(qs):
            # multiple quantiles requested: the result axis is the qs
            ax = Float64Index(qs)

            if is_empty:
                if self.ndim == 1:
                    result = self._na_value
                else:
                    # create the array of na_values
                    # 2d len(values) * len(qs)
                    result = np.repeat(np.array([self._na_value] * len(qs)),
                                       len(values)).reshape(len(values),
                                                            len(qs))
            else:

                try:
                    result = _quantile(values, np.array(qs) * 100,
                                       axis=axis, **kw)
                except ValueError:

                    # older numpies don't handle an array for q
                    result = [_quantile(values, q * 100,
                                        axis=axis, **kw) for q in qs]
                    result = np.array(result, copy=False)
                    if self.ndim > 1:
                        result = result.T

        else:

            # single scalar quantile
            if self.ndim == 1:
                ax = Float64Index([qs])
            else:
                ax = mgr.axes[0]

            if is_empty:
                if self.ndim == 1:
                    result = self._na_value
                else:
                    result = np.array([self._na_value] * len(self))
            else:
                result = _quantile(values, qs * 100, axis=axis, **kw)

        # capture ndim before coercion (coercion may return a scalar)
        ndim = getattr(result, 'ndim', None) or 0
        result = self._try_coerce_result(result)
        if is_scalar(result):
            return ax, self.make_block_scalar(result)
        return ax, make_block(result,
                              placement=np.arange(len(result)),
                              ndim=ndim)
class ScalarBlock(Block):
    """Block subclass that wraps a single scalar value.

    A scalar has no dimensions, so ``ndim`` is 0, ``shape`` is ``(0,)``
    and ``len`` is 0; ``dtype`` reports the Python type of the held value.
    """

    __slots__ = ['_mgr_locs', 'values', 'ndim']

    def __init__(self, values):
        # a scalar always occupies the single placement 0
        self.ndim = 0
        self.mgr_locs = [0]
        self.values = values

    @property
    def dtype(self):
        # the "dtype" of a bare scalar is just its Python type
        return type(self.values)

    @property
    def shape(self):
        return (0, )

    def __len__(self):
        return 0
- class NonConsolidatableMixIn(object):
- """ hold methods for the nonconsolidatable blocks """
- _can_consolidate = False
- _verify_integrity = False
- _validate_ndim = False
- _holder = None
- def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
- # Placement must be converted to BlockPlacement via property setter
- # before ndim logic, because placement may be a slice which doesn't
- # have a length.
- self.mgr_locs = placement
- # kludgetastic
- if ndim is None:
- if len(self.mgr_locs) != 1:
- ndim = 1
- else:
- …
Large files files are truncated, but you can click here to view the full file