/pandas/core/generic.py
Python | 11070 lines | 11022 code | 12 blank | 36 comment | 45 complexity | 2c878bb09b63de04e2c88ceb91926ce0 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
Large files are truncated, but you can click here to view the full file
- # pylint: disable=W0231,E1101
- import collections
- from datetime import timedelta
- import functools
- import gc
- import json
- import operator
- from textwrap import dedent
- import warnings
- import weakref
- import numpy as np
- from pandas._libs import Timestamp, iNaT, properties
- import pandas.compat as compat
- from pandas.compat import (
- cPickle as pkl, isidentifier, lrange, lzip, map, set_function_name,
- string_types, to_str, zip)
- from pandas.compat.numpy import function as nv
- from pandas.errors import AbstractMethodError
- from pandas.util._decorators import (
- Appender, Substitution, rewrite_axis_style_signature)
- from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
- from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
- from pandas.core.dtypes.common import (
- ensure_int64, ensure_object, is_bool, is_bool_dtype,
- is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like,
- is_extension_array_dtype, is_integer, is_list_like, is_number,
- is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable,
- is_scalar, is_timedelta64_dtype, pandas_dtype)
- from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
- from pandas.core.dtypes.inference import is_hashable
- from pandas.core.dtypes.missing import isna, notna
- import pandas as pd
- from pandas.core import config, missing, nanops
- import pandas.core.algorithms as algos
- from pandas.core.base import PandasObject, SelectionMixin
- import pandas.core.common as com
- from pandas.core.index import (
- Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
- from pandas.core.indexes.datetimes import DatetimeIndex
- from pandas.core.indexes.period import Period, PeriodIndex
- import pandas.core.indexing as indexing
- from pandas.core.internals import BlockManager
- from pandas.core.ops import _align_method_FRAME
- from pandas.io.formats.format import DataFrameFormatter, format_percentiles
- from pandas.io.formats.printing import pprint_thing
- from pandas.tseries.frequencies import to_offset
# Docstring fragments are collected here so that documentation can be defined
# close to the function it describes while still being shareable.
_shared_docs = {}
_shared_doc_kwargs = {
    'axes': 'keywords for axes',
    'klass': 'NDFrame',
    'axes_single_arg': 'int or labels for object',
    'args_transpose': 'axes to permute (int or label for object)',
    'optional_by': """
        by : str or list of str
            Name or list of names to sort by""",
}

# Sentinel to use as a keyword default in place of None when None itself has
# a special meaning and must be told apart from "argument not supplied".
sentinel = object()
- def _single_replace(self, to_replace, method, inplace, limit):
- """
- Replaces values in a Series using the fill method specified when no
- replacement value is given in the replace method
- """
- if self.ndim != 1:
- raise TypeError('cannot replace {0} with method {1} on a {2}'
- .format(to_replace, method, type(self).__name__))
- orig_dtype = self.dtype
- result = self if inplace else self.copy()
- fill_f = missing.get_fill_func(method)
- mask = missing.mask_missing(result.values, to_replace)
- values = fill_f(result.values, limit=limit, mask=mask)
- if values.dtype == orig_dtype and inplace:
- return
- result = pd.Series(values, index=self.index,
- dtype=self.dtype).__finalize__(self)
- if inplace:
- self._update_inplace(result._data)
- return
- return result
- class NDFrame(PandasObject, SelectionMixin):
- """
- N-dimensional analogue of DataFrame. Store multi-dimensional in a
- size-mutable, labeled data structure
- Parameters
- ----------
- data : BlockManager
- axes : list
- copy : boolean, default False
- """
- _internal_names = ['_data', '_cacher', '_item_cache', '_cache', '_is_copy',
- '_subtyp', '_name', '_index', '_default_kind',
- '_default_fill_value', '_metadata', '__array_struct__',
- '__array_interface__']
- _internal_names_set = set(_internal_names)
- _accessors = frozenset()
- _deprecations = frozenset(['as_blocks', 'blocks',
- 'convert_objects', 'is_copy'])
- _metadata = []
- _is_copy = None
- # dummy attribute so that datetime.__eq__(Series/DataFrame) defers
- # by returning NotImplemented
- timetuple = None
- # ----------------------------------------------------------------------
- # Constructors
- def __init__(self, data, axes=None, copy=False, dtype=None,
- fastpath=False):
- if not fastpath:
- if dtype is not None:
- data = data.astype(dtype)
- elif copy:
- data = data.copy()
- if axes is not None:
- for i, ax in enumerate(axes):
- data = data.reindex_axis(ax, axis=i)
- object.__setattr__(self, '_is_copy', None)
- object.__setattr__(self, '_data', data)
- object.__setattr__(self, '_item_cache', {})
- def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
- """ passed a manager and a axes dict """
- for a, axe in axes.items():
- if axe is not None:
- mgr = mgr.reindex_axis(axe,
- axis=self._get_block_manager_axis(a),
- copy=False)
- # make a copy if explicitly requested
- if copy:
- mgr = mgr.copy()
- if dtype is not None:
- # avoid further copies if we can
- if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
- mgr = mgr.astype(dtype=dtype)
- return mgr
- # ----------------------------------------------------------------------
- @property
- def is_copy(self):
- """
- Return the copy.
- """
- warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
- "in a future version.", FutureWarning, stacklevel=2)
- return self._is_copy
- @is_copy.setter
- def is_copy(self, msg):
- warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
- "in a future version.", FutureWarning, stacklevel=2)
- self._is_copy = msg
- def _validate_dtype(self, dtype):
- """ validate the passed dtype """
- if dtype is not None:
- dtype = pandas_dtype(dtype)
- # a compound dtype
- if dtype.kind == 'V':
- raise NotImplementedError("compound dtypes are not implemented"
- " in the {0} constructor"
- .format(self.__class__.__name__))
- return dtype
- # ----------------------------------------------------------------------
- # Construction
- @property
- def _constructor(self):
- """Used when a manipulation result has the same dimensions as the
- original.
- """
- raise AbstractMethodError(self)
- @property
- def _constructor_sliced(self):
- """Used when a manipulation result has one lower dimension(s) as the
- original, such as DataFrame single columns slicing.
- """
- raise AbstractMethodError(self)
- @property
- def _constructor_expanddim(self):
- """Used when a manipulation result has one higher dimension as the
- original, such as Series.to_frame() and DataFrame.to_panel()
- """
- raise NotImplementedError
- # ----------------------------------------------------------------------
- # Axis
- @classmethod
- def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
- slicers=None, axes_are_reversed=False, build_axes=True,
- ns=None, docs=None):
- """Provide axes setup for the major PandasObjects.
- Parameters
- ----------
- axes : the names of the axes in order (lowest to highest)
- info_axis_num : the axis of the selector dimension (int)
- stat_axis_num : the number of axis for the default stats (int)
- aliases : other names for a single axis (dict)
- slicers : how axes slice to others (dict)
- axes_are_reversed : boolean whether to treat passed axes as
- reversed (DataFrame)
- build_axes : setup the axis properties (default True)
- """
- cls._AXIS_ORDERS = axes
- cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
- cls._AXIS_LEN = len(axes)
- cls._AXIS_ALIASES = aliases or dict()
- cls._AXIS_IALIASES = {v: k for k, v in cls._AXIS_ALIASES.items()}
- cls._AXIS_NAMES = dict(enumerate(axes))
- cls._AXIS_SLICEMAP = slicers or None
- cls._AXIS_REVERSED = axes_are_reversed
- # typ
- setattr(cls, '_typ', cls.__name__.lower())
- # indexing support
- cls._ix = None
- if info_axis is not None:
- cls._info_axis_number = info_axis
- cls._info_axis_name = axes[info_axis]
- if stat_axis is not None:
- cls._stat_axis_number = stat_axis
- cls._stat_axis_name = axes[stat_axis]
- # setup the actual axis
- if build_axes:
- def set_axis(a, i):
- setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
- cls._internal_names_set.add(a)
- if axes_are_reversed:
- m = cls._AXIS_LEN - 1
- for i, a in cls._AXIS_NAMES.items():
- set_axis(a, m - i)
- else:
- for i, a in cls._AXIS_NAMES.items():
- set_axis(a, i)
- assert not isinstance(ns, dict)
- def _construct_axes_dict(self, axes=None, **kwargs):
- """Return an axes dictionary for myself."""
- d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
- d.update(kwargs)
- return d
- @staticmethod
- def _construct_axes_dict_from(self, axes, **kwargs):
- """Return an axes dictionary for the passed axes."""
- d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
- d.update(kwargs)
- return d
- def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
- """Return an axes dictionary for myself."""
- d = {self._AXIS_SLICEMAP[a]: self._get_axis(a)
- for a in (axes or self._AXIS_ORDERS)}
- d.update(kwargs)
- return d
- def _construct_axes_from_arguments(
- self, args, kwargs, require_all=False, sentinel=None):
- """Construct and returns axes if supplied in args/kwargs.
- If require_all, raise if all axis arguments are not supplied
- return a tuple of (axes, kwargs).
- sentinel specifies the default parameter when an axis is not
- supplied; useful to distinguish when a user explicitly passes None
- in scenarios where None has special meaning.
- """
- # construct the args
- args = list(args)
- for a in self._AXIS_ORDERS:
- # if we have an alias for this axis
- alias = self._AXIS_IALIASES.get(a)
- if alias is not None:
- if a in kwargs:
- if alias in kwargs:
- raise TypeError("arguments are mutually exclusive "
- "for [%s,%s]" % (a, alias))
- continue
- if alias in kwargs:
- kwargs[a] = kwargs.pop(alias)
- continue
- # look for a argument by position
- if a not in kwargs:
- try:
- kwargs[a] = args.pop(0)
- except IndexError:
- if require_all:
- raise TypeError("not enough/duplicate arguments "
- "specified!")
- axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS}
- return axes, kwargs
- @classmethod
- def _from_axes(cls, data, axes, **kwargs):
- # for construction from BlockManager
- if isinstance(data, BlockManager):
- return cls(data, **kwargs)
- else:
- if cls._AXIS_REVERSED:
- axes = axes[::-1]
- d = cls._construct_axes_dict_from(cls, axes, copy=False)
- d.update(kwargs)
- return cls(data, **d)
- @classmethod
- def _get_axis_number(cls, axis):
- axis = cls._AXIS_ALIASES.get(axis, axis)
- if is_integer(axis):
- if axis in cls._AXIS_NAMES:
- return axis
- else:
- try:
- return cls._AXIS_NUMBERS[axis]
- except KeyError:
- pass
- raise ValueError('No axis named {0} for object type {1}'
- .format(axis, cls))
@classmethod
def _get_axis_name(cls, axis):
    """
    Translate an axis given as number, name or alias into its name.

    Raises
    ------
    ValueError
        If the (alias-resolved) axis is neither a known name nor a known
        number for ``cls``.
    """
    resolved = cls._AXIS_ALIASES.get(axis, axis)

    if not isinstance(resolved, string_types):
        try:
            return cls._AXIS_NAMES[resolved]
        except KeyError:
            pass
    elif resolved in cls._AXIS_NUMBERS:
        return resolved

    raise ValueError('No axis named {0} for object type {1}'
                     .format(resolved, cls))
- def _get_axis(self, axis):
- name = self._get_axis_name(axis)
- return getattr(self, name)
- @classmethod
- def _get_block_manager_axis(cls, axis):
- """Map the axis to the block_manager axis."""
- axis = cls._get_axis_number(axis)
- if cls._AXIS_REVERSED:
- m = cls._AXIS_LEN - 1
- return m - axis
- return axis
- def _get_axis_resolvers(self, axis):
- # index or columns
- axis_index = getattr(self, axis)
- d = dict()
- prefix = axis[0]
- for i, name in enumerate(axis_index.names):
- if name is not None:
- key = level = name
- else:
- # prefix with 'i' or 'c' depending on the input axis
- # e.g., you must do ilevel_0 for the 0th level of an unnamed
- # multiiindex
- key = '{prefix}level_{i}'.format(prefix=prefix, i=i)
- level = i
- level_values = axis_index.get_level_values(level)
- s = level_values.to_series()
- s.index = axis_index
- d[key] = s
- # put the index/columns itself in the dict
- if isinstance(axis_index, MultiIndex):
- dindex = axis_index
- else:
- dindex = axis_index.to_series()
- d[axis] = dindex
- return d
- def _get_index_resolvers(self):
- d = {}
- for axis_name in self._AXIS_ORDERS:
- d.update(self._get_axis_resolvers(axis_name))
- return d
- @property
- def _info_axis(self):
- return getattr(self, self._info_axis_name)
- @property
- def _stat_axis(self):
- return getattr(self, self._stat_axis_name)
- @property
- def shape(self):
- """
- Return a tuple of axis dimensions
- """
- return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
- @property
- def axes(self):
- """
- Return index label(s) of the internal NDFrame
- """
- # we do it this way because if we have reversed axes, then
- # the block manager shows then reversed
- return [self._get_axis(a) for a in self._AXIS_ORDERS]
- @property
- def ndim(self):
- """
- Return an int representing the number of axes / array dimensions.
- Return 1 if Series. Otherwise return 2 if DataFrame.
- See Also
- --------
- ndarray.ndim : Number of array dimensions.
- Examples
- --------
- >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
- >>> s.ndim
- 1
- >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
- >>> df.ndim
- 2
- """
- return self._data.ndim
- @property
- def size(self):
- """
- Return an int representing the number of elements in this object.
- Return the number of rows if Series. Otherwise return the number of
- rows times number of columns if DataFrame.
- See Also
- --------
- ndarray.size : Number of elements in the array.
- Examples
- --------
- >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
- >>> s.size
- 3
- >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
- >>> df.size
- 4
- """
- return np.prod(self.shape)
- @property
- def _selected_obj(self):
- """ internal compat with SelectionMixin """
- return self
- @property
- def _obj_with_exclusions(self):
- """ internal compat with SelectionMixin """
- return self
- def _expand_axes(self, key):
- new_axes = []
- for k, ax in zip(key, self.axes):
- if k not in ax:
- if type(k) != ax.dtype.type:
- ax = ax.astype('O')
- new_axes.append(ax.insert(len(ax), k))
- else:
- new_axes.append(ax)
- return new_axes
def set_axis(self, labels, axis=0, inplace=None):
    """
    Assign desired index to given axis.

    Indexes for column or row labels can be changed by assigning
    a list-like or Index.

    .. versionchanged:: 0.21.0

       The signature is now `labels` and `axis`, consistent with
       the rest of pandas API. Previously, the `axis` and `labels`
       arguments were respectively the first and second positional
       arguments.

    Parameters
    ----------
    labels : list-like, Index
        The values for the new index.

    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to update. The value 0 identifies the rows, and 1
        identifies the columns.

    inplace : bool, default None
        Whether to return a new %(klass)s instance.

        .. warning::

           ``inplace=None`` currently falls back to True, but in a
           future version, will default to False. Use inplace=True
           explicitly rather than relying on the default.

    Returns
    -------
    renamed : %(klass)s or None
        An object of same type as caller if inplace=False, None otherwise.

    See Also
    --------
    DataFrame.rename_axis : Alter the name of the index or columns.

    Examples
    --------
    **Series**

    >>> s = pd.Series([1, 2, 3])
    >>> s
    0    1
    1    2
    2    3
    dtype: int64

    >>> s.set_axis(['a', 'b', 'c'], axis=0, inplace=False)
    a    1
    b    2
    c    3
    dtype: int64

    The original object is not modified.

    >>> s
    0    1
    1    2
    2    3
    dtype: int64

    **DataFrame**

    >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

    Change the row labels.

    >>> df.set_axis(['a', 'b', 'c'], axis='index', inplace=False)
       A  B
    a  1  4
    b  2  5
    c  3  6

    Change the column labels.

    >>> df.set_axis(['I', 'II'], axis='columns', inplace=False)
       I  II
    0  1   4
    1  2   5
    2  3   6

    Now, update the labels inplace.

    >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True)
    >>> df
       i  ii
    0  1   4
    1  2   5
    2  3   6
    """
    # A scalar in first position means the legacy (axis, labels) order.
    legacy_order = is_scalar(labels)
    if legacy_order:
        warnings.warn(
            'set_axis now takes "labels" as first argument, and '
            '"axis" as named parameter. The old form, with "axis" as '
            'first parameter and \"labels\" as second, is still supported '
            'but will be deprecated in a future version of pandas.',
            FutureWarning, stacklevel=2)
        labels, axis = axis, labels

    if inplace is None:
        warnings.warn(
            'set_axis currently defaults to operating inplace.\nThis '
            'will change in a future version of pandas, use '
            'inplace=True to avoid this warning.',
            FutureWarning, stacklevel=2)
        inplace = True

    if not inplace:
        # work on a copy and hand it back
        duplicate = self.copy()
        duplicate.set_axis(labels, axis=axis, inplace=True)
        return duplicate

    setattr(self, self._get_axis_name(axis), labels)
- def _set_axis(self, axis, labels):
- self._data.set_axis(axis, labels)
- self._clear_item_cache()
def transpose(self, *args, **kwargs):
    """
    Permute the dimensions of the %(klass)s

    Parameters
    ----------
    args : %(args_transpose)s
    copy : boolean, default False
        Make a copy of the underlying data. Mixed-dtype data will
        always result in a copy
    **kwargs
        Additional keyword arguments will be passed to the function.

    Returns
    -------
    y : same as input

    Raises
    ------
    TypeError
        If not every axis is specified.
    ValueError
        If the same axis is specified more than once.

    Examples
    --------
    >>> p.transpose(2, 0, 1)
    >>> p.transpose(2, 0, 1, copy=True)
    """

    # construct the args
    axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
                                                       require_all=True)
    axes_names = tuple(self._get_axis_name(axes[a])
                       for a in self._AXIS_ORDERS)
    axes_numbers = tuple(self._get_axis_number(axes[a])
                         for a in self._AXIS_ORDERS)

    # we must have unique axes; ``axes`` is a dict keyed by axis name, so
    # uniqueness has to be checked on the requested (resolved) axis names
    if len(set(axes_names)) != len(axes_names):
        raise ValueError('Must specify %s unique axes' % self._AXIS_LEN)

    new_axes = self._construct_axes_dict_from(self, [self._get_axis(x)
                                                     for x in axes_names])
    new_values = self.values.transpose(axes_numbers)
    # copy is requested via the ``copy`` keyword or a truthy last
    # positional argument
    if kwargs.pop('copy', None) or (len(args) and args[-1]):
        new_values = new_values.copy()

    nv.validate_transpose_for_generic(self, kwargs)
    return self._constructor(new_values, **new_axes).__finalize__(self)
def swapaxes(self, axis1, axis2, copy=True):
    """
    Interchange axes and swap values axes appropriately.

    When both arguments resolve to the same axis, the object itself (or a
    copy of it when ``copy`` is true) is returned.

    Returns
    -------
    y : same as input
    """
    first = self._get_axis_number(axis1)
    second = self._get_axis_number(axis2)

    if first == second:
        return self.copy() if copy else self

    swapped_values = self.values.swapaxes(first, second)
    if copy:
        swapped_values = swapped_values.copy()

    # every axis keeps its position except the two being exchanged
    exchange = {first: second, second: first}
    swapped_axes = [self._get_axis(exchange.get(position, position))
                    for position in range(self._AXIS_LEN)]

    return self._constructor(swapped_values,
                             *swapped_axes).__finalize__(self)
def droplevel(self, level, axis=0):
    """
    Return DataFrame with requested index / column level(s) removed.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    level : int, str, or list-like
        If a string is given, must be the name of a level
        If list-like, elements must be names or positional indexes
        of levels.

    axis : {0 or 'index', 1 or 'columns'}, default 0

    Returns
    -------
    Same type as caller
        A new object whose labels on `axis` have the requested level(s)
        removed; the caller is not modified.

    Examples
    --------
    >>> df = pd.DataFrame([
    ...     [1, 2, 3, 4],
    ...     [5, 6, 7, 8],
    ...     [9, 10, 11, 12]
    ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

    >>> df.columns = pd.MultiIndex.from_tuples([
    ...    ('c', 'e'), ('d', 'f')
    ... ], names=['level_1', 'level_2'])

    >>> df
    level_1   c   d
    level_2   e   f
    a b
    1 2      3   4
    5 6      7   8
    9 10    11  12

    >>> df.droplevel('a')
    level_1   c   d
    level_2   e   f
    b
    2        3   4
    6        7   8
    10      11  12

    >>> df.droplevel('level_2', axis=1)
    level_1   c   d
    a b
    1 2      3   4
    5 6      7   8
    9 10    11  12
    """
    # Drop the level(s) on the axis labels, then install the reduced labels
    # on a new object (inplace=False) rather than on self.
    labels = self._get_axis(axis)
    new_labels = labels.droplevel(level)
    result = self.set_axis(new_labels, axis=axis, inplace=False)
    return result
def pop(self, item):
    """
    Return item and drop from frame. Raise KeyError if not found.

    Parameters
    ----------
    item : str
        Label of column to be popped.

    Returns
    -------
    Series

    Examples
    --------
    >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
    ...                    ('parrot', 'bird', 24.0),
    ...                    ('lion', 'mammal', 80.5),
    ...                    ('monkey','mammal', np.nan)],
    ...                   columns=('name', 'class', 'max_speed'))
    >>> df
         name   class  max_speed
    0  falcon    bird      389.0
    1  parrot    bird       24.0
    2    lion  mammal       80.5
    3  monkey  mammal        NaN

    >>> df.pop('class')
    0      bird
    1      bird
    2    mammal
    3    mammal
    Name: class, dtype: object

    >>> df
         name  max_speed
    0  falcon      389.0
    1  parrot       24.0
    2    lion       80.5
    3  monkey        NaN
    """
    popped = self[item]
    del self[item]

    # Reset the cacher on the popped item when it supports that; an
    # AttributeError from the call is deliberately ignored.
    try:
        popped._reset_cacher()
    except AttributeError:
        pass

    return popped
def squeeze(self, axis=None):
    """
    Squeeze 1 dimensional axis objects into scalars.

    Series or DataFrames with a single element are squeezed to a scalar.
    DataFrames with a single column or a single row are squeezed to a
    Series. Otherwise the object is unchanged.

    This method is most useful when you don't know if your
    object is a Series or DataFrame, but you do know it has just a single
    column. In that case you can safely call `squeeze` to ensure you have a
    Series.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns', None}, default None
        A specific axis to squeeze. By default, all length-1 axes are
        squeezed.

        .. versionadded:: 0.20.0

    Returns
    -------
    DataFrame, Series, or scalar
        The projection after squeezing `axis` or all the axes.

    See Also
    --------
    Series.iloc : Integer-location based indexing for selecting scalars.
    DataFrame.iloc : Integer-location based indexing for selecting Series.
    Series.to_frame : Inverse of DataFrame.squeeze for a
        single-column DataFrame.

    Examples
    --------
    >>> primes = pd.Series([2, 3, 5, 7])

    Slicing might produce a Series with a single value:

    >>> even_primes = primes[primes % 2 == 0]
    >>> even_primes
    0    2
    dtype: int64

    >>> even_primes.squeeze()
    2

    Squeezing objects with more than one value in every axis does nothing:

    >>> odd_primes = primes[primes % 2 == 1]
    >>> odd_primes
    1    3
    2    5
    3    7
    dtype: int64

    >>> odd_primes.squeeze()
    1    3
    2    5
    3    7
    dtype: int64

    Squeezing is even more effective when used with DataFrames.

    >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
    >>> df
       a  b
    0  1  2
    1  3  4

    Slicing a single column will produce a DataFrame with the columns
    having only one value:

    >>> df_a = df[['a']]
    >>> df_a
       a
    0  1
    1  3

    So the columns can be squeezed down, resulting in a Series:

    >>> df_a.squeeze('columns')
    0    1
    1    3
    Name: a, dtype: int64

    Slicing a single row from a single column will produce a single
    scalar DataFrame:

    >>> df_0a = df.loc[df.index < 1, ['a']]
    >>> df_0a
       a
    0  1

    Squeezing the rows produces a single scalar Series:

    >>> df_0a.squeeze('rows')
    a    1
    Name: 0, dtype: int64

    Squeezing all axes will project directly into a scalar:

    >>> df_0a.squeeze()
    1
    """
    if axis is None:
        candidates = self._AXIS_NAMES
    else:
        candidates = (self._get_axis_number(axis),)

    # Any failure while building or applying the indexer falls back to
    # returning the object unchanged.
    try:
        indexer = []
        for position, labels in enumerate(self.axes):
            if position in candidates and len(labels) == 1:
                indexer.append(0)
            else:
                indexer.append(slice(None))
        return self.iloc[tuple(indexer)]
    except Exception:
        return self
def swaplevel(self, i=-2, j=-1, axis=0):
    """
    Swap levels i and j in a MultiIndex on a particular axis

    Parameters
    ----------
    i, j : int, str (can be mixed)
        Level of index to be swapped. Can pass level name as string.

    Returns
    -------
    swapped : same type as caller (new object)

    .. versionchanged:: 0.18.1

       The indexes ``i`` and ``j`` are now optional, and default to
       the two innermost levels of the index.
    """
    axis_number = self._get_axis_number(axis)

    # operate on a copy; the caller is left untouched
    swapped = self.copy()
    manager = swapped._data
    current_labels = manager.axes[axis_number]
    manager.set_axis(axis_number, current_labels.swaplevel(i, j))
    return swapped
- # ----------------------------------------------------------------------
- # Rename
def rename(self, *args, **kwargs):
    """
    Alter axes input function or functions. Function / dict values must be
    unique (1-to-1). Labels not contained in a dict / Series will be left
    as-is. Extra labels listed don't throw an error. Alternatively, change
    ``Series.name`` with a scalar value (Series only).

    Parameters
    ----------
    %(axes)s : scalar, list-like, dict-like or function, optional
        Scalar or list-like will alter the ``Series.name`` attribute,
        and raise on DataFrame or Panel.
        dict-like or functions are transformations to apply to
        that axis' values
    copy : bool, default True
        Also copy underlying data.
    inplace : bool, default False
        Whether to return a new %(klass)s. If True then value of copy is
        ignored.
    level : int or level name, default None
        In case of a MultiIndex, only rename labels in the specified
        level.
    errors : {'ignore', 'raise'}, default 'ignore'
        If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`,
        or `columns` contains labels that are not present in the Index
        being transformed.
        If 'ignore', existing keys will be renamed and extra keys will be
        ignored.

    Returns
    -------
    renamed : %(klass)s (new object)

    Raises
    ------
    KeyError
        If any of the labels is not found in the selected axis and
        "errors='raise'".
    TypeError
        If an unexpected keyword argument is passed, or if no axis
        mapper is supplied at all.

    See Also
    --------
    NDFrame.rename_axis

    Examples
    --------

    >>> s = pd.Series([1, 2, 3])
    >>> s
    0    1
    1    2
    2    3
    dtype: int64
    >>> s.rename("my_name") # scalar, changes Series.name
    0    1
    1    2
    2    3
    Name: my_name, dtype: int64
    >>> s.rename(lambda x: x ** 2)  # function, changes labels
    0    1
    1    2
    4    3
    dtype: int64
    >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
    0    1
    3    2
    5    3
    dtype: int64

    Since ``DataFrame`` doesn't have a ``.name`` attribute,
    only mapping-type arguments are allowed.

    >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    >>> df.rename(2)
    Traceback (most recent call last):
    ...
    TypeError: 'int' object is not callable

    ``DataFrame.rename`` supports two calling conventions

    * ``(index=index_mapper, columns=columns_mapper, ...)``
    * ``(mapper, axis={'index', 'columns'}, ...)``

    We *highly* recommend using keyword arguments to clarify your
    intent.

    >>> df.rename(index=str, columns={"A": "a", "B": "c"})
       a  c
    0  1  4
    1  2  5
    2  3  6

    >>> df.rename(index=str, columns={"A": "a", "C": "c"})
       a  B
    0  1  4
    1  2  5
    2  3  6

    Using axis-style parameters

    >>> df.rename(str.lower, axis='columns')
       a  b
    0  1  4
    1  2  5
    2  3  6

    >>> df.rename({1: 2, 2: 4}, axis='index')
       A  B
    0  1  4
    2  2  5
    4  3  6

    See the :ref:`user guide <basics.rename>` for more.
    """
    # Split the per-axis mappers out of args/kwargs; whatever remains in
    # kwargs must be one of the options popped below.
    axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
    copy = kwargs.pop('copy', True)
    inplace = kwargs.pop('inplace', False)
    level = kwargs.pop('level', None)
    axis = kwargs.pop('axis', None)
    errors = kwargs.pop('errors', 'ignore')
    if axis is not None:
        # Validate the axis (the resolved number is not used afterwards)
        self._get_axis_number(axis)

    if kwargs:
        raise TypeError('rename() got an unexpected keyword '
                        'argument "{0}"'.format(list(kwargs.keys())[0]))

    if com.count_not_none(*axes.values()) == 0:
        raise TypeError('must pass an index to rename')

    self._consolidate_inplace()
    result = self if inplace else self.copy(deep=copy)

    # start in the axis order to eliminate too many copies
    for axis in lrange(self._AXIS_LEN):
        v = axes.get(self._AXIS_NAMES[axis])
        if v is None:
            continue
        f = com._get_rename_function(v)
        baxis = self._get_block_manager_axis(axis)
        # NOTE(review): ``level`` is rebound to the resolved level number
        # here, so a later axis in this loop sees the number resolved
        # against an earlier axis rather than the caller's original value.
        if level is not None:
            level = self.axes[axis]._get_level_number(level)

        # GH 13473
        # For non-callable mappers, labels absent from the axis are
        # reported only when errors == 'raise'.
        if not callable(v):
            indexer = self.axes[axis].get_indexer_for(v)
            if errors == 'raise' and len(indexer[indexer == -1]):
                missing_labels = [label for index, label in enumerate(v)
                                  if indexer[index] == -1]
                raise KeyError('{} not found in axis'
                               .format(missing_labels))

        result._data = result._data.rename_axis(f, axis=baxis, copy=copy,
                                                level=level)
        result._clear_item_cache()

    # In-place calls update self and fall through, returning None.
    if inplace:
        self._update_inplace(result._data)
    else:
        return result.__finalize__(self)
@rewrite_axis_style_signature('mapper', [('copy', True),
                                         ('inplace', False)])
def rename_axis(self, mapper=sentinel, **kwargs):
    """
    Set the name of the axis for the index or columns.

    Parameters
    ----------
    mapper : scalar, list-like, optional
        Value to set the axis name attribute.
    index, columns : scalar, list-like, dict-like or function, optional
        A scalar, list-like, dict-like or functions transformations to
        apply to that axis' values.

        Use either ``mapper`` and ``axis`` to
        specify the axis to target with ``mapper``, or ``index``
        and/or ``columns``.

        .. versionchanged:: 0.24.0

    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to rename.
    copy : bool, default True
        Also copy underlying data.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new Series
        or DataFrame.

    Returns
    -------
    Series, DataFrame, or None
        The same type as the caller or None if `inplace` is True.

    See Also
    --------
    Series.rename : Alter Series index labels or name.
    DataFrame.rename : Alter DataFrame index labels or name.
    Index.rename : Set new names on index.

    Notes
    -----
    Prior to version 0.21.0, ``rename_axis`` could also be used to change
    the axis *labels* by passing a mapping or scalar. This behavior is
    deprecated and will be removed in a future version. Use ``rename``
    instead.

    ``DataFrame.rename_axis`` supports two calling conventions

    * ``(index=index_mapper, columns=columns_mapper, ...)``
    * ``(mapper, axis={'index', 'columns'}, ...)``

    The first calling convention will only modify the names of
    the index and/or the names of the Index object that is the columns.
    In this case, the parameter ``copy`` is ignored.

    The second calling convention will modify the names of the
    corresponding index if mapper is a list or a scalar.
    However, if mapper is dict-like or a function, it will use the
    deprecated behavior of modifying the axis *labels*.

    We *highly* recommend using keyword arguments to clarify your
    intent.

    Examples
    --------
    **Series**

    >>> s = pd.Series(["dog", "cat", "monkey"])
    >>> s
    0       dog
    1       cat
    2    monkey
    dtype: object
    >>> s.rename_axis("animal")
    animal
    0    dog
    1    cat
    2    monkey
    dtype: object

    **DataFrame**

    >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
    ...                    "num_arms": [0, 0, 2]},
    ...                   ["dog", "cat", "monkey"])
    >>> df
            num_legs  num_arms
    dog            4         0
    cat            4         0
    monkey         2         2
    >>> df = df.rename_axis("animal")
    >>> df
            num_legs  num_arms
    animal
    dog            4         0
    cat            4         0
    monkey         2         2
    >>> df = df.rename_axis("limbs", axis="columns")
    >>> df
    limbs   num_legs  num_arms
    animal
    dog            4         0
    cat            4         0
    monkey         2         2

    **MultiIndex**

    >>> df.index = pd.MultiIndex.from_product([['mammal'],
    ...                                        ['dog', 'cat', 'monkey']],
    ...                                       names=['type', 'name'])
    >>> df
    limbs          num_legs  num_arms
    type   name
    mammal dog            4         0
           cat            4         0
           monkey         2         2

    >>> df.rename_axis(index={'type': 'class'})
    limbs          num_legs  num_arms
    class  name
    mammal dog            4         0
           cat            4         0
           monkey         2         2

    >>> df.rename_axis(columns=str.upper)
    LIMBS          num_legs  num_arms
    type   name
    mammal dog            4         0
           cat            4         0
           monkey         2         2
    """
    # Axis keywords that were not supplied come back as ``sentinel`` so an
    # explicit None can be told apart from "not passed".
    axes, kwargs = self._construct_axes_from_arguments(
        (), kwargs, sentinel=sentinel)
    copy = kwargs.pop('copy', True)
    inplace = kwargs.pop('inplace', False)
    axis = kwargs.pop('axis', 0)
    if axis is not None:
        axis = self._get_axis_number(axis)

    if kwargs:
        raise TypeError('rename_axis() got an unexpected keyword '
                        'argument "{0}"'.format(list(kwargs.keys())[0]))

    inplace = validate_bool_kwarg(inplace, 'inplace')

    if (mapper is not sentinel):
        # Use v0.23 behavior if a scalar or list
        non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not
                                           is_dict_like(mapper))
        if non_mapper:
            return self._set_axis_name(mapper, axis=axis, inplace=inplace)
        else:
            # Deprecated (v0.21) behavior is if mapper is specified,
            # and not a list or scalar, then call rename
            msg = ("Using 'rename_axis' to alter labels is deprecated. "
                   "Use '.rename' instead")
            warnings.warn(msg, FutureWarning, stacklevel=3)
            axis = self._get_axis_name(axis)
            d = {'copy': copy, 'inplace': inplace}
            d[axis] = mapper
            return self.rename(**d)
    else:
        # Use new behavior.  Means that index and/or columns
        # is specified
        result = self if inplace else self.copy(deep=copy)

        for axis in lrange(self._AXIS_LEN):
            v = axes.get(self._AXIS_NAMES[axis])
            if v is sentinel:
                continue
            non_mapper = is_scalar(v) or (is_list_like(v) and not
                                          is_dict_like(v))
            if non_mapper:
                # scalar / list-like: used directly as the new name(s)
                newnames = v
            else:
                # dict-like / function: applied to each current name
                f = com._get_rename_function(v)
                curnames = self._get_axis(axis).names
                newnames = [f(name) for name in curnames]
            result._set_axis_name(newnames, axis=axis,
                                  inplace=True)
        # an in-place call falls through and returns None
        if not inplace:
            return result
- def _set_axis_name(self, name, axis=0, inplace=False):
- """
- Set the name(s) of the axis.
- Parameters
- ----------
- name : str or list of str
- Name(s) to set.
- axis : {0 or 'index', 1 or 'columns'}, default 0
- The axis to set the label. The value 0 or 'index' specifies index,
- and the value 1 or 'columns' specifies columns.
- inplace : bool, default False
- If `True`, do operation inplace and return None.
- .. versionadded:: 0.21.0
- Returns
- -------
- Series, DataFrame, or None
- The same type as the caller or `None` if `inplace` is `True`.
- See Also
- --------
- DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
- Series.rename : Alter the index labels or set the index name
- of :class:`Series`.
- Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.
- Examples
- --------
- >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
- ... ["dog", "cat", "monkey"])
- >>> df
- num_legs
- dog 4
- cat 4
- monkey 2
- >>> df._set_axis_name("animal")
- num_legs
- animal
- dog 4
- cat 4
- monkey 2
- >>> df.index = pd.MultiIndex.from_product(
- ... [["mammal"], ['dog', 'cat', 'monkey']])
- >>> df._set_axis_name(["type", "name"])
- legs
- type name
- mammal dog 4
- cat 4
- monkey 2
- """
- axis = self._get_axis_number(axis)
- idx = self._get_axis(axis).set_names(name)
- inplace = validate_bool_kwarg(inplace, 'inplace')
- renamed = self if inplace else self.copy()
- renamed.set_axis(idx, axis=axis, inplace=True)
- if not inplace:
- return renamed
- # ----------------------------------------------------------------------
- # Comparison Methods
- def _indexed_same(self, other):
- return all(self._get_axis(a).equals(other._get_axis(a))
- for a in self._AXIS_ORDERS)
def equals(self, other):
    """
    Test whether two objects contain the same elements.

    Compares two Series or DataFrames to check that they have the same
    shape and elements. NaNs in the same location are considered equal.
    Column headers may differ in type, but the elements within the
    columns must share the same dtype.

    Parameters
    ----------
    other : Series or DataFrame
        The object to compare against this one.

    Returns
    -------
    bool
        True if all elements are the same in both objects, False
        otherwise.

    See Also
    --------
    Series.eq : Compare two Series objects of the same length
        and return a Series where each element is True if the element
        in each Series is equal, False otherwise.
    DataFrame.eq : Compare two DataFrame objects of the same shape and
        return a DataFrame where each element is True if the respective
        element in each DataFrame is equal, False otherwise.
    assert_series_equal : Raise an AssertionError if left and right
        Series are not equal.
    assert_frame_equal : Raise an AssertionError if left and right
        DataFrames are not equal.
    numpy.array_equal : Return True if two arrays have the same shape
        and elements, False otherwise.

    Notes
    -----
    Elements must have the same dtype as their counterparts in the other
    Series or DataFrame. Column labels, however, need not have the same
    type as long as they still compare equal.

    Examples
    --------
    >>> df = pd.DataFrame({1: [10], 2: [20]})
    >>> df
        1   2
    0  10  20

    DataFrames df and exactly_equal have the same types and values for
    their elements and column labels, which will return True.

    >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
    >>> exactly_equal
        1   2
    0  10  20
    >>> df.equals(exactly_equal)
    True

    DataFrames df and different_column_type have the same element
    types and values, but have different types for the column labels,
    which will still return True.

    >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
    >>> different_column_type
       1.0  2.0
    0   10   20
    >>> df.equals(different_column_type)
    True

    DataFrames df and different_data_type have different types for the
    same values for their elements, and will return False even though
    their column labels are the same values and types.

    >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
    >>> different_data_type
          1     2
    0  10.0  20.0
    >>> df.equals(different_data_type)
    False
    """
    # Only objects built by the same constructor can be equal; anything
    # else is rejected before the underlying data is touched.
    if isinstance(other, self._constructor):
        return self._data.equals(other._data)
    return False
- # -------------------------------------------------------------------------
- # Unary Methods
def __neg__(self):
    """
    Implement unary minus (``-obj``).

    Boolean data is logically inverted; numeric, timedelta64 and object
    data is arithmetically negated. Any other dtype raises TypeError.
    The result is wrapped via ``self.__array_wrap__``.
    """
    raw = com.values_from_object(self)

    # Choose the element-wise operation first, then apply it once.
    if is_bool_dtype(raw):
        unary = operator.inv
    elif any(check(raw) for check in (is_numeric_dtype,
                                      is_timedelta64_dtype,
                                      is_object_dtype)):
        unary = operator.neg
    else:
        raise TypeError("Unary negative expects numeric dtype, not {}"
                        .format(raw.dtype))

    return self.__array_wrap__(unary(raw))
def __pos__(self):
    """
    Implement unary plus (``+obj``).

    Boolean and period-like data is passed through unchanged; numeric,
    timedelta64 and object data goes through ``operator.pos``. Any other
    dtype raises TypeError. The result is wrapped via
    ``self.__array_wrap__``.
    """
    raw = com.values_from_object(self)

    if is_bool_dtype(raw) or is_period_arraylike(raw):
        # These values are wrapped as-is, without calling operator.pos.
        return self.__array_wrap__(raw)

    if (is_numeric_dtype(raw) or is_timedelta64_dtype(raw)
            or is_object_dtype(raw)):
        return self.__array_wrap__(operator.pos(raw))

    raise TypeError("Unary plus expects numeric dtype, not {}"
                    .format(raw.dtype))
def __invert__(self):
    """
    Implement bitwise/logical inversion (``~obj``).

    The underlying values are passed to ``operator.inv`` and the result
    is wrapped via ``self.__array_wrap__``. If that fails and the object
    has no elements, the object itself is returned; otherwise the
    original exception propagates.
    """
    try:
        inverted = operator.inv(com.values_from_object(self))
        return self.__array_wrap__(inverted)
    except Exception:
        # inv fails with 0 len; only swallow the error in that case
        if np.prod(self.shape):
            raise
        return self
- def __nonzero__(self):
- raise ValueError("The truth value of a {0} is ambiguous. "
- "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
- .format(self.__class__.__name__))
- __bool__ = __nonzero__
def bool(self):
    """
    Return the bool of a single element PandasObject.

    The object is squeezed first; the result must be a boolean scalar,
    either True or False.

    Returns
    -------
    bool
        The single boolean element.

    Raises
    ------
    ValueError
        If the squeezed value is a scalar that is not boolean, or if the
        object does not reduce to a single element.
    """
    squeezed = self.squeeze()

    if isinstance(squeezed, (bool, np.bool_)):
        return bool(squeezed)

    if is_scalar(squeezed):
        raise ValueError("bool cannot act on a non-boolean single element "
                         "{0}".format(self.__class__.__name__))

    # Not a scalar after squeezing: hand over to the truth-value method.
    self.__nonzero__()
- def __abs__(self):
- return self.abs()
- def __round__(self, decimals=0):
- return self.round(decimals)
- # -------------------------------------------------------------------------
- # Label or Level Combination Helpers
- #
- # A collection of helper methods for DataFrame/Series operations that
- # accept a combination of column/index labels and levels. All such
- # operations should utilize/extend these methods when possible so that we
- # have consistent precedence and validation logic throughout the library.
- def _is_level_reference(self, key, axis=0):
- """
- Test whethe…
Large files files are truncated, but you can click here to view the full file