generic.py - This code defines various functions for creati…

/pandas/core/generic.py

http://github.com/wesm/pandas · Python · 11070 lines · 9894 code · 555 blank · 621 comment · 586 complexity · 2c878bb09b63de04e2c88ceb91926ce0 MD5 · raw file
Large files are truncated click here to view the full file

# pylint: disable=W0231,E1101
import collections
from datetime import timedelta
import functools
import gc
import json
import operator
from textwrap import dedent
import warnings
import weakref

import numpy as np

from pandas._libs import Timestamp, iNaT, properties
import pandas.compat as compat
from pandas.compat import (
    cPickle as pkl, isidentifier, lrange, lzip, map, set_function_name,
    string_types, to_str, zip)
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
    Appender, Substitution, rewrite_axis_style_signature)
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs

from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.common import (
    ensure_int64, ensure_object, is_bool, is_bool_dtype,
    is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like,
    is_extension_array_dtype, is_integer, is_list_like, is_number,
    is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable,
    is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna

import pandas as pd
from pandas.core import config, missing, nanops
import pandas.core.algorithms as algos
from pandas.core.base import PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.index import (
    Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
import pandas.core.indexing as indexing
from pandas.core.internals import BlockManager
from pandas.core.ops import _align_method_FRAME

from pandas.io.formats.format import DataFrameFormatter, format_percentiles
from pandas.io.formats.printing import pprint_thing
from pandas.tseries.frequencies import to_offset

# The goal is to be able to define the docs close to the function they
# describe, while still being able to share them.
_shared_docs = dict()
# Values for the ``%(name)s`` placeholders (``klass``, ``axes``,
# ``args_transpose``, ...) that appear in docstrings of this module.
_shared_doc_kwargs = dict(
    axes='keywords for axes', klass='NDFrame',
    axes_single_arg='int or labels for object',
    args_transpose='axes to permute (int or label for object)',
    optional_by="""
        by : str or list of str
            Name or list of names to sort by""")

# Sentinel value to use as a kwarg default in place of None when None has a
# special meaning and must be distinguished from a user explicitly passing
# None.
sentinel = object()


def _single_replace(self, to_replace, method, inplace, limit):
    """
    Replace values in a Series using a fill method.

    Used when no replacement value is given to the replace method: the
    entries matching ``to_replace`` are masked and then filled by the
    function that ``missing.get_fill_func(method)`` returns.

    Parameters
    ----------
    self : object with ``ndim == 1``
    to_replace : value(s) handed to ``missing.mask_missing``
    method : fill method name handed to ``missing.get_fill_func``
    inplace : bool
        Work on ``self`` rather than on a copy.
    limit : forwarded to the fill function

    Returns
    -------
    A new Series when ``inplace`` is false, otherwise None.

    Raises
    ------
    TypeError
        If ``self`` is not one-dimensional.
    """
    if self.ndim != 1:
        raise TypeError('cannot replace {0} with method {1} on a {2}'
                        .format(to_replace, method, type(self).__name__))

    original_dtype = self.dtype
    target = self if inplace else self.copy()
    filler = missing.get_fill_func(method)

    to_fill = missing.mask_missing(target.values, to_replace)
    filled = filler(target.values, limit=limit, mask=to_fill)

    # NOTE(review): presumably the fill function has already written into
    # the underlying values when the dtype is unchanged -- confirm against
    # ``missing.get_fill_func``.
    if filled.dtype == original_dtype and inplace:
        return

    replaced = pd.Series(filled, index=self.index, dtype=self.dtype)
    replaced = replaced.__finalize__(self)

    if not inplace:
        return replaced

    self._update_inplace(replaced._data)
    return


class NDFrame(PandasObject, SelectionMixin):
    """
    N-dimensional analogue of DataFrame. Stores multi-dimensional data in a
    size-mutable, labeled data structure.

    Parameters
    ----------
    data : BlockManager
    axes : list
    copy : boolean, default False
    """
    # Names of attributes regarded as internal to the object.
    # NOTE(review): presumably consulted by attribute get/set logic defined
    # outside this part of the file -- confirm.
    _internal_names = ['_data', '_cacher', '_item_cache', '_cache', '_is_copy',
                       '_subtyp', '_name', '_index', '_default_kind',
                       '_default_fill_value', '_metadata', '__array_struct__',
                       '__array_interface__']
    # Set form of the list above; ``_setup_axes`` adds the names of the axis
    # properties it creates to this set.
    _internal_names_set = set(_internal_names)
    _accessors = frozenset()
    # Names of deprecated attributes.
    _deprecations = frozenset(['as_blocks', 'blocks',
                               'convert_objects', 'is_copy'])
    _metadata = []
    # Class-level default; ``__init__`` also sets it to None per instance.
    _is_copy = None

    # dummy attribute so that datetime.__eq__(Series/DataFrame) defers
    # by returning NotImplemented
    timetuple = None

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(self, data, axes=None, copy=False, dtype=None,
                 fastpath=False):

        if not fastpath:
            if dtype is not None:
                data = data.astype(dtype)
            elif copy:
                data = data.copy()

            if axes is not None:
                for i, ax in enumerate(axes):
                    data = data.reindex_axis(ax, axis=i)

        object.__setattr__(self, '_is_copy', None)
        object.__setattr__(self, '_data', data)
        object.__setattr__(self, '_item_cache', {})

    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
        """ passed a manager and a axes dict """
        for a, axe in axes.items():
            if axe is not None:
                mgr = mgr.reindex_axis(axe,
                                       axis=self._get_block_manager_axis(a),
                                       copy=False)

        # make a copy if explicitly requested
        if copy:
            mgr = mgr.copy()
        if dtype is not None:
            # avoid further copies if we can
            if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
                mgr = mgr.astype(dtype=dtype)
        return mgr

    # ----------------------------------------------------------------------

    @property
    def is_copy(self):
        """
        Return the copy.
        """
        warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
                      "in a future version.", FutureWarning, stacklevel=2)
        return self._is_copy

    @is_copy.setter
    def is_copy(self, msg):
        warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
                      "in a future version.", FutureWarning, stacklevel=2)
        self._is_copy = msg

    def _validate_dtype(self, dtype):
        """ validate the passed dtype """

        if dtype is not None:
            dtype = pandas_dtype(dtype)

            # a compound dtype
            if dtype.kind == 'V':
                raise NotImplementedError("compound dtypes are not implemented"
                                          " in the {0} constructor"
                                          .format(self.__class__.__name__))

        return dtype

    # ----------------------------------------------------------------------
    # Construction

    @property
    def _constructor(self):
        """Used when a manipulation result has the same dimensions as the
        original.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_sliced(self):
        """Used when a manipulation result has one lower dimension(s) as the
        original, such as DataFrame single columns slicing.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_expanddim(self):
        """Used when a manipulation result has one higher dimension as the
        original, such as Series.to_frame() and DataFrame.to_panel()
        """
        raise NotImplementedError

    # ----------------------------------------------------------------------
    # Axis

    @classmethod
    def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
                    slicers=None, axes_are_reversed=False, build_axes=True,
                    ns=None, docs=None):
        """Provide axes setup for the major PandasObjects.

        Parameters
        ----------
        axes : the names of the axes in order (lowest to highest)
        info_axis_num : the axis of the selector dimension (int)
        stat_axis_num : the number of axis for the default stats (int)
        aliases : other names for a single axis (dict)
        slicers : how axes slice to others (dict)
        axes_are_reversed : boolean whether to treat passed axes as
            reversed (DataFrame)
        build_axes : setup the axis properties (default True)
        """

        cls._AXIS_ORDERS = axes
        cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
        cls._AXIS_LEN = len(axes)
        cls._AXIS_ALIASES = aliases or dict()
        cls._AXIS_IALIASES = {v: k for k, v in cls._AXIS_ALIASES.items()}
        cls._AXIS_NAMES = dict(enumerate(axes))
        cls._AXIS_SLICEMAP = slicers or None
        cls._AXIS_REVERSED = axes_are_reversed

        # typ
        setattr(cls, '_typ', cls.__name__.lower())

        # indexing support
        cls._ix = None

        if info_axis is not None:
            cls._info_axis_number = info_axis
            cls._info_axis_name = axes[info_axis]

        if stat_axis is not None:
            cls._stat_axis_number = stat_axis
            cls._stat_axis_name = axes[stat_axis]

        # setup the actual axis
        if build_axes:

            def set_axis(a, i):
                setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
                cls._internal_names_set.add(a)

            if axes_are_reversed:
                m = cls._AXIS_LEN - 1
                for i, a in cls._AXIS_NAMES.items():
                    set_axis(a, m - i)
            else:
                for i, a in cls._AXIS_NAMES.items():
                    set_axis(a, i)

        assert not isinstance(ns, dict)

    def _construct_axes_dict(self, axes=None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
        d.update(kwargs)
        return d

    @staticmethod
    def _construct_axes_dict_from(self, axes, **kwargs):
        """Return an axes dictionary for the passed axes."""
        d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
        d.update(kwargs)
        return d

    def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {self._AXIS_SLICEMAP[a]: self._get_axis(a)
             for a in (axes or self._AXIS_ORDERS)}
        d.update(kwargs)
        return d

    def _construct_axes_from_arguments(
            self, args, kwargs, require_all=False, sentinel=None):
        """Construct and returns axes if supplied in args/kwargs.

        If require_all, raise if all axis arguments are not supplied
        return a tuple of (axes, kwargs).

        sentinel specifies the default parameter when an axis is not
        supplied; useful to distinguish when a user explicitly passes None
        in scenarios where None has special meaning.
        """

        # construct the args
        args = list(args)
        for a in self._AXIS_ORDERS:

            # if we have an alias for this axis
            alias = self._AXIS_IALIASES.get(a)
            if alias is not None:
                if a in kwargs:
                    if alias in kwargs:
                        raise TypeError("arguments are mutually exclusive "
                                        "for [%s,%s]" % (a, alias))
                    continue
                if alias in kwargs:
                    kwargs[a] = kwargs.pop(alias)
                    continue

            # look for a argument by position
            if a not in kwargs:
                try:
                    kwargs[a] = args.pop(0)
                except IndexError:
                    if require_all:
                        raise TypeError("not enough/duplicate arguments "
                                        "specified!")

        axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS}
        return axes, kwargs

    @classmethod
    def _from_axes(cls, data, axes, **kwargs):
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data, **kwargs)
        else:
            if cls._AXIS_REVERSED:
                axes = axes[::-1]
            d = cls._construct_axes_dict_from(cls, axes, copy=False)
            d.update(kwargs)
            return cls(data, **d)

    @classmethod
    def _get_axis_number(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if is_integer(axis):
            if axis in cls._AXIS_NAMES:
                return axis
        else:
            try:
                return cls._AXIS_NUMBERS[axis]
            except KeyError:
                pass
        raise ValueError('No axis named {0} for object type {1}'
                         .format(axis, cls))

    @classmethod
    def _get_axis_name(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if isinstance(axis, string_types):
            if axis in cls._AXIS_NUMBERS:
                return axis
        else:
            try:
                return cls._AXIS_NAMES[axis]
            except KeyError:
                pass
        raise ValueError('No axis named {0} for object type {1}'
                         .format(axis, cls))

    def _get_axis(self, axis):
        name = self._get_axis_name(axis)
        return getattr(self, name)

    @classmethod
    def _get_block_manager_axis(cls, axis):
        """Map the axis to the block_manager axis."""
        axis = cls._get_axis_number(axis)
        if cls._AXIS_REVERSED:
            m = cls._AXIS_LEN - 1
            return m - axis
        return axis

    def _get_axis_resolvers(self, axis):
        # index or columns
        axis_index = getattr(self, axis)
        d = dict()
        prefix = axis[0]

        for i, name in enumerate(axis_index.names):
            if name is not None:
                key = level = name
            else:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # multiiindex
                key = '{prefix}level_{i}'.format(prefix=prefix, i=i)
                level = i

            level_values = axis_index.get_level_values(level)
            s = level_values.to_series()
            s.index = axis_index
            d[key] = s

        # put the index/columns itself in the dict
        if isinstance(axis_index, MultiIndex):
            dindex = axis_index
        else:
            dindex = axis_index.to_series()

        d[axis] = dindex
        return d

    def _get_index_resolvers(self):
        d = {}
        for axis_name in self._AXIS_ORDERS:
            d.update(self._get_axis_resolvers(axis_name))
        return d

    @property
    def _info_axis(self):
        return getattr(self, self._info_axis_name)

    @property
    def _stat_axis(self):
        return getattr(self, self._stat_axis_name)

    @property
    def shape(self):
        """
        Return a tuple of axis dimensions
        """
        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)

    @property
    def axes(self):
        """
        Return index label(s) of the internal NDFrame
        """
        # we do it this way because if we have reversed axes, then
        # the block manager shows then reversed
        return [self._get_axis(a) for a in self._AXIS_ORDERS]

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return self._data.ndim

    @property
    def size(self):
        """
        Return an int representing the number of elements in this object.

        Return the number of rows if Series. Otherwise return the number of
        rows times number of columns if DataFrame.

        See Also
        --------
        ndarray.size : Number of elements in the array.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.size
        3

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.size
        4
        """
        return np.prod(self.shape)

    @property
    def _selected_obj(self):
        """ internal compat with SelectionMixin """
        return self

    @property
    def _obj_with_exclusions(self):
        """ internal compat with SelectionMixin """
        return self

    def _expand_axes(self, key):
        new_axes = []
        for k, ax in zip(key, self.axes):
            if k not in ax:
                if type(k) != ax.dtype.type:
                    ax = ax.astype('O')
                new_axes.append(ax.insert(len(ax), k))
            else:
                new_axes.append(ax)

        return new_axes

    def set_axis(self, labels, axis=0, inplace=None):
        """
        Assign desired index to given axis.

        Indexes for column or row labels can be changed by assigning
        a list-like or Index.

        .. versionchanged:: 0.21.0

           The signature is now `labels` and `axis`, consistent with
           the rest of pandas API. Previously, the `axis` and `labels`
           arguments were respectively the first and second positional
           arguments.

        Parameters
        ----------
        labels : list-like, Index
            The values for the new index.

        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to update. The value 0 identifies the rows, and 1
            identifies the columns.

        inplace : bool, default None
            Whether to return a new %(klass)s instance.

            .. warning::

               ``inplace=None`` currently falls back to to True, but in a
               future version, will default to False. Use inplace=True
               explicitly rather than relying on the default.

        Returns
        -------
        renamed : %(klass)s or None
            An object of same type as caller if inplace=False, None otherwise.

        See Also
        --------
        DataFrame.rename_axis : Alter the name of the index or columns.

        Examples
        --------
        **Series**

        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64

        >>> s.set_axis(['a', 'b', 'c'], axis=0, inplace=False)
        a    1
        b    2
        c    3
        dtype: int64

        The original object is not modified.

        >>> s
        0    1
        1    2
        2    3
        dtype: int64

        **DataFrame**

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        Change the row labels.

        >>> df.set_axis(['a', 'b', 'c'], axis='index', inplace=False)
           A  B
        a  1  4
        b  2  5
        c  3  6

        Change the column labels.

        >>> df.set_axis(['I', 'II'], axis='columns', inplace=False)
           I  II
        0  1   4
        1  2   5
        2  3   6

        Now, update the labels inplace.

        >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True)
        >>> df
           i  ii
        0  1   4
        1  2   5
        2  3   6
        """
        if is_scalar(labels):
            warnings.warn(
                'set_axis now takes "labels" as first argument, and '
                '"axis" as named parameter. The old form, with "axis" as '
                'first parameter and \"labels\" as second, is still supported '
                'but will be deprecated in a future version of pandas.',
                FutureWarning, stacklevel=2)
            labels, axis = axis, labels

        if inplace is None:
            warnings.warn(
                'set_axis currently defaults to operating inplace.\nThis '
                'will change in a future version of pandas, use '
                'inplace=True to avoid this warning.',
                FutureWarning, stacklevel=2)
            inplace = True
        if inplace:
            setattr(self, self._get_axis_name(axis), labels)
        else:
            obj = self.copy()
            obj.set_axis(labels, axis=axis, inplace=True)
            return obj

    def _set_axis(self, axis, labels):
        self._data.set_axis(axis, labels)
        self._clear_item_cache()

    def transpose(self, *args, **kwargs):
        """
        Permute the dimensions of the %(klass)s

        Parameters
        ----------
        args : %(args_transpose)s
        copy : boolean, default False
            Make a copy of the underlying data. Mixed-dtype data will
            always result in a copy
        **kwargs
            Additional keyword arguments will be passed to the function.

        Returns
        -------
        y : same as input

        Examples
        --------
        >>> p.transpose(2, 0, 1)
        >>> p.transpose(2, 0, 1, copy=True)
        """

        # construct the args
        axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
                                                           require_all=True)
        axes_names = tuple(self._get_axis_name(axes[a])
                           for a in self._AXIS_ORDERS)
        axes_numbers = tuple(self._get_axis_number(axes[a])
                             for a in self._AXIS_ORDERS)

        # we must have unique axes
        if len(axes) != len(set(axes)):
            raise ValueError('Must specify %s unique axes' % self._AXIS_LEN)

        new_axes = self._construct_axes_dict_from(self, [self._get_axis(x)
                                                         for x in axes_names])
        new_values = self.values.transpose(axes_numbers)
        if kwargs.pop('copy', None) or (len(args) and args[-1]):
            new_values = new_values.copy()

        nv.validate_transpose_for_generic(self, kwargs)
        return self._constructor(new_values, **new_axes).__finalize__(self)

    def swapaxes(self, axis1, axis2, copy=True):
        """
        Interchange axes and swap values axes appropriately.

        Returns
        -------
        y : same as input
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            if copy:
                return self.copy()
            return self

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k))
                    for k in range(self._AXIS_LEN))
        new_values = self.values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(new_values, *new_axes).__finalize__(self)

    def droplevel(self, level, axis=0):
        """
        Return DataFrame with requested index / column level(s) removed.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        level : int, str, or list-like
            If a string is given, must be the name of a level
            If list-like, elements must be names or positional indexes
            of levels.

        axis : {0 or 'index', 1 or 'columns'}, default 0

        Returns
        -------
        DataFrame.droplevel()

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     [1, 2, 3, 4],
        ...     [5, 6, 7, 8],
        ...     [9, 10, 11, 12]
        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

        >>> df.columns = pd.MultiIndex.from_tuples([
        ...    ('c', 'e'), ('d', 'f')
        ... ], names=['level_1', 'level_2'])

        >>> df
        level_1   c   d
        level_2   e   f
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12

        >>> df.droplevel('a')
        level_1   c   d
        level_2   e   f
        b
        2        3   4
        6        7   8
        10      11  12

        >>> df.droplevel('level2', axis=1)
        level_1   c   d
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12
        """
        labels = self._get_axis(axis)
        new_labels = labels.droplevel(level)
        result = self.set_axis(new_labels, axis=axis, inplace=False)
        return result

    def pop(self, item):
        """
        Return item and drop from frame. Raise KeyError if not found.

        Parameters
        ----------
        item : str
            Label of column to be popped.

        Returns
        -------
        Series

        Examples
        --------
        >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
        ...                    ('parrot', 'bird', 24.0),
        ...                    ('lion', 'mammal', 80.5),
        ...                    ('monkey','mammal', np.nan)],
        ...                   columns=('name', 'class', 'max_speed'))
        >>> df
             name   class  max_speed
        0  falcon    bird      389.0
        1  parrot    bird       24.0
        2    lion  mammal       80.5
        3  monkey  mammal        NaN

        >>> df.pop('class')
        0      bird
        1      bird
        2    mammal
        3    mammal
        Name: class, dtype: object

        >>> df
             name  max_speed
        0  falcon      389.0
        1  parrot       24.0
        2    lion       80.5
        3  monkey        NaN
        """
        result = self[item]
        del self[item]
        try:
            result._reset_cacher()
        except AttributeError:
            pass

        return result

    def squeeze(self, axis=None):
        """
        Squeeze 1 dimensional axis objects into scalars.

        Series or DataFrames with a single element are squeezed to a scalar.
        DataFrames with a single column or a single row are squeezed to a
        Series. Otherwise the object is unchanged.

        This method is most useful when you don't know if your
        object is a Series or DataFrame, but you do know it has just a single
        column. In that case you can safely call `squeeze` to ensure you have a
        Series.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns', None}, default None
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed.

            .. versionadded:: 0.20.0

        Returns
        -------
        DataFrame, Series, or scalar
            The projection after squeezing `axis` or all the axes.

        See Also
        --------
        Series.iloc : Integer-location based indexing for selecting scalars.
        DataFrame.iloc : Integer-location based indexing for selecting Series.
        Series.to_frame : Inverse of DataFrame.squeeze for a
            single-column DataFrame.

        Examples
        --------
        >>> primes = pd.Series([2, 3, 5, 7])

        Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: int64

        >>> even_primes.squeeze()
        2

        Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: int64

        Squeezing is even more effective when used with DataFrames.

        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4

        Slicing a single column will produce a DataFrame with the columns
        having only one value:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3

        So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: int64

        Slicing a single row from a single column will produce a single
        scalar DataFrame:

        >>> df_0a = df.loc[df.index < 1, ['a']]
        >>> df_0a
           a
        0  1

        Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: int64

        Squeezing all axes wil project directly into a scalar:

        >>> df_0a.squeeze()
        1
        """
        axis = (self._AXIS_NAMES if axis is None else
                (self._get_axis_number(axis),))
        try:
            return self.iloc[
                tuple(0 if i in axis and len(a) == 1 else slice(None)
                      for i, a in enumerate(self.axes))]
        except Exception:
            return self

    def swaplevel(self, i=-2, j=-1, axis=0):
        """
        Swap levels i and j in a MultiIndex on a particular axis

        Parameters
        ----------
        i, j : int, str (can be mixed)
            Level of index to be swapped. Can pass level name as string.

        Returns
        -------
        swapped : same type as caller (new object)

        .. versionchanged:: 0.18.1

           The indexes ``i`` and ``j`` are now optional, and default to
           the two innermost levels of the index.
        """
        axis = self._get_axis_number(axis)
        result = self.copy()
        labels = result._data.axes[axis]
        result._data.set_axis(axis, labels.swaplevel(i, j))
        return result

    # ----------------------------------------------------------------------
    # Rename

    def rename(self, *args, **kwargs):
        """
        Alter axes input function or functions. Function / dict values must be
        unique (1-to-1). Labels not contained in a dict / Series will be left
        as-is. Extra labels listed don't throw an error. Alternatively, change
        ``Series.name`` with a scalar value (Series only).

        Parameters
        ----------
        %(axes)s : scalar, list-like, dict-like or function, optional
            Scalar or list-like will alter the ``Series.name`` attribute,
            and raise on DataFrame or Panel.
            dict-like or functions are transformations to apply to
            that axis' values
        copy : bool, default True
            Also copy underlying data.
        inplace : bool, default False
            Whether to return a new %(klass)s. If True then value of copy is
            ignored.
        level : int or level name, default None
            In case of a MultiIndex, only rename labels in the specified
            level.
        errors : {'ignore', 'raise'}, default 'ignore'
            If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`,
            or `columns` contains labels that are not present in the Index
            being transformed.
            If 'ignore', existing keys will be renamed and extra keys will be
            ignored.

        Returns
        -------
        renamed : %(klass)s (new object)

        Raises
        ------
        KeyError
            If any of the labels is not found in the selected axis and
            "errors='raise'".

        See Also
        --------
        NDFrame.rename_axis

        Examples
        --------

        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64
        >>> s.rename("my_name") # scalar, changes Series.name
        0    1
        1    2
        2    3
        Name: my_name, dtype: int64
        >>> s.rename(lambda x: x ** 2)  # function, changes labels
        0    1
        1    2
        4    3
        dtype: int64
        >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
        0    1
        3    2
        5    3
        dtype: int64

        Since ``DataFrame`` doesn't have a ``.name`` attribute,
        only mapping-type arguments are allowed.

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        >>> df.rename(2)
        Traceback (most recent call last):
        ...
        TypeError: 'int' object is not callable

        ``DataFrame.rename`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        We *highly* recommend using keyword arguments to clarify your
        intent.

        >>> df.rename(index=str, columns={"A": "a", "B": "c"})
           a  c
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename(index=str, columns={"A": "a", "C": "c"})
           a  B
        0  1  4
        1  2  5
        2  3  6

        Using axis-style parameters

        >>> df.rename(str.lower, axis='columns')
           a  b
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename({1: 2, 2: 4}, axis='index')
           A  B
        0  1  4
        2  2  5
        4  3  6

        See the :ref:`user guide <basics.rename>` for more.
        """
        axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
        copy = kwargs.pop('copy', True)
        inplace = kwargs.pop('inplace', False)
        level = kwargs.pop('level', None)
        axis = kwargs.pop('axis', None)
        errors = kwargs.pop('errors', 'ignore')
        if axis is not None:
            # Validate the axis
            self._get_axis_number(axis)

        if kwargs:
            raise TypeError('rename() got an unexpected keyword '
                            'argument "{0}"'.format(list(kwargs.keys())[0]))

        if com.count_not_none(*axes.values()) == 0:
            raise TypeError('must pass an index to rename')

        self._consolidate_inplace()
        result = self if inplace else self.copy(deep=copy)

        # start in the axis order to eliminate too many copies
        for axis in lrange(self._AXIS_LEN):
            v = axes.get(self._AXIS_NAMES[axis])
            if v is None:
                continue
            f = com._get_rename_function(v)
            baxis = self._get_block_manager_axis(axis)
            if level is not None:
                level = self.axes[axis]._get_level_number(level)

            # GH 13473
            if not callable(v):
                indexer = self.axes[axis].get_indexer_for(v)
                if errors == 'raise' and len(indexer[indexer == -1]):
                    missing_labels = [label for index, label in enumerate(v)
                                      if indexer[index] == -1]
                    raise KeyError('{} not found in axis'
                                   .format(missing_labels))

            result._data = result._data.rename_axis(f, axis=baxis, copy=copy,
                                                    level=level)
            result._clear_item_cache()

        if inplace:
            self._update_inplace(result._data)
        else:
            return result.__finalize__(self)

    @rewrite_axis_style_signature('mapper', [('copy', True),
                                             ('inplace', False)])
    def rename_axis(self, mapper=sentinel, **kwargs):
        """
        Set the name of the axis for the index or columns.

        Parameters
        ----------
        mapper : scalar, list-like, optional
            Value to set the axis name attribute.
        index, columns : scalar, list-like, dict-like or function, optional
            A scalar, list-like, dict-like or function transformation to
            apply to the names of that axis.

            Use either ``mapper`` and ``axis`` to
            specify the axis to target with ``mapper``, or ``index``
            and/or ``columns``.

            .. versionchanged:: 0.24.0

        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to rename.
        copy : bool, default True
            Also copy underlying data.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Series
            or DataFrame.

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or None if `inplace` is True.

        Raises
        ------
        TypeError
            If an unexpected keyword argument is passed.

        See Also
        --------
        Series.rename : Alter Series index labels or name.
        DataFrame.rename : Alter DataFrame index labels or name.
        Index.rename : Set new names on index.

        Notes
        -----
        Prior to version 0.21.0, ``rename_axis`` could also be used to change
        the axis *labels* by passing a mapping or scalar. This behavior is
        deprecated and will be removed in a future version. Use ``rename``
        instead.

        ``DataFrame.rename_axis`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        The first calling convention will only modify the names of
        the index and/or the names of the Index object that is the columns.
        Unless ``inplace`` is True the object is copied first, and ``copy``
        controls whether that copy is deep.

        The second calling convention will modify the names of
        the corresponding index if mapper is a list or a scalar.
        However, if mapper is dict-like or a function, it will use the
        deprecated behavior of modifying the axis *labels*.

        We *highly* recommend using keyword arguments to clarify your
        intent.

        Examples
        --------
        **Series**

        >>> s = pd.Series(["dog", "cat", "monkey"])
        >>> s
        0       dog
        1       cat
        2    monkey
        dtype: object
        >>> s.rename_axis("animal")
        animal
        0       dog
        1       cat
        2    monkey
        dtype: object

        **DataFrame**

        >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
        ...                    "num_arms": [0, 0, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs  num_arms
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("animal")
        >>> df
                num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("limbs", axis="columns")
        >>> df
        limbs   num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2

        **MultiIndex**

        >>> df.index = pd.MultiIndex.from_product([['mammal'],
        ...                                        ['dog', 'cat', 'monkey']],
        ...                                       names=['type', 'name'])
        >>> df
        limbs          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(index={'type': 'class'})
        limbs          num_legs  num_arms
        class  name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(columns=str.upper)
        LIMBS          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2
        """
        axes, kwargs = self._construct_axes_from_arguments(
            (), kwargs, sentinel=sentinel)
        copy = kwargs.pop('copy', True)
        inplace = kwargs.pop('inplace', False)
        axis = kwargs.pop('axis', 0)
        if axis is not None:
            axis = self._get_axis_number(axis)

        # Anything still left in kwargs was not understood.
        if kwargs:
            unexpected = list(kwargs.keys())[0]
            raise TypeError('rename_axis() got an unexpected keyword '
                            'argument "{0}"'.format(unexpected))

        inplace = validate_bool_kwarg(inplace, 'inplace')

        if mapper is sentinel:
            # New behavior: ``index`` and/or ``columns`` were given, so
            # only the axis *names* are touched.
            result = self if inplace else self.copy(deep=copy)

            for axis_no in lrange(self._AXIS_LEN):
                spec = axes.get(self._AXIS_NAMES[axis_no])
                if spec is sentinel:
                    # nothing was requested for this axis
                    continue
                names_given = is_scalar(spec) or (
                    is_list_like(spec) and not is_dict_like(spec))
                if names_given:
                    new_names = spec
                else:
                    # dict-like / callable: map every current name
                    renamer = com._get_rename_function(spec)
                    old_names = self._get_axis(axis_no).names
                    new_names = [renamer(old) for old in old_names]
                result._set_axis_name(new_names, axis=axis_no, inplace=True)
            return None if inplace else result

        names_given = is_scalar(mapper) or (
            is_list_like(mapper) and not is_dict_like(mapper))
        if names_given:
            # v0.23 behavior for a scalar or list: set the axis name(s)
            return self._set_axis_name(mapper, axis=axis, inplace=inplace)

        # Deprecated (v0.21) behavior: a mapper that is neither a list nor
        # a scalar alters the axis labels, so defer to ``rename``.
        warnings.warn("Using 'rename_axis' to alter labels is deprecated. "
                      "Use '.rename' instead",
                      FutureWarning, stacklevel=3)
        rename_kwargs = {'copy': copy, 'inplace': inplace,
                         self._get_axis_name(axis): mapper}
        return self.rename(**rename_kwargs)

    def _set_axis_name(self, name, axis=0, inplace=False):
        """
        Set the name(s) of the axis.

        Parameters
        ----------
        name : str or list of str
            Name(s) to set.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to set the label. The value 0 or 'index' specifies index,
            and the value 1 or 'columns' specifies columns.
        inplace : bool, default False
            If `True`, do operation inplace and return None.

            .. versionadded:: 0.21.0

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or `None` if `inplace` is `True`.

        See Also
        --------
        DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
        Series.rename : Alter the index labels or set the index name
            of :class:`Series`.
        Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.

        Examples
        --------
        >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs
        dog            4
        cat            4
        monkey         2
        >>> df._set_axis_name("animal")
                num_legs
        animal
        dog            4
        cat            4
        monkey         2
        >>> df.index = pd.MultiIndex.from_product(
        ...                [["mammal"], ['dog', 'cat', 'monkey']])
        >>> df._set_axis_name(["type", "name"])
                       num_legs
        type   name
        mammal dog            4
               cat            4
               monkey         2
        """
        axis = self._get_axis_number(axis)
        # Build the renamed axis from the current one before choosing
        # which object it is attached to.
        renamed_axis = self._get_axis(axis).set_names(name)

        inplace = validate_bool_kwarg(inplace, 'inplace')
        target = self if inplace else self.copy()
        target.set_axis(renamed_axis, axis=axis, inplace=True)
        return None if inplace else target

    # ----------------------------------------------------------------------
    # Comparison Methods

    def _indexed_same(self, other):
        """
        Return True when every axis of ``self`` equals that axis of ``other``.

        Axes are visited in ``self._AXIS_ORDERS`` order and compared with
        the axis object's ``equals`` method; the first mismatch stops the
        scan.
        """
        for axis_name in self._AXIS_ORDERS:
            mine = self._get_axis(axis_name)
            if not mine.equals(other._get_axis(axis_name)):
                return False
        return True

    def equals(self, other):
        """
        Test whether two objects contain the same elements.

        This function allows two Series or DataFrames to be compared against
        each other to see if they have the same shape and elements. NaNs in
        the same location are considered equal. The column headers do not
        need to have the same type, but the elements within the columns must
        be the same dtype.

        Parameters
        ----------
        other : Series or DataFrame
            The other Series or DataFrame to be compared with the first.

        Returns
        -------
        bool
            True if all elements are the same in both objects, False
            otherwise. Always False when `other` is not an instance of the
            caller's constructor type.

        See Also
        --------
        Series.eq : Compare two Series objects of the same length
            and return a Series where each element is True if the element
            in each Series is equal, False otherwise.
        DataFrame.eq : Compare two DataFrame objects of the same shape and
            return a DataFrame where each element is True if the respective
            element in each DataFrame is equal, False otherwise.
        assert_series_equal : Raise an AssertionError if left and right
            Series are not equal.
        assert_frame_equal : Raise an AssertionError if left and right
            DataFrames are not equal.
        numpy.array_equal : Return True if two arrays have the same shape
            and elements, False otherwise.

        Notes
        -----
        This function requires that the elements have the same dtype as their
        respective elements in the other Series or DataFrame. However, the
        column labels do not need to have the same type, as long as they are
        still considered equal.

        Examples
        --------
        >>> df = pd.DataFrame({1: [10], 2: [20]})
        >>> df
            1   2
        0  10  20

        DataFrames df and exactly_equal have the same types and values for
        their elements and column labels, which will return True.

        >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
        >>> exactly_equal
            1   2
        0  10  20
        >>> df.equals(exactly_equal)
        True

        DataFrames df and different_column_type have the same element
        types and values, but have different types for the column labels,
        which will still return True.

        >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
        >>> different_column_type
           1.0  2.0
        0   10   20
        >>> df.equals(different_column_type)
        True

        DataFrames df and different_data_type have different types for the
        same values for their elements, and will return False even though
        their column labels are the same values and types.

        >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
        >>> different_data_type
              1     2
        0  10.0  20.0
        >>> df.equals(different_data_type)
        False
        """
        # Objects of a different kind are never equal; otherwise the
        # comparison is delegated to the underlying data manager.
        same_kind = isinstance(other, self._constructor)
        return self._data.equals(other._data) if same_kind else False

    # -------------------------------------------------------------------------
    # Unary Methods

    def __neg__(self):
        """
        Implement unary minus (``-obj``).

        Boolean values are inverted with ``operator.inv``; numeric,
        timedelta64 and object values are negated with ``operator.neg``.
        The result is wrapped with ``__array_wrap__``.

        Raises
        ------
        TypeError
            If the underlying values have any other dtype.
        """
        values = com.values_from_object(self)
        if is_bool_dtype(values):
            return self.__array_wrap__(operator.inv(values))

        negatable = (is_numeric_dtype(values) or
                     is_timedelta64_dtype(values) or
                     is_object_dtype(values))
        if not negatable:
            raise TypeError("Unary negative expects numeric dtype, not {}"
                            .format(values.dtype))
        return self.__array_wrap__(operator.neg(values))

    def __pos__(self):
        """
        Implement unary plus (``+obj``).

        Boolean and period-like values are passed through unchanged;
        numeric, timedelta64 and object values go through ``operator.pos``.
        The result is wrapped with ``__array_wrap__``.

        Raises
        ------
        TypeError
            If the underlying values have any other dtype.
        """
        values = com.values_from_object(self)
        if is_bool_dtype(values) or is_period_arraylike(values):
            # nothing to compute: hand the values back as they are
            return self.__array_wrap__(values)

        supported = (is_numeric_dtype(values) or
                     is_timedelta64_dtype(values) or
                     is_object_dtype(values))
        if not supported:
            raise TypeError("Unary plus expects numeric dtype, not {}"
                            .format(values.dtype))
        return self.__array_wrap__(operator.pos(values))

    def __invert__(self):
        """
        Implement bitwise inversion (``~obj``).

        Applies ``operator.inv`` to the underlying values and wraps the
        result with ``__array_wrap__``. If that fails for an object whose
        shape has zero total elements, the object itself is returned;
        otherwise the original exception propagates.
        """
        try:
            inverted = operator.inv(com.values_from_object(self))
            return self.__array_wrap__(inverted)
        except Exception:
            # inv fails with 0 len; only that case is tolerated
            if np.prod(self.shape):
                raise
            return self

    def __nonzero__(self):
        """
        Refuse truth-value testing.

        Raises
        ------
        ValueError
            Always; the message names the object's class and lists the
            explicit alternatives.
        """
        template = ("The truth value of a {0} is ambiguous. "
                    "Use a.empty, a.bool(), a.item(), a.any() or a.all().")
        raise ValueError(template.format(self.__class__.__name__))

    # Python 3 spelling of the same hook.
    __bool__ = __nonzero__

    def bool(self):
        """
        Return the bool of a single element PandasObject.

        This must be a boolean scalar value, either True or False.

        Returns
        -------
        bool
            The single boolean element held by the object.

        Raises
        ------
        ValueError
            If the squeezed object is a scalar that is not boolean. When
            the squeezed object is not a scalar at all, ``__nonzero__`` is
            called to report the ambiguity.
        """
        element = self.squeeze()
        if isinstance(element, (bool, np.bool_)):
            return bool(element)

        if is_scalar(element):
            raise ValueError("bool cannot act on a non-boolean single element "
                             "{0}".format(self.__class__.__name__))

        # Not reducible to a single scalar: defer to the truth-value hook.
        self.__nonzero__()

    def __abs__(self):
        """Implement ``abs(obj)`` by delegating to ``self.abs()``."""
        absolute = self.abs()
        return absolute

    def __round__(self, decimals=0):
        """
        Implement ``round(obj, decimals)`` by delegating to ``self.round``.

        Parameters
        ----------
        decimals : default 0
            Passed through unchanged to ``self.round``.
        """
        rounded = self.round(decimals)
        return rounded

    # -------------------------------------------------------------------------
    # Label or Level Combination Helpers
    #
    # A collection of helper methods for DataFrame/Series operations that
    # accept a combination of column/index labels and levels.  All such
    # operations should utilize/extend these methods when possible so that we
    # have consistent precedence and validation logic throughout the library.

    def _is_level_reference(self, key, axis=0):
        """
        Test whethe…
Summary ✨

This code defines various functions for creating and manipulating pandas data structures, including NDFrame (a multi-dimensional labeled array) and Series (a one-dimensional labeled array). It sets up indexing and creates function names with docstrings, which will be used to generate documentation for these functions. The output appears to be a collection of function definitions and indexers for use in pandas.
Tech Fingerprint

Alerts (11)

'type(' Use isinstance() for type checking instead of type()
76 511
'def' Ensure functions have docstrings for documentation
173 263
'isinstance(' Overuse may indicate design issues; consider polymorphism
275 340 366 412
'list(' Avoid unnecessary list conversions; use generators where possible
310
Complexity hotspot; lines 1178 to 1179 (total complexity: 4)
1178 1179