/pandas/tseries/index.py
Python | 2073 lines | 1827 code | 122 blank | 124 comment | 195 complexity | dd7e81ae1a7f2d2209998964c8866010 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
Large files are truncated, but you can click here to view the full file
- # pylint: disable=E1101
- import operator
- from datetime import time, datetime
- from datetime import timedelta
- import numpy as np
- from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
- _values_from_object, _maybe_box,
- ABCSeries)
- from pandas.core.index import Index, Int64Index, Float64Index
- import pandas.compat as compat
- from pandas.compat import u
- from pandas.tseries.frequencies import (
- infer_freq, to_offset, get_period_alias,
- Resolution, get_reso_string, _tz_convert_with_transitions)
- from pandas.core.base import DatetimeIndexOpsMixin
- from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
- from pandas.tseries.tools import parse_time_string, normalize_date
- from pandas.util.decorators import cache_readonly
- import pandas.core.common as com
- import pandas.tseries.offsets as offsets
- import pandas.tseries.tools as tools
- from pandas.lib import Timestamp
- import pandas.lib as lib
- import pandas.tslib as tslib
- import pandas.algos as _algos
- import pandas.index as _index
- def _utc():
- import pytz
- return pytz.utc
- # -------- some conversion wrapper functions
- def _field_accessor(name, field, docstring=None):
- def f(self):
- values = self.asi8
- if self.tz is not None:
- utc = _utc()
- if self.tz is not utc:
- values = self._local_timestamps()
- if field in ['is_month_start', 'is_month_end',
- 'is_quarter_start', 'is_quarter_end',
- 'is_year_start', 'is_year_end']:
- month_kw = self.freq.kwds.get('startingMonth', self.freq.kwds.get('month', 12)) if self.freq else 12
- return tslib.get_start_end_field(values, field, self.freqstr, month_kw)
- else:
- return tslib.get_date_field(values, field)
- f.__name__ = name
- f.__doc__ = docstring
- return property(f)
- def _join_i8_wrapper(joinf, with_indexers=True):
- @staticmethod
- def wrapper(left, right):
- if isinstance(left, (np.ndarray, ABCSeries)):
- left = left.view('i8', type=np.ndarray)
- if isinstance(right, (np.ndarray, ABCSeries)):
- right = right.view('i8', type=np.ndarray)
- results = joinf(left, right)
- if with_indexers:
- join_index, left_indexer, right_indexer = results
- join_index = join_index.view('M8[ns]')
- return join_index, left_indexer, right_indexer
- return results
- return wrapper
- def _dt_index_cmp(opname, nat_result=False):
- """
- Wrap comparison operations to convert datetime-like to datetime64
- Returns a method that looks up ``opname`` on the parent class of
- DatetimeIndex, applies it to the converted operand and writes
- ``nat_result`` into every position where either side holds iNaT.
- """
- def wrapper(self, other):
- func = getattr(super(DatetimeIndex, self), opname)
- # Scalar datetime / string operands are converted with _to_m8 using this index's tz.
- if isinstance(other, datetime) or isinstance(other, compat.string_types):
- other = _to_m8(other, tz=self.tz)
- result = func(other)
- # A null scalar makes the whole result equal to nat_result.
- if com.isnull(other):
- result.fill(nat_result)
- else:
- # Lists are turned into a DatetimeIndex; anything that is not an
- # ndarray/Series must already be a np.datetime64 (else TypeError).
- if isinstance(other, list):
- other = DatetimeIndex(other)
- elif not isinstance(other, (np.ndarray, ABCSeries)):
- other = _ensure_datetime64(other)
- result = func(other)
- # Find iNaT entries of the other operand through its i8 view.
- if isinstance(other, Index):
- o_mask = other.values.view('i8') == tslib.iNaT
- else:
- o_mask = other.view('i8') == tslib.iNaT
- if o_mask.any():
- result[o_mask] = nat_result
- # iNaT entries of this index also take nat_result.
- mask = self.asi8 == tslib.iNaT
- if mask.any():
- result[mask] = nat_result
- # The result is handed back viewed as a plain ndarray.
- return result.view(np.ndarray)
- return wrapper
- def _ensure_datetime64(other):
- if isinstance(other, np.datetime64):
- return other
- raise TypeError('%s type object %s' % (type(other), str(other)))
- _midnight = time(0, 0)
- class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index):
- """
- Immutable ndarray of datetime64 data, represented internally as int64, and
- which can be boxed to Timestamp objects that are subclasses of datetime and
- carry metadata such as frequency information.
- Parameters
- ----------
- data : array-like (1-dimensional), optional
- Optional datetime-like data to construct index with
- copy : bool
- Make a copy of input ndarray
- freq : string or pandas offset object, optional
- One of pandas date offset strings or corresponding objects
- start : starting value, datetime-like, optional
- If data is None, start is used as the start point in generating regular
- timestamp data.
- periods : int, optional, > 0
- Number of periods to generate, if generating index. Takes precedence
- over end argument
- end : end time, datetime-like, optional
- If periods is none, generated index will extend to first conforming
- time on or just past end argument
- closed : string or None, default None
- Make the interval closed with respect to the given frequency to
- the 'left', 'right', or both sides (None)
- name : object
- Name to be stored in the index
- """
- _join_precedence = 10
- # Join helpers: the int64 join routines are wrapped so they receive 'i8'
- # views of their operands and return the joined index as 'M8[ns]'.
- _inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64)
- _outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64)
- _left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64)
- # with_indexers=False: the routine's result is returned without unpacking.
- _left_indexer_unique = _join_i8_wrapper(
- _algos.left_join_indexer_unique_int64, with_indexers=False)
- _arrmap = None
- # Comparison operators built by _dt_index_cmp; positions holding NaT give
- # False, except for __ne__ where they give True.
- __eq__ = _dt_index_cmp('__eq__')
- __ne__ = _dt_index_cmp('__ne__', nat_result=True)
- __lt__ = _dt_index_cmp('__lt__')
- __gt__ = _dt_index_cmp('__gt__')
- __le__ = _dt_index_cmp('__le__')
- __ge__ = _dt_index_cmp('__ge__')
- # structured array cache for datetime fields
- _sarr_cache = None
- _engine_type = _index.DatetimeEngine
- # Class-level defaults; __new__, _generate and _simple_new set these per instance.
- tz = None
- offset = None
- _comparables = ['name','freqstr','tz']
- _allow_datetime_index_ops = True
- def __new__(cls, data=None,
- freq=None, start=None, end=None, periods=None,
- copy=False, name=None, tz=None,
- verify_integrity=True, normalize=False,
- closed=None, **kwds):
- # Build a DatetimeIndex from ``data`` or, when ``data`` is None, generate
- # a range from start/end/periods at ``freq`` via cls._generate.
- # dayfirst / yearfirst / infer_dst are accepted through **kwds.
- dayfirst = kwds.pop('dayfirst', None)
- yearfirst = kwds.pop('yearfirst', None)
- infer_dst = kwds.pop('infer_dst', False)
- # freq='infer' postpones the frequency to what is inferred from the
- # finished index (see the end of this method).
- freq_infer = False
- if not isinstance(freq, DateOffset):
- # if a passed freq is None, don't infer automatically
- if freq != 'infer':
- freq = to_offset(freq)
- else:
- freq_infer = True
- freq = None
- # Float periods are converted with int(); other non-integers are rejected.
- if periods is not None:
- if com.is_float(periods):
- periods = int(periods)
- elif not com.is_integer(periods):
- raise ValueError('Periods must be a number, got %s' %
- str(periods))
- if data is None and freq is None:
- raise ValueError("Must provide freq argument if no data is "
- "supplied")
- # No data: delegate to the range generator.
- if data is None:
- return cls._generate(start, end, periods, name, freq,
- tz=tz, normalize=normalize, closed=closed,
- infer_dst=infer_dst)
- # Input that is neither ndarray nor Series is first turned into an
- # object array; scalars are rejected.
- if not isinstance(data, (np.ndarray, ABCSeries)):
- if np.isscalar(data):
- raise ValueError('DatetimeIndex() must be called with a '
- 'collection of some kind, %s was passed'
- % repr(data))
- # other iterable of some kind
- if not isinstance(data, (list, tuple)):
- data = list(data)
- data = np.asarray(data, dtype='O')
- # try a few ways to make it datetime64
- if lib.is_string_array(data):
- data = _str_to_dt_array(data, freq, dayfirst=dayfirst,
- yearfirst=yearfirst)
- else:
- data = tools.to_datetime(data, errors='raise')
- data.offset = freq
- # If to_datetime produced a DatetimeIndex it is returned here,
- # renamed and/or localized as requested.
- if isinstance(data, DatetimeIndex):
- if name is not None:
- data.name = name
- if tz is not None:
- return data.tz_localize(tz, infer_dst=infer_dst)
- return data
- if issubclass(data.dtype.type, compat.string_types):
- data = _str_to_dt_array(data, freq, dayfirst=dayfirst,
- yearfirst=yearfirst)
- # datetime64 input: reuse the values, casting to nanoseconds if needed.
- if issubclass(data.dtype.type, np.datetime64):
- if isinstance(data, ABCSeries):
- data = data.values
- if isinstance(data, DatetimeIndex):
- if tz is None:
- tz = data.tz
- subarr = data.values
- # Inheriting the source index's offset also skips the integrity check.
- if freq is None:
- freq = data.offset
- verify_integrity = False
- else:
- if data.dtype != _NS_DTYPE:
- subarr = tslib.cast_to_nanoseconds(data)
- else:
- subarr = data
- # int64 input: converted with np.asarray when copy=True, otherwise
- # viewed as datetime64[ns].
- elif data.dtype == _INT64_DTYPE:
- if isinstance(data, Int64Index):
- raise TypeError('cannot convert Int64Index->DatetimeIndex')
- if copy:
- subarr = np.asarray(data, dtype=_NS_DTYPE)
- else:
- subarr = data.view(_NS_DTYPE)
- # Any other dtype: parse string arrays, else go through tools.to_datetime.
- else:
- if isinstance(data, ABCSeries):
- values = data.values
- else:
- values = data
- if lib.is_string_array(values):
- subarr = _str_to_dt_array(values, freq, dayfirst=dayfirst,
- yearfirst=yearfirst)
- else:
- try:
- subarr = tools.to_datetime(data, box=False)
- # make sure that we have a index/ndarray like (and not a Series)
- if isinstance(subarr, ABCSeries):
- subarr = subarr.values
- except ValueError:
- # tz aware
- subarr = tools.to_datetime(data, box=False, utc=True)
- if not np.issubdtype(subarr.dtype, np.datetime64):
- raise ValueError('Unable to convert %s to datetime dtype'
- % str(data))
- # Time zone resolution: inherit from an index result, otherwise
- # localize naive values with the requested tz.
- if isinstance(subarr, DatetimeIndex):
- if tz is None:
- tz = subarr.tz
- else:
- if tz is not None:
- tz = tools._maybe_get_tz(tz)
- if (not isinstance(data, DatetimeIndex) or
- getattr(data, 'tz', None) is None):
- # Convert tz-naive to UTC
- ints = subarr.view('i8')
- subarr = tslib.tz_localize_to_utc(ints, tz,
- infer_dst=infer_dst)
- subarr = subarr.view(_NS_DTYPE)
- # Attach the index metadata to the final array.
- subarr = subarr.view(cls)
- subarr.name = name
- subarr.offset = freq
- subarr.tz = tz
- # With an explicit freq, the values must equal a range regenerated from
- # the first element at that freq whenever the inferred freq differs.
- if verify_integrity and len(subarr) > 0:
- if freq is not None and not freq_infer:
- inferred = subarr.inferred_freq
- if inferred != freq.freqstr:
- on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz)
- if not np.array_equal(subarr.asi8, on_freq.asi8):
- # NOTE(review): the two string pieces below join without a
- # space, so the message reads "does notconform".
- raise ValueError('Inferred frequency {0} from passed dates does not'
- 'conform to passed frequency {1}'.format(inferred, freq.freqstr))
- # freq='infer': adopt the inferred frequency when there is one.
- if freq_infer:
- inferred = subarr.inferred_freq
- if inferred:
- subarr.offset = to_offset(inferred)
- return subarr
- @classmethod
- def _generate(cls, start, end, periods, name, offset,
- tz=None, normalize=False, infer_dst=False, closed=None):
- # Generate a regular index at ``offset`` from two of start/end/periods.
- if com._count_not_none(start, end, periods) != 2:
- raise ValueError('Must specify two of start, end, or periods')
- _normalized = True
- if start is not None:
- start = Timestamp(start)
- if end is not None:
- end = Timestamp(end)
- left_closed = False
- right_closed = False
- # NOTE(review): presumably unreachable, since the count check above
- # already rejects start and end both being None; the message pieces
- # also join without a space ("startand"). Confirm intended condition.
- if start is None and end is None:
- if closed is not None:
- raise ValueError("Closed has to be None if not both of start"
- "and end are defined")
- # Translate ``closed`` into which endpoints are kept.
- if closed is None:
- left_closed = True
- right_closed = True
- elif closed == "left":
- left_closed = True
- elif closed == "right":
- right_closed = True
- else:
- raise ValueError("Closed has to be either 'left', 'right' or None")
- # NOTE(review): the bare except turns any failure of _infer_tzinfo
- # into this ValueError.
- try:
- inferred_tz = tools._infer_tzinfo(start, end)
- except:
- raise ValueError('Start and end cannot both be tz-aware with '
- 'different timezones')
- inferred_tz = tools._maybe_get_tz(inferred_tz)
- # these may need to be localized
- tz = tools._maybe_get_tz(tz, start or end)
- # A passed tz must agree with the one inferred from the endpoints;
- # with no passed tz the inferred one is used.
- if tz is not None and inferred_tz is not None:
- if not inferred_tz == tz:
- raise AssertionError("Inferred time zone not equal to passed "
- "time zone")
- elif inferred_tz is not None:
- tz = inferred_tz
- # _normalized ends up True only when each given endpoint is at midnight
- # or normalize=True forced it there.
- if start is not None:
- if normalize:
- start = normalize_date(start)
- _normalized = True
- else:
- _normalized = _normalized and start.time() == _midnight
- if end is not None:
- if normalize:
- end = normalize_date(end)
- _normalized = True
- else:
- _normalized = _normalized and end.time() == _midnight
- # Offsets exposing ``delta`` (other than one Day): naive endpoints are
- # localized before the range is generated.
- if hasattr(offset, 'delta') and offset != offsets.Day():
- if inferred_tz is None and tz is not None:
- # naive dates
- if start is not None and start.tz is None:
- start = start.tz_localize(tz)
- if end is not None and end.tz is None:
- end = end.tz_localize(tz)
- if start and end:
- if start.tz is None and end.tz is not None:
- start = start.tz_localize(end.tz)
- if end.tz is None and start.tz is not None:
- end = end.tz_localize(start.tz)
- if _use_cached_range(offset, _normalized, start, end):
- index = cls._cached_range(start, end, periods=periods,
- offset=offset, name=name)
- else:
- index = _generate_regular_range(start, end, periods, offset)
- # All other offsets: tz info is stripped from the endpoints, the range
- # is generated, and the result is localized afterwards.
- else:
- if inferred_tz is None and tz is not None:
- # naive dates
- if start is not None and start.tz is not None:
- start = start.replace(tzinfo=None)
- if end is not None and end.tz is not None:
- end = end.replace(tzinfo=None)
- if start and end:
- if start.tz is None and end.tz is not None:
- end = end.replace(tzinfo=None)
- if end.tz is None and start.tz is not None:
- start = start.replace(tzinfo=None)
- if _use_cached_range(offset, _normalized, start, end):
- index = cls._cached_range(start, end, periods=periods,
- offset=offset, name=name)
- else:
- index = _generate_regular_range(start, end, periods, offset)
- if tz is not None and getattr(index, 'tz', None) is None:
- index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz,
- infer_dst=infer_dst)
- index = index.view(_NS_DTYPE)
- index = index.view(cls)
- index.name = name
- index.offset = offset
- index.tz = tz
- # An open side drops the first/last element.
- # NOTE(review): the element is dropped without checking that it equals
- # start/end - confirm this is intended when the endpoint is off-offset.
- if not left_closed:
- index = index[1:]
- if not right_closed:
- index = index[:-1]
- return index
@property
def _box_func(self):
    """Callable boxing a raw value into a ``Timestamp``.

    The ``Timestamp`` is built with this index's ``offset`` and ``tz``,
    read at the time the callable is invoked.
    """
    def box(x):
        return Timestamp(x, offset=self.offset, tz=self.tz)
    return box
def _local_timestamps(self):
    """Return the i8 values converted from UTC to ``self.tz``.

    A monotonic index is converted directly. Otherwise the values are
    sorted, converted, and put back into their original order.
    """
    utc = _utc()
    if self.is_monotonic:
        return tslib.tz_convert(self.asi8, utc, self.tz)

    stamps = self.asi8
    sorter = stamps.argsort()
    converted = tslib.tz_convert(stamps.take(sorter), utc, self.tz)

    # Build the inverse of the sort permutation to undo the ordering.
    count = len(sorter)
    unsorter = np.empty(count, dtype=np.int_)
    unsorter.put(sorter, np.arange(count))
    return converted.take(unsorter)
@classmethod
def _simple_new(cls, values, name, freq=None, tz=None):
    """Wrap ``values`` as an index of this class without validation.

    Parameters
    ----------
    values : ndarray
        Used as-is when already datetime64[ns]; otherwise coerced to int64
        and viewed as datetime64[ns].
    name : object
        Stored as the index name.
    freq : optional
        Stored as ``offset``.
    tz : optional
        Passed through ``tools._maybe_get_tz`` and stored as ``tz``.
    """
    if values.dtype == _NS_DTYPE:
        stamps = values
    else:
        stamps = com._ensure_int64(values).view(_NS_DTYPE)

    index = stamps.view(cls)
    index.name = name
    index.offset = freq
    index.tz = tools._maybe_get_tz(tz)
    return index
@property
def tzinfo(self):
    """
    Same value as the ``tz`` attribute
    """
    zone = self.tz
    return zone
@classmethod
def _cached_range(cls, start=None, end=None, periods=None, offset=None,
                  name=None):
    """Slice a range out of the per-offset cache in ``_daterange_cache``.

    On first use of an ``offset`` the full range between ``_CACHE_START``
    and ``_CACHE_END`` is generated and stored. The requested window is
    then located with ``get_loc`` and returned as a slice of that range.

    Raises
    ------
    TypeError
        If neither ``start`` nor ``end`` is given, if only one of them is
        given without ``periods``, or if ``offset`` is None.
    AssertionError
        If the single supplied endpoint is not a ``Timestamp``.
    """
    if start is None and end is None:
        # I somewhat believe this should never be raised externally and therefore
        # should be a `PandasError` but whatever...
        raise TypeError('Must specify either start or end.')

    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)

    if periods is None and (start is None or end is None):
        raise TypeError('Must either specify period or provide both start and end.')

    if offset is None:
        # This can't happen with external-facing code, therefore PandasError
        raise TypeError('Must provide offset.')

    cache = _daterange_cache
    if offset in _daterange_cache:
        full_range = cache[offset]
    else:
        generated = generate_range(offset=offset, start=_CACHE_START,
                                   end=_CACHE_END)
        stamps = tools.to_datetime(list(generated), box=False)

        full_range = stamps.view(DatetimeIndex)
        full_range.offset = offset
        full_range.tz = None
        full_range.name = None

        cache[offset] = full_range

    if start is None:
        # Anchor on the end and count ``periods`` backwards.
        if not isinstance(end, Timestamp):
            raise AssertionError('end must be an instance of Timestamp')
        end = offset.rollback(end)
        stop_loc = full_range.get_loc(end) + 1
        start_loc = stop_loc - periods
    elif end is None:
        # Anchor on the start and count ``periods`` forwards.
        if not isinstance(start, Timestamp):
            raise AssertionError('start must be an instance of Timestamp')
        start = offset.rollforward(start)
        start_loc = full_range.get_loc(start)
        stop_loc = start_loc + periods
    else:
        # Both endpoints given: roll each onto the offset if it is off it.
        if not offset.onOffset(start):
            start = offset.rollforward(start)
        if not offset.onOffset(end):
            end = offset.rollback(end)
        start_loc = full_range.get_loc(start)
        stop_loc = full_range.get_loc(end) + 1

    window = full_range[start_loc:stop_loc]
    window.name = name
    window.offset = offset
    return window
def _mpl_repr(self):
    """Representation handed to matplotlib.

    The result of ``tslib.ints_to_pydatetime`` on the i8 values and ``tz``.
    """
    stamps, zone = self.asi8, self.tz
    return tslib.ints_to_pydatetime(stamps, zone)
- # Class attribute: the missing-value marker for this index type is tslib.NaT.
- _na_value = tslib.NaT
- """The expected NA value to use with this index."""
@cache_readonly
def _is_dates_only(self):
    """Result of ``pandas.core.format._is_dates_only`` on ``self.values``."""
    import pandas.core.format as fmt
    return fmt._is_dates_only(self.values)
@property
def _formatter_func(self):
    """Callable formatting a single value of this index.

    Wraps the formatter returned by ``_get_format_datetime64`` (chosen
    from ``self._is_dates_only``) and passes ``self.tz`` on each call.
    """
    from pandas.core.format import _get_format_datetime64
    base_formatter = _get_format_datetime64(is_dates_only=self._is_dates_only)

    def format_one(x):
        return base_formatter(x, tz=self.tz)
    return format_one
def __reduce__(self):
    """Necessary for making this object picklable"""
    base = np.ndarray.__reduce__(self)
    own_state = (self.name, self.offset, self.tz)
    # Pair the ndarray state (third item) with the index metadata.
    return tuple(base[:2]) + ((base[2], own_state),) + tuple(base[3:])
def __setstate__(self, state):
    """Necessary for making this object picklable"""
    if len(state) != 2:  # pragma: no cover
        # Not the (ndarray state, metadata) pair written by __reduce__.
        np.ndarray.__setstate__(self, state)
        return

    nd_state, own_state = state
    self.name = own_state[0]
    self.offset = own_state[1]
    self.tz = own_state[2]
    np.ndarray.__setstate__(self, nd_state)

    # provide numpy < 1.7 compat
    if nd_state[2] == 'M8[us]':
        as_ns = self.values.astype('M8[ns]')
        np.ndarray.__setstate__(self, np.ndarray.__reduce__(as_ns)[2])
def __add__(self, other):
    """Add ``other`` to the index.

    An Index is unioned, a DateOffset/timedelta/np.timedelta64 is added to
    every element, and an integer shifts the index by that many periods.
    Anything else raises TypeError.
    """
    if isinstance(other, Index):
        return self.union(other)
    if isinstance(other, (DateOffset, timedelta, np.timedelta64)):
        return self._add_delta(other)
    if com.is_integer(other):
        return self.shift(other)
    raise TypeError(other)  # pragma: no cover
def __sub__(self, other):
    """Subtract ``other`` from the index.

    An Index gives ``self.diff(other)``, a DateOffset/timedelta/
    np.timedelta64 is negated and added to every element, and an integer
    shifts the index backwards. Anything else raises TypeError.
    """
    if isinstance(other, Index):
        return self.diff(other)
    if isinstance(other, (DateOffset, timedelta, np.timedelta64)):
        return self._add_delta(-other)
    if com.is_integer(other):
        return self.shift(-other)
    raise TypeError(other)  # pragma: no cover
def _add_delta(self, delta):
    """Return a new DatetimeIndex with ``delta`` added to every element.

    Tick/timedelta deltas are added on the i8 values, keeping iNaT
    positions as iNaT. ``np.timedelta64`` goes through ``to_series`` and
    anything else through an object-dtype addition. The frequency of the
    result is inferred and the original time zone is restored.
    """
    if isinstance(delta, (Tick, timedelta)):
        nanos = offsets._delta_to_nanoseconds(delta)
        nat_mask = self.asi8 == tslib.iNaT
        shifted = (self.asi8 + nanos).view(_NS_DTYPE)
        shifted[nat_mask] = tslib.iNaT
        shifted = shifted.view(_NS_DTYPE)
    elif isinstance(delta, np.timedelta64):
        shifted = self.to_series() + delta
    else:
        shifted = self.astype('O') + delta

    # The result is built as UTC first and converted back afterwards.
    if self.tz is not None:
        build_tz = 'UTC'
    else:
        build_tz = None
    result = DatetimeIndex(shifted, tz=build_tz, freq='infer')

    utc = _utc()
    if self.tz is not None and self.tz is not utc:
        result = result.tz_convert(self.tz)
    return result
def __contains__(self, key):
    """True when ``get_loc(key)`` yields a scalar position or a slice.

    A KeyError or TypeError from the lookup means the key is absent.
    """
    try:
        loc = self.get_loc(key)
        return np.isscalar(loc) or isinstance(loc, slice)
    except (KeyError, TypeError):
        return False
def _format_with_header(self, header, **kwargs):
    """Return ``header`` followed by the formatted index values."""
    formatted = self._format_native_types(**kwargs)
    return header + formatted
def _format_native_types(self, na_rep=u('NaT'),
                         date_format=None, **kwargs):
    """Format the values with ``Datetime64Formatter``.

    ``na_rep`` is passed as the formatter's ``nat_rep`` and
    ``date_format`` is passed through; extra keyword arguments are ignored.
    """
    boxed = self.asobject
    from pandas.core.format import Datetime64Formatter
    formatter = Datetime64Formatter(values=boxed,
                                    nat_rep=na_rep,
                                    date_format=date_format,
                                    justify='all')
    return formatter.get_result()
def isin(self, values):
    """
    Compute boolean array of whether each index value is found in the
    passed set of values

    Parameters
    ----------
    values : set or sequence of values
        Converted to a DatetimeIndex when possible; if that conversion
        raises ValueError the check is done on the object-dtype index.

    Returns
    -------
    is_contained : ndarray (boolean dtype)
    """
    if not isinstance(values, DatetimeIndex):
        try:
            values = DatetimeIndex(values)
        except ValueError:
            return self.asobject.isin(values)

    return lib.ismember(self.asi8, set(values.asi8))
def to_datetime(self, dayfirst=False):
    """Return a copy of this index; ``dayfirst`` is not used."""
    duplicate = self.copy()
    return duplicate
def groupby(self, f):
    """Group the object-dtype values with ``_algos.groupby_object`` and ``f``."""
    return _algos.groupby_object(self.asobject, f)
def summary(self, name=None):
    """Return a short text summary of the index.

    The first line gives the name (the class name when ``name`` is None),
    the number of entries and, for a non-empty index, the first and last
    values. A second line with the frequency is added when one is set.
    """
    if len(self) > 0:
        first = com.pprint_thing(self[0])
        last = com.pprint_thing(self[-1])
        span = ', %s to %s' % (first, last)
    else:
        span = ''

    if name is None:
        name = type(self).__name__

    parts = ['%s: %s entries%s' % (com.pprint_thing(name),
                                   len(self), span)]
    if self.freq:
        parts.append('Freq: %s' % self.freqstr)
    return '\n'.join(parts)
def get_duplicates(self):
    """Return the result of ``Index.get_duplicates`` as a DatetimeIndex."""
    return DatetimeIndex(Index.get_duplicates(self))
def astype(self, dtype):
    """Cast the index to object or int64.

    Returns ``self.asobject`` for an object dtype and a copy of the i8
    values for int64. Any other dtype raises ValueError.
    """
    target = np.dtype(dtype)

    if target == np.object_:
        return self.asobject
    if target == _INT64_DTYPE:
        return self.asi8.copy()
    raise ValueError('Cannot cast DatetimeIndex to dtype %s' % target)  # pragma: no cover
def _get_time_micros(self):
    """Return ``tslib.get_time_micros`` of the index values.

    For a time zone other than UTC the values from ``_local_timestamps``
    are used instead of the raw i8 values.
    """
    utc = _utc()
    stamps = self.asi8
    use_local = self.tz is not None and self.tz is not utc
    if use_local:
        stamps = self._local_timestamps()
    return tslib.get_time_micros(stamps)
def to_series(self, keep_tz=False):
    """
    Create a Series whose index and values are both the index keys;
    useful with map for returning an indexer based on an index

    Parameters
    ----------
    keep_tz : bool, default False
        Whether the returned data keeps the timezone.

        With keep_tz=True, a Series of datetime64[ns] dtype results when
        the timezone is unset or UTC, and of object dtype otherwise.

        With keep_tz=False, the Series has a datetime64[ns] dtype.

    Returns
    -------
    Series
    """
    parent = super(DatetimeIndex, self)
    return parent.to_series(keep_tz=keep_tz)
def _to_embed(self, keep_tz=False):
    """ return an array repr of this object, potentially casting to object """
    # Object values are used only when a non-UTC zone must be kept.
    as_object = keep_tz and self.tz is not None and str(self.tz) != 'UTC'
    return self.asobject.values if as_object else self.values
def to_pydatetime(self):
    """
    Convert the index to an object ndarray of datetime.datetime objects

    Returns
    -------
    datetimes : ndarray
    """
    stamps = self.asi8
    return tslib.ints_to_pydatetime(stamps, tz=self.tz)
def to_period(self, freq=None):
    """
    Cast to PeriodIndex at a particular frequency

    When ``freq`` is None the index's own or inferred frequency is used;
    ValueError is raised if neither is available.
    """
    from pandas.tseries.period import PeriodIndex

    if freq is None:
        freq = self.freqstr or self.inferred_freq
        if freq is None:
            raise ValueError(
                "You must pass a freq argument as current index has none.")

    period_freq = get_period_alias(freq)
    return PeriodIndex(self.values, name=self.name, freq=period_freq,
                       tz=self.tz)
def order(self, return_indexer=False, ascending=True):
    """
    Return sorted copy of Index

    With ``return_indexer=True`` a ``(sorted_index, indexer)`` pair is
    returned; ``ascending=False`` reverses the sorted order.
    """
    if not return_indexer:
        ordered = np.sort(self.values)
        if not ascending:
            ordered = ordered[::-1]
        return self._simple_new(ordered, self.name, None,
                                self.tz)

    indexer = self.argsort()
    if not ascending:
        indexer = indexer[::-1]
    return self.take(indexer), indexer
def snap(self, freq='S'):
    """
    Snap time stamps to nearest occurring frequency

    A value already on ``freq`` is kept. Otherwise it is replaced by the
    rolled-back value when that is strictly closer, and by the
    rolled-forward value in all other cases.
    """
    # Superdumb, punting on any optimizing
    freq = to_offset(freq)

    snapped = np.empty(len(self), dtype=_NS_DTYPE)

    for pos, stamp in enumerate(self):
        if freq.onOffset(stamp):
            nearest = stamp
        else:
            before = freq.rollback(stamp)
            after = freq.rollforward(stamp)
            if abs(stamp - before) < abs(after - stamp):
                nearest = before
            else:
                nearest = after
        snapped[pos] = nearest

    # we know it conforms; skip check
    return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
def shift(self, n, freq=None):
    """
    Specialized shift which produces a DatetimeIndex

    Parameters
    ----------
    n : int
        Periods to shift by
    freq : DateOffset or timedelta-like, optional
        When given and different from the index's own offset, the shift
        is delegated to ``Index.shift`` with this frequency.

    Returns
    -------
    shifted : DatetimeIndex

    Raises
    ------
    ValueError
        If the index has no offset and ``n`` is non-zero.
    """
    if freq is not None and freq != self.offset:
        if isinstance(freq, compat.string_types):
            freq = to_offset(freq)
        shifted = Index.shift(self, n, freq)
        shifted.tz = self.tz
        return shifted

    if n == 0:
        # immutable so OK
        return self

    if self.offset is None:
        raise ValueError("Cannot shift with no offset")

    # Move both endpoints and regenerate the range between them.
    new_start = self[0] + n * self.offset
    new_end = self[-1] + n * self.offset
    return DatetimeIndex(start=new_start, end=new_end, freq=self.offset,
                         name=self.name, tz=self.tz)
def repeat(self, repeats, axis=None):
    """
    Analogous to ndarray.repeat

    Parameters
    ----------
    repeats : int or array of ints
        Passed to ``ndarray.repeat`` on the underlying values.
    axis : None
        Accepted for signature compatibility; not used.

    Returns
    -------
    repeated : DatetimeIndex
        Carries the name and time zone of this index and has no frequency.
    """
    # Go through _simple_new so the time zone is carried over. Calling the
    # DatetimeIndex constructor with only values and a name leaves tz as
    # None, turning a tz-aware index into a naive one.
    return self._simple_new(self.values.repeat(repeats), self.name,
                            tz=self.tz)
def take(self, indices, axis=0):
    """
    Analogous to ndarray.take

    When the indices are equivalent to a slice the index is sliced
    instead of going through the parent ``take``.
    """
    as_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices))
    if not isinstance(as_slice, slice):
        return super(DatetimeIndex, self).take(indices, axis)
    return self[as_slice]
def unique(self):
    """
    Index.unique with handling for DatetimeIndex metadata

    The unique values are re-wrapped with this index's name and tz.

    Returns
    -------
    result : DatetimeIndex
    """
    uniques = Int64Index.unique(self)
    return DatetimeIndex._simple_new(uniques, tz=self.tz,
                                     name=self.name)
def union(self, other):
    """
    Specialized union for DatetimeIndex objects. When combining
    overlapping ranges with the same DateOffset, this will be much
    faster than Index.union

    Parameters
    ----------
    other : DatetimeIndex or array-like
        Converted to a DatetimeIndex when possible; a TypeError from the
        conversion is ignored and ``other`` is used as passed.

    Returns
    -------
    y : Index or DatetimeIndex
    """
    if not isinstance(other, DatetimeIndex):
        try:
            other = DatetimeIndex(other)
        except TypeError:
            pass

    this, other = self._maybe_utc_convert(other)

    if this._can_fast_union(other):
        return this._fast_union(other)

    # Generic path: restore tz and, if missing, the inferred frequency.
    combined = Index.union(this, other)
    if isinstance(combined, DatetimeIndex):
        combined.tz = this.tz
        if combined.freq is None:
            combined.offset = to_offset(combined.inferred_freq)
    return combined
def union_many(self, others):
    """
    A bit of a hack to accelerate unioning a collection of indexes

    Each element of ``others`` is unioned into the running result, using
    the fast union whenever the running result allows it.
    """
    merged = self

    for other in others:
        # Once the running result stops being a DatetimeIndex, only the
        # generic union applies.
        if not isinstance(merged, DatetimeIndex):
            merged = Index.union(merged, other)
            continue

        if not isinstance(other, DatetimeIndex):
            try:
                other = DatetimeIndex(other)
            except TypeError:
                pass

        merged, other = merged._maybe_utc_convert(other)

        if merged._can_fast_union(other):
            merged = merged._fast_union(other)
            continue

        zone = merged.tz
        merged = Index.union(merged, other)
        if isinstance(merged, DatetimeIndex):
            merged.tz = zone

    if merged.freq is None:
        merged.offset = to_offset(merged.inferred_freq)
    return merged
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
        Keeps this index's name only if every Index being concatenated
        has that same name.
    """
    if isinstance(other, (list, tuple)):
        to_concat = [self] + list(other)
    else:
        to_concat = [self, other]

    name = self.name
    if any(isinstance(obj, Index) and obj.name != name
           for obj in to_concat):
        name = None

    to_concat = self._ensure_compat_concat(to_concat)
    to_concat, factory = _process_concat_data(to_concat, name)

    return factory(to_concat)
def join(self, other, how='left', level=None, return_indexers=False):
    """
    See Index.join

    A non-empty ``other`` whose inferred type is not floating or mixed is
    first converted to a DatetimeIndex when that conversion succeeds.
    """
    skip_kinds = ('floating', 'mixed-integer',
                  'mixed-integer-float', 'mixed')
    if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
            other.inferred_type not in skip_kinds):
        try:
            other = DatetimeIndex(other)
        except (TypeError, ValueError):
            pass

    this, other = self._maybe_utc_convert(other)
    return Index.join(this, other, how=how, level=level,
                      return_indexers=return_indexers)
def _maybe_utc_convert(self, other):
    """Return ``(this, other)`` with differing time zones converted to UTC.

    Anything that is not a DatetimeIndex is passed through unchanged.

    Raises
    ------
    TypeError
        If exactly one of the two indexes is tz-aware.
    """
    if not isinstance(other, DatetimeIndex):
        return self, other

    if (self.tz is None) != (other.tz is None):
        raise TypeError('Cannot join tz-naive with tz-aware '
                        'DatetimeIndex')

    if self.tz != other.tz:
        return self.tz_convert('UTC'), other.tz_convert('UTC')
    return self, other
def _wrap_joined_index(self, joined, other):
    """Wrap raw joined values as an index.

    The name is kept only when both operands share it. When ``other`` is a
    DatetimeIndex with the same offset that can be fast-unioned with this
    one, the values are wrapped with ``_view_like``. Otherwise they are
    wrapped with ``_simple_new`` using ``other``'s tz.
    """
    name = self.name if self.name == other.name else None
    compatible = (isinstance(other, DatetimeIndex)
                  and self.offset == other.offset
                  and self._can_fast_union(other))
    if not compatible:
        zone = getattr(other, 'tz', None)
        return self._simple_new(joined, name, tz=zone)

    joined = self._view_like(joined)
    joined.name = name
    return joined
def _can_fast_union(self, other):
    """Whether ``other`` can be unioned with this index via ``_fast_union``.

    Requires ``other`` to be a DatetimeIndex with the same non-None
    offset and both indexes to be monotonic. Non-empty ranges must also
    adjoin or overlap.
    """
    if not isinstance(other, DatetimeIndex):
        return False

    offset = self.offset
    if offset is None or offset != other.offset:
        return False

    if not (self.is_monotonic and other.is_monotonic):
        return False

    if len(self) == 0 or len(other) == 0:
        return True

    # to make our life easier, "sort" the two ranges
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    right_start = right[0]
    left_end = left[-1]

    # Only need to "adjoin", not overlap
    try:
        return (right_start == left_end + offset) or right_start in left
    except (ValueError):
        # if we are comparing an offset that does not propagate timezones
        # this will raise
        return False
def _fast_union(self, other):
    """Union two ranges without going through the generic set union.

    An empty operand returns a view of the other one. If the offset says
    it should be cached, a new range is generated from the combined
    endpoints. Otherwise the part of the right range lying past the left
    range's end is appended to the left values.
    """
    if len(other) == 0:
        return self.view(type(self))

    if len(self) == 0:
        return other.view(type(self))

    # to make our life easier, "sort" the two ranges
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    left_start, left_end = left[0], left[-1]
    right_end = right[-1]

    if self.offset._should_cache():
        return type(self)(start=left_start,
                          end=max(left_end, right_end),
                          freq=left.offset)

    # concatenate dates
    if not left_end < right_end:
        return left

    loc = right.searchsorted(left_end, side='right')
    tail = right.values[loc:]
    dates = com._concat_compat((left.values, tail))
    return self._view_like(dates)
def __array_finalize__(self, obj):
    """Copy ``offset``, ``tz`` and ``name`` from ``obj`` onto the new array.

    Attributes missing on ``obj`` become None; the identity is reset
    afterwards.
    """
    if self.ndim == 0:  # pragma: no cover
        return self.item()

    for attr in ('offset', 'tz', 'name'):
        setattr(self, attr, getattr(obj, attr, None))
    self._reset_identity()
def _wrap_union_result(self, other, result):
    """Wrap raw union values as an index with this index's tz and no freq.

    The name is kept only when both operands share it.

    Raises
    ------
    ValueError
        If the two operands have different time zones.
    """
    if self.name == other.name:
        name = self.name
    else:
        name = None

    if self.tz != other.tz:
        raise ValueError('Passed item and index have different timezone')
    return self._simple_new(result, name=name, freq=None, tz=self.tz)
def intersection(self, other):
    """
    Specialized intersection for DatetimeIndex objects. May be much faster
    than Index.intersection

    The fast path applies only when both operands are monotonic
    DatetimeIndexes sharing the same anchored offset; every other case
    goes through ``Index.intersection``.

    Parameters
    ----------
    other : DatetimeIndex or array-like

    Returns
    -------
    y : Index or DatetimeIndex
    """
    def _generic_intersection():
        # Generic set intersection; a DatetimeIndex result without a
        # frequency gets the inferred one.
        result = Index.intersection(self, other)
        if isinstance(result, DatetimeIndex) and result.freq is None:
            result.offset = to_offset(result.inferred_freq)
        return result

    if not isinstance(other, DatetimeIndex):
        try:
            other = DatetimeIndex(other)
        except (TypeError, ValueError):
            pass
        return _generic_intersection()

    if (other.offset is None or self.offset is None or
            other.offset != self.offset or
            not other.offset.isAnchored() or
            not (self.is_monotonic and other.is_monotonic)):
        return _generic_intersection()

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # to make our life easier, "sort" the two ranges
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        return type(self)(data=[])

    bounds = slice(*left.slice_locs(start, end))
    return self._view_like(left.values[bounds])
- def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
- # Locate the entries falling inside the period described by ``parsed``
- # at resolution ``reso``. Returns a slice for a monotonic index and an
- # array of positions otherwise; raises KeyError when the resolution is
- # not handled or the period lies outside a monotonic index.
- is_monotonic = self.is_monotonic
- # t1 / t2 are the first and last instants of the requested period.
- if reso == 'year':
- t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
- t2 = Timestamp(datetime(parsed.year, 12, 31, 23, 59, 59, 999999), tz=self.tz)
- elif reso == 'month':
- d = tslib.monthrange(parsed.year, parsed.month)[1]
- t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
- t2 = Timestamp(datetime(parsed.year, parsed.month, d, 23, 59, 59, 999999), tz=self.tz)
- elif reso == 'quarter':
- qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
- d = tslib.monthrange(parsed.year, qe)[1] # at end of month
- t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
- t2 = Timestamp(datetime(parsed.year, qe, d, 23, 59, 59, 999999), tz=self.tz)
- # Day and finer resolutions are handled only when the index resolution
- # compares below the requested one, or the index is not monotonic.
- elif (reso == 'day' and (self._resolution < Resolution.RESO_DAY or not is_monotonic)):
- st = datetime(parsed.year, parsed.month, parsed.day)
- t1 = Timestamp(st, tz=self.tz)
- t2 = st + offsets.Day()
- # The end is the start of the next period minus one unit of .value.
- t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
- elif (reso == 'hour' and (
- self._resolution < Resolution.RESO_HR or not is_monotonic)):
- st = datetime(parsed.year, parsed.month, parsed.day,
- hour=parsed.hour)
- t1 = Timestamp(st, tz=self.tz)
- t2 = Timestamp(Timestamp(st + offsets.Hour(),
- tz=self.tz).value - 1)
- elif (reso == 'minute' and (
- self._resolution < Resolution.RESO_MIN or not is_monotonic)):
- st = datetime(parsed.year, parsed.month, parsed.day,
- hour=parsed.hour, minute=parsed.minute)
- t1 = Timestamp(st, tz=self.tz)
- t2 = Timestamp(Timestamp(st + offsets.Minute(),
- tz=self.tz).value - 1)
- # NOTE(review): this branch tests the resolution with == where the
- # branches above use < - confirm that is intended.
- elif (reso == 'second' and (
- self._resolution == Resolution.RESO_SEC or not is_monotonic)):
- st = datetime(parsed.year, parsed.month, parsed.day,
- hour=parsed.hour, minute=parsed.minute, second=parsed.second)
- t1 = Timestamp(st, tz=self.tz)
- t2 = Timestamp(Timestamp(st + offsets.Second(),
- tz=self.tz).value - 1)
- else:
- raise KeyError
- stamps = self.asi8
- if is_monotonic:
- # we are out of range
- if len(stamps) and (
- (use_lhs and t1.value < stamps[0] and t2.value < stamps[0]) or (
- (use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1]))):
- raise KeyError
- # a monotonic (sorted) series can be sliced
- left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
- right = stamps.searchsorted(t2.value, side='right') if use_rhs else None
- return slice(left, right)
- # Non-monotonic index: build boolean masks for each requested bound.
- lhs_mask = (stamps >= t1.value) if use_lhs else True
- rhs_mask = (stamps <= t2.value) if use_rhs else True
- # try to find a the dates
- return (lhs_mask & rhs_mask).nonzero()[0]
- def _possibly_promote(self, other):
- if other.inferred_type == 'date':
- other = DatetimeIndex(other)
- return self, other
def get_value(self, series, key):
    """
    Look up the value of `series` stored under `key`.

    A ``datetime`` key goes straight to ``get_value_maybe_box``. Any other
    key is first tried through ``Index.get_value``; if that raises
    KeyError the key is tried, in order, as a partial date string, as a
    ``datetime.time`` and finally through ``get_value_maybe_box``.

    Raises
    ------
    KeyError
        If none of the lookups succeeds.
    """
    if isinstance(key, datetime):
        # needed to localize naive datetimes
        stamp = key if self.tz is None else Timestamp(key, tz=self.tz)
        return self.get_value_maybe_box(series, stamp)

    try:
        found = Index.get_value(self, series, key)
        return _maybe_box(self, found, series, key)
    except KeyError:
        # 1) partial date string, e.g. a year or a month
        try:
            return series[self._get_string_slice(key)]
        except (TypeError, ValueError, KeyError):
            pass

        # 2) time of day
        if isinstance(key, time):
            return series.take(self.indexer_at_time(key))

        # 3) anything a Timestamp can be built from
        try:
            return self.get_value_maybe_box(series, key)
        except (TypeError, ValueError, KeyError):
            raise KeyError(key)
def get_value_maybe_box(self, series, key):
    """
    Coerce `key` to a Timestamp, fetch the matching value through the
    index engine and pass it to ``_maybe_box``.
    """
    zone = self.tz
    if zone is not None:
        # needed to localize naive datetimes
        key = Timestamp(key, tz=zone)
    elif not isinstance(key, Timestamp):
        key = Timestamp(key)

    fetched = self._engine.get_value(_values_from_object(series), key)
    return _maybe_box(self, fetched, series, key)
def get_loc(self, key):
    """
    Get location for requested label

    Returns
    -------
    loc
        The result of the engine / ``Index.get_loc`` lookup, or whatever
        ``_get_string_slice`` / ``indexer_at_time`` return when the key is
        handled as a partial date string or a ``datetime.time``.

    Raises
    ------
    KeyError
        If the final Timestamp-based lookup raises KeyError or ValueError.
    """
    def _engine_loc(label):
        # needed to localize naive datetimes
        stamp = Timestamp(label, tz=self.tz)
        return self._engine.get_loc(stamp)

    if isinstance(key, datetime):
        return _engine_loc(key)

    try:
        return Index.get_loc(self, key)
    except (KeyError, ValueError):
        # partial date string?
        try:
            return self._get_string_slice(key)
        except (TypeError, KeyError, ValueError):
            pass

        # time of day?
        if isinstance(key, time):
            return self.indexer_at_time(key)

        # last resort: anything a Timestamp can be built from
        try:
            return _engine_loc(key)
        except (KeyError, ValueError):
            raise KeyError(key)
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
    """
    Parse a (partial) date string and locate the matching entries.

    Parameters
    ----------
    key : str
        Date string handed to ``parse_time_string``.
    use_lhs, use_rhs : bool, default True
        Forwarded unchanged to ``_partial_date_slice``.

    Returns
    -------
    The result of ``_partial_date_slice`` for the parsed key.
    """
    # The default argument of getattr() is evaluated eagerly, so the nested
    # form getattr(self, 'freqstr', getattr(self, 'inferred_freq', None))
    # looked up `inferred_freq` on every call even when `freqstr` exists.
    # Only consult the fallback when `freqstr` is really unavailable.
    # NOTE(review): a `freqstr` of None is still passed through as None,
    # exactly as before; the inferred frequency is not substituted for it.
    try:
        freq = self.freqstr
    except AttributeError:
        freq = getattr(self, 'inferred_freq', None)

    _, parsed, reso = parse_time_string(key, freq)
    return self._partial_date_slice(reso, parsed, use_lhs=use_lhs,
                                    use_rhs=use_rhs)
def slice_indexer(self, start=None, end=None, step=None):
    """
    Index.slice_indexer, customized to handle time slicing

    When both bounds are ``datetime.time`` objects the result comes from
    ``indexer_between_time``; otherwise the call is delegated to
    ``Index.slice_indexer``.

    Raises
    ------
    ValueError
        If both bounds are times and `step` is neither None nor 1.
    KeyError
        If exactly one bound is a time.
    TypeError
        If either bound is a float.
    """
    start_is_time = isinstance(start, time)
    end_is_time = isinstance(end, time)

    if start_is_time and end_is_time:
        if step is not None and step != 1:
            raise ValueError('Must have step size of 1 with time slices')
        return self.indexer_between_time(start, end)

    if start_is_time or end_is_time:
        raise KeyError('Cannot mix time and non-time slice keys')

    if any(isinstance(bound, float) for bound in (start, end)):
        raise TypeError('Cannot index datetime64 with float keys')

    return Index.slice_indexer(self, start, end, step)
def slice_locs(self, start=None, end=None):
    """
    Index.slice_locs, customized to handle partial ISO-8601 string slicing

    Returns
    -------
    (start_loc, end_loc)
        Integer bounds when the index is monotonic. For a non-monotonic
        index with a string bound, each element is instead an array of
        positions (or ``np.arange(len(self))`` for a missing bound).

    Raises
    ------
    KeyError
        If a ``datetime.time`` bound reaches the generic code path.
    """
    has_string_bound = (isinstance(start, compat.string_types) or
                        isinstance(end, compat.string_types))

    if has_string_bound:
        if self.is_monotonic:
            try:
                lower = self._get_string_slice(start).start if start else 0
                upper = (self._get_string_slice(end).stop if end
                         else len(self))
                return lower, upper
            except KeyError:
                # fall through to the generic lookup below
                pass
        else:
            # can't use a slice indexer because we are not sorted!
            # so create an indexer directly
            try:
                lower = (self._get_string_slice(start, use_rhs=False)
                         if start else np.arange(len(self)))
                upper = (self._get_string_slice(end, use_lhs=False)
                         if end else np.arange(len(self)))
                return lower, upper
            except KeyError:
                # fall through to the generic lookup below
                pass

    if isinstance(start, time) or isinstance(end, time):
        raise KeyError('Cannot use slice_locs with time slice keys')

    return Index.slice_locs(self, start, end)
def __getitem__(self, key):
    """
    Override numpy.ndarray's __getitem__ method to work as desired

    A scalar key yields a Timestamp carrying this index's offset and tz.
    Any other key yields a new index built with ``_simple_new``, except
    that a result with more than one dimension is returned as a plain
    ndarray.
    """
    raw = self.view(np.ndarray)

    if np.isscalar(key):
        picked = raw[key]
        return Timestamp(picked, offset=self.offset, tz=self.tz)

    if com._is_bool_indexer(key):
        key = np.asarray(key)
        if key.all():
            # an all-True mask selects everything
            key = slice(0, None, None)
        else:
            key = lib.maybe_booleans_to_slice(key.view(np.uint8))

    # the offset is only carried over for slice keys, scaled by the step
    new_offset = None
    if isinstance(key, slice):
        has_step = self.offset is not None and key.step is not None
        new_offset = key.step * self.offset if has_step else self.offset

    result = raw[key]
    if result.ndim > 1:
        return result

    return self._simple_new(result, self.name, new_offset, self.tz)
# First apply the function to the index as a whole; if that fails or does
# not produce an ndarray, apply it to each element of the index instead.
# Especially important for group-by functionality
def map(self, f):
    """
    Apply `f` to the index.

    Returns ``f(self)`` when that call succeeds and gives an ndarray;
    otherwise returns ``_algos.arrmap_object(self.asobject, f)``.
    """
    try:
        whole = f(self)
        if isinstance(whole, np.ndarray):
            return whole
        # not an ndarray: force the element-wise fallback below
        raise TypeError
    except Exception:
        return _algos.arrmap_object(self.asobject, f)
# `freq` is a read-only alias of the `offset` attribute
@property
def freq(self):
    """ Frequency object stored in ``offset`` (None when it is not set) """
    offset = self.offset
    return offset
@cache_readonly
def inferred_freq(self):
    """
    Result of ``infer_freq(self)``, or None when ``infer_freq`` raises
    ValueError.
    """
    try:
        guessed = infer_freq(self)
    except ValueError:
        guessed = None
    return guessed
@property
def freqstr(self):
    """ String form of the frequency (``offset.freqstr``), or None if no frequency is set """
    return None if self.freq is None else self.offset.freqstr
# Underscore-prefixed properties, one per datetime field, each created by
# _field_accessor(name, field[, docstring]).

# calendar date fields
_year = _field_accessor('year', 'Y')
_month = _field_accessor('month', 'M',
                         docstring="The month as January=1, December=12")
_day = _field_accessor('day', 'D')

# time-of-day fields
_hour = _field_accessor('hour', 'h')
_minute = _field_accessor('minute', 'm')
_second = _field_accessor('second', 's')
_microsecond = _field_accessor('microsecond', 'us')
_nanosecond = _field_accessor('nanosecond', 'ns')

# week / weekday / ordinal fields; `_week` and `_weekday` are second names
# bound to the same property objects as `_weekofyear` and `_dayofweek`
_weekofyear = _field_accessor('weekofyear', 'woy')
_week = _weekofyear
_dayofweek = _field_accessor(
    'dayofweek', 'dow',
    docstring="The day of the week with Monday=0, Sunday=6")
_weekday = _dayofweek
_dayofyear = _field_accessor('dayofyear', 'doy')
_quarter = _field_accessor('quarter', 'q')
- _is_…
Large files are truncated, but you can click here to view the full file