PageRenderTime 68ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tseries/index.py

http://github.com/pydata/pandas
Python | 2073 lines | 1827 code | 122 blank | 124 comment | 195 complexity | dd7e81ae1a7f2d2209998964c8866010 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # pylint: disable=E1101
  2. import operator
  3. from datetime import time, datetime
  4. from datetime import timedelta
  5. import numpy as np
  6. from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
  7. _values_from_object, _maybe_box,
  8. ABCSeries)
  9. from pandas.core.index import Index, Int64Index, Float64Index
  10. import pandas.compat as compat
  11. from pandas.compat import u
  12. from pandas.tseries.frequencies import (
  13. infer_freq, to_offset, get_period_alias,
  14. Resolution, get_reso_string, _tz_convert_with_transitions)
  15. from pandas.core.base import DatetimeIndexOpsMixin
  16. from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
  17. from pandas.tseries.tools import parse_time_string, normalize_date
  18. from pandas.util.decorators import cache_readonly
  19. import pandas.core.common as com
  20. import pandas.tseries.offsets as offsets
  21. import pandas.tseries.tools as tools
  22. from pandas.lib import Timestamp
  23. import pandas.lib as lib
  24. import pandas.tslib as tslib
  25. import pandas.algos as _algos
  26. import pandas.index as _index
  27. def _utc():
  28. import pytz
  29. return pytz.utc
  30. # -------- some conversion wrapper functions
  31. def _field_accessor(name, field, docstring=None):
  32. def f(self):
  33. values = self.asi8
  34. if self.tz is not None:
  35. utc = _utc()
  36. if self.tz is not utc:
  37. values = self._local_timestamps()
  38. if field in ['is_month_start', 'is_month_end',
  39. 'is_quarter_start', 'is_quarter_end',
  40. 'is_year_start', 'is_year_end']:
  41. month_kw = self.freq.kwds.get('startingMonth', self.freq.kwds.get('month', 12)) if self.freq else 12
  42. return tslib.get_start_end_field(values, field, self.freqstr, month_kw)
  43. else:
  44. return tslib.get_date_field(values, field)
  45. f.__name__ = name
  46. f.__doc__ = docstring
  47. return property(f)
  48. def _join_i8_wrapper(joinf, with_indexers=True):
  49. @staticmethod
  50. def wrapper(left, right):
  51. if isinstance(left, (np.ndarray, ABCSeries)):
  52. left = left.view('i8', type=np.ndarray)
  53. if isinstance(right, (np.ndarray, ABCSeries)):
  54. right = right.view('i8', type=np.ndarray)
  55. results = joinf(left, right)
  56. if with_indexers:
  57. join_index, left_indexer, right_indexer = results
  58. join_index = join_index.view('M8[ns]')
  59. return join_index, left_indexer, right_indexer
  60. return results
  61. return wrapper
  62. def _dt_index_cmp(opname, nat_result=False):
  63. """
  64. Wrap comparison operations to convert datetime-like to datetime64
  65. """
  66. def wrapper(self, other):
  67. func = getattr(super(DatetimeIndex, self), opname)
  68. if isinstance(other, datetime) or isinstance(other, compat.string_types):
  69. other = _to_m8(other, tz=self.tz)
  70. result = func(other)
  71. if com.isnull(other):
  72. result.fill(nat_result)
  73. else:
  74. if isinstance(other, list):
  75. other = DatetimeIndex(other)
  76. elif not isinstance(other, (np.ndarray, ABCSeries)):
  77. other = _ensure_datetime64(other)
  78. result = func(other)
  79. if isinstance(other, Index):
  80. o_mask = other.values.view('i8') == tslib.iNaT
  81. else:
  82. o_mask = other.view('i8') == tslib.iNaT
  83. if o_mask.any():
  84. result[o_mask] = nat_result
  85. mask = self.asi8 == tslib.iNaT
  86. if mask.any():
  87. result[mask] = nat_result
  88. return result.view(np.ndarray)
  89. return wrapper
  90. def _ensure_datetime64(other):
  91. if isinstance(other, np.datetime64):
  92. return other
  93. raise TypeError('%s type object %s' % (type(other), str(other)))
# Time-of-day constant for 00:00; DatetimeIndex._generate compares the
# start/end times against it to decide whether a range is normalized.
_midnight = time(0, 0)
  95. class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index):
  96. """
  97. Immutable ndarray of datetime64 data, represented internally as int64, and
  98. which can be boxed to Timestamp objects that are subclasses of datetime and
  99. carry metadata such as frequency information.
  100. Parameters
  101. ----------
  102. data : array-like (1-dimensional), optional
  103. Optional datetime-like data to construct index with
  104. copy : bool
  105. Make a copy of input ndarray
  106. freq : string or pandas offset object, optional
  107. One of pandas date offset strings or corresponding objects
  108. start : starting value, datetime-like, optional
  109. If data is None, start is used as the start point in generating regular
  110. timestamp data.
  111. periods : int, optional, > 0
  112. Number of periods to generate, if generating index. Takes precedence
  113. over end argument
  114. end : end time, datetime-like, optional
  115. If periods is none, generated index will extend to first conforming
  116. time on or just past end argument
  117. closed : string or None, default None
  118. Make the interval closed with respect to the given frequency to
  119. the 'left', 'right', or both sides (None)
  120. name : object
  121. Name to be stored in the index
  122. """
    # NOTE(review): presumably consulted by the join machinery to rank index
    # types -- confirm against Index.join
    _join_precedence = 10

    # int64 join routines wrapped so that they accept datetime64 input; the
    # three indexer-returning variants view the joined values as M8[ns]
    _inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64)
    _outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64)
    _left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64)
    _left_indexer_unique = _join_i8_wrapper(
        _algos.left_join_indexer_unique_int64, with_indexers=False)
    _arrmap = None

    # comparisons: positions holding NaT compare False, except for ``!=``
    # where they compare True
    __eq__ = _dt_index_cmp('__eq__')
    __ne__ = _dt_index_cmp('__ne__', nat_result=True)
    __lt__ = _dt_index_cmp('__lt__')
    __gt__ = _dt_index_cmp('__gt__')
    __le__ = _dt_index_cmp('__le__')
    __ge__ = _dt_index_cmp('__ge__')

    # structured array cache for datetime fields
    _sarr_cache = None

    _engine_type = _index.DatetimeEngine

    # class-level defaults for the per-instance metadata
    tz = None
    offset = None
    _comparables = ['name','freqstr','tz']
    _allow_datetime_index_ops = True
  143. def __new__(cls, data=None,
  144. freq=None, start=None, end=None, periods=None,
  145. copy=False, name=None, tz=None,
  146. verify_integrity=True, normalize=False,
  147. closed=None, **kwds):
  148. dayfirst = kwds.pop('dayfirst', None)
  149. yearfirst = kwds.pop('yearfirst', None)
  150. infer_dst = kwds.pop('infer_dst', False)
  151. freq_infer = False
  152. if not isinstance(freq, DateOffset):
  153. # if a passed freq is None, don't infer automatically
  154. if freq != 'infer':
  155. freq = to_offset(freq)
  156. else:
  157. freq_infer = True
  158. freq = None
  159. if periods is not None:
  160. if com.is_float(periods):
  161. periods = int(periods)
  162. elif not com.is_integer(periods):
  163. raise ValueError('Periods must be a number, got %s' %
  164. str(periods))
  165. if data is None and freq is None:
  166. raise ValueError("Must provide freq argument if no data is "
  167. "supplied")
  168. if data is None:
  169. return cls._generate(start, end, periods, name, freq,
  170. tz=tz, normalize=normalize, closed=closed,
  171. infer_dst=infer_dst)
  172. if not isinstance(data, (np.ndarray, ABCSeries)):
  173. if np.isscalar(data):
  174. raise ValueError('DatetimeIndex() must be called with a '
  175. 'collection of some kind, %s was passed'
  176. % repr(data))
  177. # other iterable of some kind
  178. if not isinstance(data, (list, tuple)):
  179. data = list(data)
  180. data = np.asarray(data, dtype='O')
  181. # try a few ways to make it datetime64
  182. if lib.is_string_array(data):
  183. data = _str_to_dt_array(data, freq, dayfirst=dayfirst,
  184. yearfirst=yearfirst)
  185. else:
  186. data = tools.to_datetime(data, errors='raise')
  187. data.offset = freq
  188. if isinstance(data, DatetimeIndex):
  189. if name is not None:
  190. data.name = name
  191. if tz is not None:
  192. return data.tz_localize(tz, infer_dst=infer_dst)
  193. return data
  194. if issubclass(data.dtype.type, compat.string_types):
  195. data = _str_to_dt_array(data, freq, dayfirst=dayfirst,
  196. yearfirst=yearfirst)
  197. if issubclass(data.dtype.type, np.datetime64):
  198. if isinstance(data, ABCSeries):
  199. data = data.values
  200. if isinstance(data, DatetimeIndex):
  201. if tz is None:
  202. tz = data.tz
  203. subarr = data.values
  204. if freq is None:
  205. freq = data.offset
  206. verify_integrity = False
  207. else:
  208. if data.dtype != _NS_DTYPE:
  209. subarr = tslib.cast_to_nanoseconds(data)
  210. else:
  211. subarr = data
  212. elif data.dtype == _INT64_DTYPE:
  213. if isinstance(data, Int64Index):
  214. raise TypeError('cannot convert Int64Index->DatetimeIndex')
  215. if copy:
  216. subarr = np.asarray(data, dtype=_NS_DTYPE)
  217. else:
  218. subarr = data.view(_NS_DTYPE)
  219. else:
  220. if isinstance(data, ABCSeries):
  221. values = data.values
  222. else:
  223. values = data
  224. if lib.is_string_array(values):
  225. subarr = _str_to_dt_array(values, freq, dayfirst=dayfirst,
  226. yearfirst=yearfirst)
  227. else:
  228. try:
  229. subarr = tools.to_datetime(data, box=False)
  230. # make sure that we have a index/ndarray like (and not a Series)
  231. if isinstance(subarr, ABCSeries):
  232. subarr = subarr.values
  233. except ValueError:
  234. # tz aware
  235. subarr = tools.to_datetime(data, box=False, utc=True)
  236. if not np.issubdtype(subarr.dtype, np.datetime64):
  237. raise ValueError('Unable to convert %s to datetime dtype'
  238. % str(data))
  239. if isinstance(subarr, DatetimeIndex):
  240. if tz is None:
  241. tz = subarr.tz
  242. else:
  243. if tz is not None:
  244. tz = tools._maybe_get_tz(tz)
  245. if (not isinstance(data, DatetimeIndex) or
  246. getattr(data, 'tz', None) is None):
  247. # Convert tz-naive to UTC
  248. ints = subarr.view('i8')
  249. subarr = tslib.tz_localize_to_utc(ints, tz,
  250. infer_dst=infer_dst)
  251. subarr = subarr.view(_NS_DTYPE)
  252. subarr = subarr.view(cls)
  253. subarr.name = name
  254. subarr.offset = freq
  255. subarr.tz = tz
  256. if verify_integrity and len(subarr) > 0:
  257. if freq is not None and not freq_infer:
  258. inferred = subarr.inferred_freq
  259. if inferred != freq.freqstr:
  260. on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz)
  261. if not np.array_equal(subarr.asi8, on_freq.asi8):
  262. raise ValueError('Inferred frequency {0} from passed dates does not'
  263. 'conform to passed frequency {1}'.format(inferred, freq.freqstr))
  264. if freq_infer:
  265. inferred = subarr.inferred_freq
  266. if inferred:
  267. subarr.offset = to_offset(inferred)
  268. return subarr
  269. @classmethod
  270. def _generate(cls, start, end, periods, name, offset,
  271. tz=None, normalize=False, infer_dst=False, closed=None):
  272. if com._count_not_none(start, end, periods) != 2:
  273. raise ValueError('Must specify two of start, end, or periods')
  274. _normalized = True
  275. if start is not None:
  276. start = Timestamp(start)
  277. if end is not None:
  278. end = Timestamp(end)
  279. left_closed = False
  280. right_closed = False
  281. if start is None and end is None:
  282. if closed is not None:
  283. raise ValueError("Closed has to be None if not both of start"
  284. "and end are defined")
  285. if closed is None:
  286. left_closed = True
  287. right_closed = True
  288. elif closed == "left":
  289. left_closed = True
  290. elif closed == "right":
  291. right_closed = True
  292. else:
  293. raise ValueError("Closed has to be either 'left', 'right' or None")
  294. try:
  295. inferred_tz = tools._infer_tzinfo(start, end)
  296. except:
  297. raise ValueError('Start and end cannot both be tz-aware with '
  298. 'different timezones')
  299. inferred_tz = tools._maybe_get_tz(inferred_tz)
  300. # these may need to be localized
  301. tz = tools._maybe_get_tz(tz, start or end)
  302. if tz is not None and inferred_tz is not None:
  303. if not inferred_tz == tz:
  304. raise AssertionError("Inferred time zone not equal to passed "
  305. "time zone")
  306. elif inferred_tz is not None:
  307. tz = inferred_tz
  308. if start is not None:
  309. if normalize:
  310. start = normalize_date(start)
  311. _normalized = True
  312. else:
  313. _normalized = _normalized and start.time() == _midnight
  314. if end is not None:
  315. if normalize:
  316. end = normalize_date(end)
  317. _normalized = True
  318. else:
  319. _normalized = _normalized and end.time() == _midnight
  320. if hasattr(offset, 'delta') and offset != offsets.Day():
  321. if inferred_tz is None and tz is not None:
  322. # naive dates
  323. if start is not None and start.tz is None:
  324. start = start.tz_localize(tz)
  325. if end is not None and end.tz is None:
  326. end = end.tz_localize(tz)
  327. if start and end:
  328. if start.tz is None and end.tz is not None:
  329. start = start.tz_localize(end.tz)
  330. if end.tz is None and start.tz is not None:
  331. end = end.tz_localize(start.tz)
  332. if _use_cached_range(offset, _normalized, start, end):
  333. index = cls._cached_range(start, end, periods=periods,
  334. offset=offset, name=name)
  335. else:
  336. index = _generate_regular_range(start, end, periods, offset)
  337. else:
  338. if inferred_tz is None and tz is not None:
  339. # naive dates
  340. if start is not None and start.tz is not None:
  341. start = start.replace(tzinfo=None)
  342. if end is not None and end.tz is not None:
  343. end = end.replace(tzinfo=None)
  344. if start and end:
  345. if start.tz is None and end.tz is not None:
  346. end = end.replace(tzinfo=None)
  347. if end.tz is None and start.tz is not None:
  348. start = start.replace(tzinfo=None)
  349. if _use_cached_range(offset, _normalized, start, end):
  350. index = cls._cached_range(start, end, periods=periods,
  351. offset=offset, name=name)
  352. else:
  353. index = _generate_regular_range(start, end, periods, offset)
  354. if tz is not None and getattr(index, 'tz', None) is None:
  355. index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz,
  356. infer_dst=infer_dst)
  357. index = index.view(_NS_DTYPE)
  358. index = index.view(cls)
  359. index.name = name
  360. index.offset = offset
  361. index.tz = tz
  362. if not left_closed:
  363. index = index[1:]
  364. if not right_closed:
  365. index = index[:-1]
  366. return index
  367. @property
  368. def _box_func(self):
  369. return lambda x: Timestamp(x, offset=self.offset, tz=self.tz)
  370. def _local_timestamps(self):
  371. utc = _utc()
  372. if self.is_monotonic:
  373. return tslib.tz_convert(self.asi8, utc, self.tz)
  374. else:
  375. values = self.asi8
  376. indexer = values.argsort()
  377. result = tslib.tz_convert(values.take(indexer), utc, self.tz)
  378. n = len(indexer)
  379. reverse = np.empty(n, dtype=np.int_)
  380. reverse.put(indexer, np.arange(n))
  381. return result.take(reverse)
  382. @classmethod
  383. def _simple_new(cls, values, name, freq=None, tz=None):
  384. if values.dtype != _NS_DTYPE:
  385. values = com._ensure_int64(values).view(_NS_DTYPE)
  386. result = values.view(cls)
  387. result.name = name
  388. result.offset = freq
  389. result.tz = tools._maybe_get_tz(tz)
  390. return result
  391. @property
  392. def tzinfo(self):
  393. """
  394. Alias for tz attribute
  395. """
  396. return self.tz
  397. @classmethod
  398. def _cached_range(cls, start=None, end=None, periods=None, offset=None,
  399. name=None):
  400. if start is None and end is None:
  401. # I somewhat believe this should never be raised externally and therefore
  402. # should be a `PandasError` but whatever...
  403. raise TypeError('Must specify either start or end.')
  404. if start is not None:
  405. start = Timestamp(start)
  406. if end is not None:
  407. end = Timestamp(end)
  408. if (start is None or end is None) and periods is None:
  409. raise TypeError('Must either specify period or provide both start and end.')
  410. if offset is None:
  411. # This can't happen with external-facing code, therefore PandasError
  412. raise TypeError('Must provide offset.')
  413. drc = _daterange_cache
  414. if offset not in _daterange_cache:
  415. xdr = generate_range(offset=offset, start=_CACHE_START,
  416. end=_CACHE_END)
  417. arr = tools.to_datetime(list(xdr), box=False)
  418. cachedRange = arr.view(DatetimeIndex)
  419. cachedRange.offset = offset
  420. cachedRange.tz = None
  421. cachedRange.name = None
  422. drc[offset] = cachedRange
  423. else:
  424. cachedRange = drc[offset]
  425. if start is None:
  426. if not isinstance(end, Timestamp):
  427. raise AssertionError('end must be an instance of Timestamp')
  428. end = offset.rollback(end)
  429. endLoc = cachedRange.get_loc(end) + 1
  430. startLoc = endLoc - periods
  431. elif end is None:
  432. if not isinstance(start, Timestamp):
  433. raise AssertionError('start must be an instance of Timestamp')
  434. start = offset.rollforward(start)
  435. startLoc = cachedRange.get_loc(start)
  436. endLoc = startLoc + periods
  437. else:
  438. if not offset.onOffset(start):
  439. start = offset.rollforward(start)
  440. if not offset.onOffset(end):
  441. end = offset.rollback(end)
  442. startLoc = cachedRange.get_loc(start)
  443. endLoc = cachedRange.get_loc(end) + 1
  444. indexSlice = cachedRange[startLoc:endLoc]
  445. indexSlice.name = name
  446. indexSlice.offset = offset
  447. return indexSlice
  448. def _mpl_repr(self):
  449. # how to represent ourselves to matplotlib
  450. return tslib.ints_to_pydatetime(self.asi8, self.tz)
    # NA marker for this index type is tslib.NaT
    _na_value = tslib.NaT
    """The expected NA value to use with this index."""
  453. @cache_readonly
  454. def _is_dates_only(self):
  455. from pandas.core.format import _is_dates_only
  456. return _is_dates_only(self.values)
  457. @property
  458. def _formatter_func(self):
  459. from pandas.core.format import _get_format_datetime64
  460. formatter = _get_format_datetime64(is_dates_only=self._is_dates_only)
  461. return lambda x: formatter(x, tz=self.tz)
  462. def __reduce__(self):
  463. """Necessary for making this object picklable"""
  464. object_state = list(np.ndarray.__reduce__(self))
  465. subclass_state = self.name, self.offset, self.tz
  466. object_state[2] = (object_state[2], subclass_state)
  467. return tuple(object_state)
  468. def __setstate__(self, state):
  469. """Necessary for making this object picklable"""
  470. if len(state) == 2:
  471. nd_state, own_state = state
  472. self.name = own_state[0]
  473. self.offset = own_state[1]
  474. self.tz = own_state[2]
  475. np.ndarray.__setstate__(self, nd_state)
  476. # provide numpy < 1.7 compat
  477. if nd_state[2] == 'M8[us]':
  478. new_state = np.ndarray.__reduce__(self.values.astype('M8[ns]'))
  479. np.ndarray.__setstate__(self, new_state[2])
  480. else: # pragma: no cover
  481. np.ndarray.__setstate__(self, state)
  482. def __add__(self, other):
  483. if isinstance(other, Index):
  484. return self.union(other)
  485. elif isinstance(other, (DateOffset, timedelta)):
  486. return self._add_delta(other)
  487. elif isinstance(other, np.timedelta64):
  488. return self._add_delta(other)
  489. elif com.is_integer(other):
  490. return self.shift(other)
  491. else: # pragma: no cover
  492. raise TypeError(other)
  493. def __sub__(self, other):
  494. if isinstance(other, Index):
  495. return self.diff(other)
  496. elif isinstance(other, (DateOffset, timedelta)):
  497. return self._add_delta(-other)
  498. elif isinstance(other, np.timedelta64):
  499. return self._add_delta(-other)
  500. elif com.is_integer(other):
  501. return self.shift(-other)
  502. else: # pragma: no cover
  503. raise TypeError(other)
  504. def _add_delta(self, delta):
  505. if isinstance(delta, (Tick, timedelta)):
  506. inc = offsets._delta_to_nanoseconds(delta)
  507. mask = self.asi8 == tslib.iNaT
  508. new_values = (self.asi8 + inc).view(_NS_DTYPE)
  509. new_values[mask] = tslib.iNaT
  510. new_values = new_values.view(_NS_DTYPE)
  511. elif isinstance(delta, np.timedelta64):
  512. new_values = self.to_series() + delta
  513. else:
  514. new_values = self.astype('O') + delta
  515. tz = 'UTC' if self.tz is not None else None
  516. result = DatetimeIndex(new_values, tz=tz, freq='infer')
  517. utc = _utc()
  518. if self.tz is not None and self.tz is not utc:
  519. result = result.tz_convert(self.tz)
  520. return result
  521. def __contains__(self, key):
  522. try:
  523. res = self.get_loc(key)
  524. return np.isscalar(res) or type(res) == slice
  525. except (KeyError, TypeError):
  526. return False
  527. def _format_with_header(self, header, **kwargs):
  528. return header + self._format_native_types(**kwargs)
  529. def _format_native_types(self, na_rep=u('NaT'),
  530. date_format=None, **kwargs):
  531. data = self.asobject
  532. from pandas.core.format import Datetime64Formatter
  533. return Datetime64Formatter(values=data,
  534. nat_rep=na_rep,
  535. date_format=date_format,
  536. justify='all').get_result()
  537. def isin(self, values):
  538. """
  539. Compute boolean array of whether each index value is found in the
  540. passed set of values
  541. Parameters
  542. ----------
  543. values : set or sequence of values
  544. Returns
  545. -------
  546. is_contained : ndarray (boolean dtype)
  547. """
  548. if not isinstance(values, DatetimeIndex):
  549. try:
  550. values = DatetimeIndex(values)
  551. except ValueError:
  552. return self.asobject.isin(values)
  553. value_set = set(values.asi8)
  554. return lib.ismember(self.asi8, value_set)
  555. def to_datetime(self, dayfirst=False):
  556. return self.copy()
  557. def groupby(self, f):
  558. objs = self.asobject
  559. return _algos.groupby_object(objs, f)
  560. def summary(self, name=None):
  561. if len(self) > 0:
  562. index_summary = ', %s to %s' % (com.pprint_thing(self[0]),
  563. com.pprint_thing(self[-1]))
  564. else:
  565. index_summary = ''
  566. if name is None:
  567. name = type(self).__name__
  568. result = '%s: %s entries%s' % (com.pprint_thing(name),
  569. len(self), index_summary)
  570. if self.freq:
  571. result += '\nFreq: %s' % self.freqstr
  572. return result
  573. def get_duplicates(self):
  574. values = Index.get_duplicates(self)
  575. return DatetimeIndex(values)
  576. def astype(self, dtype):
  577. dtype = np.dtype(dtype)
  578. if dtype == np.object_:
  579. return self.asobject
  580. elif dtype == _INT64_DTYPE:
  581. return self.asi8.copy()
  582. else: # pragma: no cover
  583. raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
  584. def _get_time_micros(self):
  585. utc = _utc()
  586. values = self.asi8
  587. if self.tz is not None and self.tz is not utc:
  588. values = self._local_timestamps()
  589. return tslib.get_time_micros(values)
  590. def to_series(self, keep_tz=False):
  591. """
  592. Create a Series with both index and values equal to the index keys
  593. useful with map for returning an indexer based on an index
  594. Parameters
  595. ----------
  596. keep_tz : optional, defaults False.
  597. return the data keeping the timezone.
  598. If keep_tz is True:
  599. If the timezone is not set or is UTC, the resulting
  600. Series will have a datetime64[ns] dtype.
  601. Otherwise the Series will have an object dtype.
  602. If keep_tz is False:
  603. Series will have a datetime64[ns] dtype.
  604. Returns
  605. -------
  606. Series
  607. """
  608. return super(DatetimeIndex, self).to_series(keep_tz=keep_tz)
  609. def _to_embed(self, keep_tz=False):
  610. """ return an array repr of this object, potentially casting to object """
  611. if keep_tz and self.tz is not None and str(self.tz) != 'UTC':
  612. return self.asobject.values
  613. return self.values
  614. def to_pydatetime(self):
  615. """
  616. Return DatetimeIndex as object ndarray of datetime.datetime objects
  617. Returns
  618. -------
  619. datetimes : ndarray
  620. """
  621. return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
    def to_period(self, freq=None):
        """
        Cast to PeriodIndex at a particular frequency

        Parameters
        ----------
        freq : string or None
            When None, the index's own ``freqstr`` is used, falling back to
            ``inferred_freq``

        Raises
        ------
        ValueError
            If ``freq`` is None and the index has neither a set nor an
            inferable frequency
        """
        from pandas.tseries.period import PeriodIndex

        if freq is None:
            freq = self.freqstr or self.inferred_freq

            if freq is None:
                msg = "You must pass a freq argument as current index has none."
                raise ValueError(msg)

            # NOTE(review): nesting reconstructed -- the alias lookup is
            # assumed to apply only to the index-derived freq; confirm
            freq = get_period_alias(freq)

        return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz)
  634. def order(self, return_indexer=False, ascending=True):
  635. """
  636. Return sorted copy of Index
  637. """
  638. if return_indexer:
  639. _as = self.argsort()
  640. if not ascending:
  641. _as = _as[::-1]
  642. sorted_index = self.take(_as)
  643. return sorted_index, _as
  644. else:
  645. sorted_values = np.sort(self.values)
  646. if not ascending:
  647. sorted_values = sorted_values[::-1]
  648. return self._simple_new(sorted_values, self.name, None,
  649. self.tz)
  650. def snap(self, freq='S'):
  651. """
  652. Snap time stamps to nearest occurring frequency
  653. """
  654. # Superdumb, punting on any optimizing
  655. freq = to_offset(freq)
  656. snapped = np.empty(len(self), dtype=_NS_DTYPE)
  657. for i, v in enumerate(self):
  658. s = v
  659. if not freq.onOffset(s):
  660. t0 = freq.rollback(s)
  661. t1 = freq.rollforward(s)
  662. if abs(s - t0) < abs(t1 - s):
  663. s = t0
  664. else:
  665. s = t1
  666. snapped[i] = s
  667. # we know it conforms; skip check
  668. return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
  669. def shift(self, n, freq=None):
  670. """
  671. Specialized shift which produces a DatetimeIndex
  672. Parameters
  673. ----------
  674. n : int
  675. Periods to shift by
  676. freq : DateOffset or timedelta-like, optional
  677. Returns
  678. -------
  679. shifted : DatetimeIndex
  680. """
  681. if freq is not None and freq != self.offset:
  682. if isinstance(freq, compat.string_types):
  683. freq = to_offset(freq)
  684. result = Index.shift(self, n, freq)
  685. result.tz = self.tz
  686. return result
  687. if n == 0:
  688. # immutable so OK
  689. return self
  690. if self.offset is None:
  691. raise ValueError("Cannot shift with no offset")
  692. start = self[0] + n * self.offset
  693. end = self[-1] + n * self.offset
  694. return DatetimeIndex(start=start, end=end, freq=self.offset,
  695. name=self.name, tz=self.tz)
  696. def repeat(self, repeats, axis=None):
  697. """
  698. Analogous to ndarray.repeat
  699. """
  700. return DatetimeIndex(self.values.repeat(repeats),
  701. name=self.name)
  702. def take(self, indices, axis=0):
  703. """
  704. Analogous to ndarray.take
  705. """
  706. maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices))
  707. if isinstance(maybe_slice, slice):
  708. return self[maybe_slice]
  709. return super(DatetimeIndex, self).take(indices, axis)
  710. def unique(self):
  711. """
  712. Index.unique with handling for DatetimeIndex metadata
  713. Returns
  714. -------
  715. result : DatetimeIndex
  716. """
  717. result = Int64Index.unique(self)
  718. return DatetimeIndex._simple_new(result, tz=self.tz,
  719. name=self.name)
  720. def union(self, other):
  721. """
  722. Specialized union for DatetimeIndex objects. If combine
  723. overlapping ranges with the same DateOffset, will be much
  724. faster than Index.union
  725. Parameters
  726. ----------
  727. other : DatetimeIndex or array-like
  728. Returns
  729. -------
  730. y : Index or DatetimeIndex
  731. """
  732. if not isinstance(other, DatetimeIndex):
  733. try:
  734. other = DatetimeIndex(other)
  735. except TypeError:
  736. pass
  737. this, other = self._maybe_utc_convert(other)
  738. if this._can_fast_union(other):
  739. return this._fast_union(other)
  740. else:
  741. result = Index.union(this, other)
  742. if isinstance(result, DatetimeIndex):
  743. result.tz = this.tz
  744. if result.freq is None:
  745. result.offset = to_offset(result.inferred_freq)
  746. return result
    def union_many(self, others):
        """
        A bit of a hack to accelerate unioning a collection of indexes

        Parameters
        ----------
        others : iterable of Index or array-like
            Unioned into the running result one at a time

        Returns
        -------
        Index or DatetimeIndex
        """
        this = self

        for other in others:
            # once the running result is no longer a DatetimeIndex, fall
            # back to the generic union for the remaining items
            if not isinstance(this, DatetimeIndex):
                this = Index.union(this, other)
                continue

            if not isinstance(other, DatetimeIndex):
                # best effort: input that cannot be converted is left as-is
                try:
                    other = DatetimeIndex(other)
                except TypeError:
                    pass

            this, other = this._maybe_utc_convert(other)

            if this._can_fast_union(other):
                this = this._fast_union(other)
            else:
                tz = this.tz
                this = Index.union(this, other)
                if isinstance(this, DatetimeIndex):
                    this.tz = tz

        # NOTE(review): nesting reconstructed -- the frequency inference is
        # assumed to run once, after the loop; confirm
        if this.freq is None:
            this.offset = to_offset(this.inferred_freq)
        return this
  772. def append(self, other):
  773. """
  774. Append a collection of Index options together
  775. Parameters
  776. ----------
  777. other : Index or list/tuple of indices
  778. Returns
  779. -------
  780. appended : Index
  781. """
  782. name = self.name
  783. to_concat = [self]
  784. if isinstance(other, (list, tuple)):
  785. to_concat = to_concat + list(other)
  786. else:
  787. to_concat.append(other)
  788. for obj in to_concat:
  789. if isinstance(obj, Index) and obj.name != name:
  790. name = None
  791. break
  792. to_concat = self._ensure_compat_concat(to_concat)
  793. to_concat, factory = _process_concat_data(to_concat, name)
  794. return factory(to_concat)
  795. def join(self, other, how='left', level=None, return_indexers=False):
  796. """
  797. See Index.join
  798. """
  799. if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
  800. other.inferred_type not in ('floating', 'mixed-integer',
  801. 'mixed-integer-float', 'mixed')):
  802. try:
  803. other = DatetimeIndex(other)
  804. except (TypeError, ValueError):
  805. pass
  806. this, other = self._maybe_utc_convert(other)
  807. return Index.join(this, other, how=how, level=level,
  808. return_indexers=return_indexers)
  809. def _maybe_utc_convert(self, other):
  810. this = self
  811. if isinstance(other, DatetimeIndex):
  812. if self.tz is not None:
  813. if other.tz is None:
  814. raise TypeError('Cannot join tz-naive with tz-aware '
  815. 'DatetimeIndex')
  816. elif other.tz is not None:
  817. raise TypeError('Cannot join tz-naive with tz-aware '
  818. 'DatetimeIndex')
  819. if self.tz != other.tz:
  820. this = self.tz_convert('UTC')
  821. other = other.tz_convert('UTC')
  822. return this, other
  823. def _wrap_joined_index(self, joined, other):
  824. name = self.name if self.name == other.name else None
  825. if (isinstance(other, DatetimeIndex)
  826. and self.offset == other.offset
  827. and self._can_fast_union(other)):
  828. joined = self._view_like(joined)
  829. joined.name = name
  830. return joined
  831. else:
  832. tz = getattr(other, 'tz', None)
  833. return self._simple_new(joined, name, tz=tz)
  834. def _can_fast_union(self, other):
  835. if not isinstance(other, DatetimeIndex):
  836. return False
  837. offset = self.offset
  838. if offset is None or offset != other.offset:
  839. return False
  840. if not self.is_monotonic or not other.is_monotonic:
  841. return False
  842. if len(self) == 0 or len(other) == 0:
  843. return True
  844. # to make our life easier, "sort" the two ranges
  845. if self[0] <= other[0]:
  846. left, right = self, other
  847. else:
  848. left, right = other, self
  849. right_start = right[0]
  850. left_end = left[-1]
  851. # Only need to "adjoin", not overlap
  852. try:
  853. return (right_start == left_end + offset) or right_start in left
  854. except (ValueError):
  855. # if we are comparing an offset that does not propogate timezones
  856. # this will raise
  857. return False
  858. def _fast_union(self, other):
  859. if len(other) == 0:
  860. return self.view(type(self))
  861. if len(self) == 0:
  862. return other.view(type(self))
  863. # to make our life easier, "sort" the two ranges
  864. if self[0] <= other[0]:
  865. left, right = self, other
  866. else:
  867. left, right = other, self
  868. left_start, left_end = left[0], left[-1]
  869. right_end = right[-1]
  870. if not self.offset._should_cache():
  871. # concatenate dates
  872. if left_end < right_end:
  873. loc = right.searchsorted(left_end, side='right')
  874. right_chunk = right.values[loc:]
  875. dates = com._concat_compat((left.values, right_chunk))
  876. return self._view_like(dates)
  877. else:
  878. return left
  879. else:
  880. return type(self)(start=left_start,
  881. end=max(left_end, right_end),
  882. freq=left.offset)
def __array_finalize__(self, obj):
    """
    Copy index metadata from ``obj`` onto this newly created array.

    ``offset``, ``tz`` and ``name`` are taken from ``obj`` when present and
    default to None otherwise; the identity marker is then reset.
    """
    # zero-dimensional results are handed back as a scalar item
    if self.ndim == 0:  # pragma: no cover
        return self.item()

    self.offset = getattr(obj, 'offset', None)
    self.tz = getattr(obj, 'tz', None)
    self.name = getattr(obj, 'name', None)
    self._reset_identity()
  890. def _wrap_union_result(self, other, result):
  891. name = self.name if self.name == other.name else None
  892. if self.tz != other.tz:
  893. raise ValueError('Passed item and index have different timezone')
  894. return self._simple_new(result, name=name, freq=None, tz=self.tz)
  895. def intersection(self, other):
  896. """
  897. Specialized intersection for DatetimeIndex objects. May be much faster
  898. than Index.intersection
  899. Parameters
  900. ----------
  901. other : DatetimeIndex or array-like
  902. Returns
  903. -------
  904. y : Index or DatetimeIndex
  905. """
  906. if not isinstance(other, DatetimeIndex):
  907. try:
  908. other = DatetimeIndex(other)
  909. except (TypeError, ValueError):
  910. pass
  911. result = Index.intersection(self, other)
  912. if isinstance(result, DatetimeIndex):
  913. if result.freq is None:
  914. result.offset = to_offset(result.inferred_freq)
  915. return result
  916. elif (other.offset is None or self.offset is None or
  917. other.offset != self.offset or
  918. not other.offset.isAnchored() or
  919. (not self.is_monotonic or not other.is_monotonic)):
  920. result = Index.intersection(self, other)
  921. if isinstance(result, DatetimeIndex):
  922. if result.freq is None:
  923. result.offset = to_offset(result.inferred_freq)
  924. return result
  925. if len(self) == 0:
  926. return self
  927. if len(other) == 0:
  928. return other
  929. # to make our life easier, "sort" the two ranges
  930. if self[0] <= other[0]:
  931. left, right = self, other
  932. else:
  933. left, right = other, self
  934. end = min(left[-1], right[-1])
  935. start = right[0]
  936. if end < start:
  937. return type(self)(data=[])
  938. else:
  939. lslice = slice(*left.slice_locs(start, end))
  940. left_chunk = left.values[lslice]
  941. return self._view_like(left_chunk)
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
    """
    Locate the entries covered by a partially specified date.

    Parameters
    ----------
    reso : str
        Resolution of the parsed string; 'year', 'month', 'quarter',
        'day', 'hour', 'minute' and 'second' are handled here.
    parsed : datetime-like
        Parsed date whose fields define the start of the window.
    use_lhs : bool, default True
        Apply the lower bound of the window.
    use_rhs : bool, default True
        Apply the upper bound of the window.

    Returns
    -------
    slice or ndarray
        A slice when the index is monotonic, otherwise the integer
        positions of the matching entries.

    Raises
    ------
    KeyError
        If ``reso`` is not handled (including the day/hour/minute/second
        cases whose resolution condition is not met), or if the index is
        monotonic and the requested window lies entirely outside it.
    """
    is_monotonic = self.is_monotonic

    # [t1, t2] is the closed window of timestamps matching the partial date
    if reso == 'year':
        t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
        t2 = Timestamp(datetime(parsed.year, 12, 31, 23, 59, 59, 999999), tz=self.tz)
    elif reso == 'month':
        d = tslib.monthrange(parsed.year, parsed.month)[1]
        t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
        t2 = Timestamp(datetime(parsed.year, parsed.month, d, 23, 59, 59, 999999), tz=self.tz)
    elif reso == 'quarter':
        qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
        d = tslib.monthrange(parsed.year, qe)[1]   # at end of month
        t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
        t2 = Timestamp(datetime(parsed.year, qe, d, 23, 59, 59, 999999), tz=self.tz)
    # NOTE(review): the resolution comparisons below presumably mean "index
    # is finer than the requested resolution" -- confirm against
    # Resolution's ordering
    elif (reso == 'day' and (self._resolution < Resolution.RESO_DAY or not is_monotonic)):
        st = datetime(parsed.year, parsed.month, parsed.day)
        t1 = Timestamp(st, tz=self.tz)
        t2 = st + offsets.Day()
        # upper bound is one unit of ``.value`` before the next day starts
        t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
    elif (reso == 'hour' and (
            self._resolution < Resolution.RESO_HR or not is_monotonic)):
        st = datetime(parsed.year, parsed.month, parsed.day,
                      hour=parsed.hour)
        t1 = Timestamp(st, tz=self.tz)
        t2 = Timestamp(Timestamp(st + offsets.Hour(),
                                 tz=self.tz).value - 1)
    elif (reso == 'minute' and (
            self._resolution < Resolution.RESO_MIN or not is_monotonic)):
        st = datetime(parsed.year, parsed.month, parsed.day,
                      hour=parsed.hour, minute=parsed.minute)
        t1 = Timestamp(st, tz=self.tz)
        t2 = Timestamp(Timestamp(st + offsets.Minute(),
                                 tz=self.tz).value - 1)
    elif (reso == 'second' and (
            self._resolution == Resolution.RESO_SEC or not is_monotonic)):
        st = datetime(parsed.year, parsed.month, parsed.day,
                      hour=parsed.hour, minute=parsed.minute, second=parsed.second)
        t1 = Timestamp(st, tz=self.tz)
        t2 = Timestamp(Timestamp(st + offsets.Second(),
                                 tz=self.tz).value - 1)
    else:
        raise KeyError

    stamps = self.asi8

    if is_monotonic:

        # we are out of range
        if len(stamps) and (
            (use_lhs and t1.value < stamps[0] and t2.value < stamps[0]) or (
                (use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1]))):
            raise KeyError

        # a monotonic (sorted) series can be sliced
        left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
        right = stamps.searchsorted(t2.value, side='right') if use_rhs else None

        return slice(left, right)

    # unsorted index: build boolean masks for each requested bound
    lhs_mask = (stamps >= t1.value) if use_lhs else True
    rhs_mask = (stamps <= t2.value) if use_rhs else True

    # try to find the dates
    return (lhs_mask & rhs_mask).nonzero()[0]
  999. def _possibly_promote(self, other):
  1000. if other.inferred_type == 'date':
  1001. other = DatetimeIndex(other)
  1002. return self, other
  1003. def get_value(self, series, key):
  1004. """
  1005. Fast lookup of value from 1-dimensional ndarray. Only use this if you
  1006. know what you're doing
  1007. """
  1008. if isinstance(key, datetime):
  1009. # needed to localize naive datetimes
  1010. if self.tz is not None:
  1011. key = Timestamp(key, tz=self.tz)
  1012. return self.get_value_maybe_box(series, key)
  1013. try:
  1014. return _maybe_box(self, Index.get_value(self, series, key), series, key)
  1015. except KeyError:
  1016. try:
  1017. loc = self._get_string_slice(key)
  1018. return series[loc]
  1019. except (TypeError, ValueError, KeyError):
  1020. pass
  1021. if isinstance(key, time):
  1022. locs = self.indexer_at_time(key)
  1023. return series.take(locs)
  1024. try:
  1025. return self.get_value_maybe_box(series, key)
  1026. except (TypeError, ValueError, KeyError):
  1027. raise KeyError(key)
  1028. def get_value_maybe_box(self, series, key):
  1029. # needed to localize naive datetimes
  1030. if self.tz is not None:
  1031. key = Timestamp(key, tz=self.tz)
  1032. elif not isinstance(key, Timestamp):
  1033. key = Timestamp(key)
  1034. values = self._engine.get_value(_values_from_object(series), key)
  1035. return _maybe_box(self, values, series, key)
  1036. def get_loc(self, key):
  1037. """
  1038. Get integer location for requested label
  1039. Returns
  1040. -------
  1041. loc : int
  1042. """
  1043. if isinstance(key, datetime):
  1044. # needed to localize naive datetimes
  1045. stamp = Timestamp(key, tz=self.tz)
  1046. return self._engine.get_loc(stamp)
  1047. try:
  1048. return Index.get_loc(self, key)
  1049. except (KeyError, ValueError):
  1050. try:
  1051. return self._get_string_slice(key)
  1052. except (TypeError, KeyError, ValueError):
  1053. pass
  1054. if isinstance(key, time):
  1055. return self.indexer_at_time(key)
  1056. try:
  1057. stamp = Timestamp(key, tz=self.tz)
  1058. return self._engine.get_loc(stamp)
  1059. except (KeyError, ValueError):
  1060. raise KeyError(key)
  1061. def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
  1062. freq = getattr(self, 'freqstr',
  1063. getattr(self, 'inferred_freq', None))
  1064. _, parsed, reso = parse_time_string(key, freq)
  1065. loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs,
  1066. use_rhs=use_rhs)
  1067. return loc
  1068. def slice_indexer(self, start=None, end=None, step=None):
  1069. """
  1070. Index.slice_indexer, customized to handle time slicing
  1071. """
  1072. if isinstance(start, time) and isinstance(end, time):
  1073. if step is not None and step != 1:
  1074. raise ValueError('Must have step size of 1 with time slices')
  1075. return self.indexer_between_time(start, end)
  1076. if isinstance(start, time) or isinstance(end, time):
  1077. raise KeyError('Cannot mix time and non-time slice keys')
  1078. if isinstance(start, float) or isinstance(end, float):
  1079. raise TypeError('Cannot index datetime64 with float keys')
  1080. return Index.slice_indexer(self, start, end, step)
  1081. def slice_locs(self, start=None, end=None):
  1082. """
  1083. Index.slice_locs, customized to handle partial ISO-8601 string slicing
  1084. """
  1085. if isinstance(start, compat.string_types) or isinstance(end, compat.string_types):
  1086. if self.is_monotonic:
  1087. try:
  1088. if start:
  1089. start_loc = self._get_string_slice(start).start
  1090. else:
  1091. start_loc = 0
  1092. if end:
  1093. end_loc = self._get_string_slice(end).stop
  1094. else:
  1095. end_loc = len(self)
  1096. return start_loc, end_loc
  1097. except KeyError:
  1098. pass
  1099. else:
  1100. # can't use a slice indexer because we are not sorted!
  1101. # so create an indexer directly
  1102. try:
  1103. if start:
  1104. start_loc = self._get_string_slice(start,
  1105. use_rhs=False)
  1106. else:
  1107. start_loc = np.arange(len(self))
  1108. if end:
  1109. end_loc = self._get_string_slice(end, use_lhs=False)
  1110. else:
  1111. end_loc = np.arange(len(self))
  1112. return start_loc, end_loc
  1113. except KeyError:
  1114. pass
  1115. if isinstance(start, time) or isinstance(end, time):
  1116. raise KeyError('Cannot use slice_locs with time slice keys')
  1117. return Index.slice_locs(self, start, end)
def __getitem__(self, key):
    """
    Override numpy.ndarray's __getitem__ method to work as desired

    Scalar keys return a Timestamp carrying this index's offset and tz.
    Other keys return a new index built with ``_simple_new``, except that
    results with more than one dimension are returned as plain arrays.
    """
    arr_idx = self.view(np.ndarray)
    if np.isscalar(key):
        val = arr_idx[key]
        return Timestamp(val, offset=self.offset, tz=self.tz)
    else:
        if com._is_bool_indexer(key):
            key = np.asarray(key)
            if key.all():
                # an all-True mask selects everything
                key = slice(0,None,None)
            else:
                key = lib.maybe_booleans_to_slice(key.view(np.uint8))

        # the offset is only carried over for slice keys; a stepped slice
        # scales it by the step
        new_offset = None
        if isinstance(key, slice):
            if self.offset is not None and key.step is not None:
                new_offset = key.step * self.offset
            else:
                new_offset = self.offset

        result = arr_idx[key]
        if result.ndim > 1:
            return result

        return self._simple_new(result, self.name, new_offset, self.tz)
  1141. # Try to run function on index first, and then on elements of index
  1142. # Especially important for group-by functionality
  1143. def map(self, f):
  1144. try:
  1145. result = f(self)
  1146. if not isinstance(result, np.ndarray):
  1147. raise TypeError
  1148. return result
  1149. except Exception:
  1150. return _algos.arrmap_object(self.asobject, f)
  1151. # alias to offset
  1152. @property
  1153. def freq(self):
  1154. """ return the frequency object if its set, otherwise None """
  1155. return self.offset
  1156. @cache_readonly
  1157. def inferred_freq(self):
  1158. try:
  1159. return infer_freq(self)
  1160. except ValueError:
  1161. return None
  1162. @property
  1163. def freqstr(self):
  1164. """ return the frequency object as a string if its set, otherwise None """
  1165. if self.freq is None:
  1166. return None
  1167. return self.offset.freqstr
# Private accessors built by ``_field_accessor(name, field, docstring)``.
# The second argument is the field code handed to the accessor factory.
_year = _field_accessor('year', 'Y')
_month = _field_accessor('month', 'M', "The month as January=1, December=12")
_day = _field_accessor('day', 'D')
_hour = _field_accessor('hour', 'h')
_minute = _field_accessor('minute', 'm')
_second = _field_accessor('second', 's')
_microsecond = _field_accessor('microsecond', 'us')
_nanosecond = _field_accessor('nanosecond', 'ns')
_weekofyear = _field_accessor('weekofyear', 'woy')
# ``_week`` is an alias of ``_weekofyear``
_week = _weekofyear
_dayofweek = _field_accessor('dayofweek', 'dow',
                             "The day of the week with Monday=0, Sunday=6")
# ``_weekday`` is an alias of ``_dayofweek``
_weekday = _dayofweek
_dayofyear = _field_accessor('dayofyear', 'doy')
_quarter = _field_accessor('quarter', 'q')
# boundary flags: the field code is the same as the accessor name
_is_month_start = _field_accessor('is_month_start', 'is_month_start')
_is_month_end = _field_accessor('is_month_end', 'is_month_end')
_is_quarter_start = _field_accessor('is_quarter_start', 'is_quarter_start')
_is_quarter_end = _field_accessor('is_quarter_end', 'is_quarter_end')
_is_year_start = _field_accessor('is_year_start', 'is_year_start')
_is_year_end = _field_accessor('is_year_end', 'is_year_end')
  1189. @property
  1190. def _time(self):
  1191. """
  1192. Returns numpy array of datetime.time. The time part of the Timestamps.
  1193. """
  1194. # can't call self.map() which tries to treat func as ufunc
  1195. # and causes recursion warnings on python 2.6
  1196. return _algos.arrmap_object(self.asobject, lambda x: x.time())
  1197. @property
  1198. def _date(self):
  1199. """
  1200. Returns numpy array of datetime.date. The date part of the Timestamps.
  1201. """
  1202. return _algos.arrmap_object(self.asobject, lambda x: x.date())
  1203. def normalize(self):
  1204. """
  1205. Return DatetimeIndex with times to midnight. Length is unaltered
  1206. Returns
  1207. -------
  1208. normalized : DatetimeIndex
  1209. """
  1210. new_values = tslib.date_normalize(self.asi8, self.tz)
  1211. return DatetimeIndex(new_values, freq='infer', name=self.name,
  1212. tz=self.tz)
  1213. def __iter__(self):
  1214. return iter(self.asobject)
  1215. def searchsorted(self, key, side='left'):
  1216. if isinstance(key, np.ndarray):
  1217. key = np.array(key, dtype=_NS_DTYPE, copy=False)
  1218. else:
  1219. key = _to_m8(key, tz=self.tz)
  1220. return self.values.searchsorted(key, side=side)
  1221. def is_type_compatible(self, typ):
  1222. return typ == self.inferred_type or typ == 'datetime'
  1223. def argmin(self):
  1224. # hack to workaround argmin failure
  1225. try:
  1226. return self.values.argmin()
  1227. except Exception: # pragma: no cover
  1228. return self.asi8.argmin()
  1229. @property
  1230. def inferred_type(self):
  1231. # b/c datetime is represented as microseconds since the epoch, make
  1232. # sure we can't have ambiguous indexing
  1233. return 'datetime64'
  1234. @property
  1235. def dtype(self):
  1236. return _NS_DTYPE
  1237. @property
  1238. def is_all_dates(self):
  1239. return True
  1240. @cache_readonly
  1241. def is_normalized(self):
  1242. """
  1243. Returns True if all of the dates are at midnight ("no time")
  1244. """
  1245. return tslib.dates_normalized(self.asi8, self.tz)
  1246. @cache_readonly
  1247. def resolution(self):
  1248. """
  1249. Returns day, hour, minute, second, or microsecond
  1250. """
  1251. reso = self._resolution
  1252. return get_reso_string(reso)
  1253. @cache_readonly
  1254. def _resolution(self):
  1255. return tslib.resolution(self.asi8, self.tz)
  1256. def equals(self, other):
  1257. """
  1258. Determines if two Index objects contain the same elements.
  1259. """
  1260. if self.is_(other):
  1261. return True
  1262. if (not hasattr(other, 'inferred_type') or
  1263. other.inferred_type != 'datetime64'):
  1264. if self.offset is not None:
  1265. return False
  1266. try:
  1267. other = DatetimeIndex(other)
  1268. except:
  1269. return False
  1270. if self.tz is not None:
  1271. if other.tz is None:
  1272. return False
  1273. same_zone = tslib.get_timezone(
  1274. self.tz) == tslib.get_timezone(other.tz)
  1275. else:
  1276. if other.tz is not None:
  1277. return False
  1278. same_zone = True
  1279. return same_zone and np.array_equal(self.asi8, other.asi8)
def insert(self, loc, item):
    """
    Make new Index inserting new item at location

    Parameters
    ----------
    loc : int
    item : object
        if not either a Python datetime or a numpy integer-like, returned
        Index dtype will be object rather than datetime.

    Returns
    -------
    new_index : Index

    Raises
    ------
    ValueError
        If ``item`` is a datetime whose time zone differs from the index's.
    TypeError
        If ``item`` cannot be inserted and is not a string.
    """

    freq = None
    if isinstance(item, datetime):
        zone = tslib.get_timezone(self.tz)
        izone = tslib.get_timezone(getattr(item, 'tzinfo', None))
        if zone != izone:
            raise ValueError('Passed item and index have different timezone')
        # check freq can be preserved on edge cases
        if self.freq is not None:
            # prepending one step before the first element, or appending
            # one step after the last, keeps the index regular
            if (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                freq = self.freq
            elif (loc == len(self)) and item - self.freq == self[-1]:
                freq = self.freq
        item = _to_m8(item, tz=self.tz)
    try:
        new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
                                    self[loc:].asi8))
        if self.tz is not None:
            new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz)
        return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)

    except (AttributeError, TypeError):

        # fall back to object index
        if isinstance(item,compat.string_types):
            return self.asobject.insert(loc, item)
        raise TypeError("cannot insert DatetimeIndex with incompatible label")
def delete(self, loc):
    """
    Make a new DatetimeIndex with passed location(s) deleted.

    The frequency is kept only when the removed entries sit at the start
    or the end of the index (a single edge position, or a step-1 slice
    touching either edge); otherwise the result has no frequency.

    Parameters
    ----------
    loc: int, slice or array of ints
        Indicate which sub-arrays to remove.

    Returns
    -------
    new_index : DatetimeIndex
    """
    new_dates = np.delete(self.asi8, loc)

    freq = None
    if lib.is_integer(loc):
        # first or last position, given as a positive or negative index
        if loc in (0, -len(self), -1, len(self) - 1):
            freq = self.freq
    else:
        if com.is_list_like(loc):
            # a list of positions may be expressible as a slice
            loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc)))
        if isinstance(loc, slice) and loc.step in (1, None):
            if (loc.start in (0, None) or loc.stop in (len(self), None)):
                freq = self.freq

    if self.tz is not None:
        new_dates = _tz_convert_with_transitions(new_dates, 'UTC', self.tz)
    return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
  1342. def _view_like(self, ndarray):
  1343. result = ndarray.view(type(self))
  1344. result.offset = self.offset
  1345. result.tz = self.tz
  1346. result.name = self.name
  1347. return result
  1348. def tz_convert(self, tz):
  1349. """
  1350. Convert DatetimeIndex from one time zone to another (using pytz/dateutil)
  1351. Returns
  1352. -------
  1353. normalized : DatetimeIndex
  1354. """
  1355. tz = tools._maybe_get_tz(tz)
  1356. if self.tz is None:
  1357. # tz naive, use tz_localize
  1358. raise TypeError('Cannot convert tz-naive timestamps, use '
  1359. 'tz_localize to localize')
  1360. # No conversion since timestamps are all UTC to begin with
  1361. return self._simple_new(self.values, self.name, self.offset, tz)
  1362. def tz_localize(self, tz, infer_dst=False):
  1363. """
  1364. Localize tz-naive DatetimeIndex to given time zone (using pytz/dateutil)
  1365. Parameters
  1366. ----------
  1367. tz : string or pytz.timezone or dateutil.tz.tzfile
  1368. Time zone for time. Corresponding timestamps would be converted to
  1369. time zone of the TimeSeries
  1370. infer_dst : boolean, default False
  1371. Attempt to infer fall dst-transition hours based on order
  1372. Returns
  1373. -------
  1374. localized : DatetimeIndex
  1375. """
  1376. if self.tz is not None:
  1377. raise TypeError("Already tz-aware, use tz_convert to convert.")
  1378. tz = tools._maybe_get_tz(tz)
  1379. # Convert to UTC
  1380. new_dates = tslib.tz_localize_to_utc(self.asi8, tz, infer_dst=infer_dst)
  1381. new_dates = new_dates.view(_NS_DTYPE)
  1382. return self._simple_new(new_dates, self.name, self.offset, tz)
  1383. def indexer_at_time(self, time, asof=False):
  1384. """
  1385. Select values at particular time of day (e.g. 9:30AM)
  1386. Parameters
  1387. ----------
  1388. time : datetime.time or string
  1389. tz : string or pytz.timezone or dateutil.tz.tzfile
  1390. Time zone for time. Corresponding timestamps would be converted to
  1391. time zone of the TimeSeries
  1392. Returns
  1393. -------
  1394. values_at_time : TimeSeries
  1395. """
  1396. from dateutil.parser import parse
  1397. if asof:
  1398. raise NotImplementedError
  1399. if isinstance(time, compat.string_types):
  1400. time = parse(time).time()
  1401. if time.tzinfo:
  1402. # TODO
  1403. raise NotImplementedError
  1404. time_micros = self._get_time_micros()
  1405. micros = _time_to_micros(time)
  1406. return (micros == time_micros).nonzero()[0]
  1407. def indexer_between_time(self, start_time, end_time, include_start=True,
  1408. include_end=True):
  1409. """
  1410. Select values between particular times of day (e.g., 9:00-9:30AM)
  1411. Parameters
  1412. ----------
  1413. start_time : datetime.time or string
  1414. end_time : datetime.time or string
  1415. include_start : boolean, default True
  1416. include_end : boolean, default True
  1417. tz : string or pytz.timezone or dateutil.tz.tzfile, default None
  1418. Returns
  1419. -------
  1420. values_between_time : TimeSeries
  1421. """
  1422. from dateutil.parser import parse
  1423. if isinstance(start_time, compat.string_types):
  1424. start_time = parse(start_time).time()
  1425. if isinstance(end_time, compat.string_types):
  1426. end_time = parse(end_time).time()
  1427. if start_time.tzinfo or end_time.tzinfo:
  1428. raise NotImplementedError
  1429. time_micros = self._get_time_micros()
  1430. start_micros = _time_to_micros(start_time)
  1431. end_micros = _time_to_micros(end_time)
  1432. if include_start and include_end:
  1433. lop = rop = operator.le
  1434. elif include_start:
  1435. lop = operator.le
  1436. rop = operator.lt
  1437. elif include_end:
  1438. lop = operator.lt
  1439. rop = operator.le
  1440. else:
  1441. lop = rop = operator.lt
  1442. if start_time <= end_time:
  1443. join_op = operator.and_
  1444. else:
  1445. join_op = operator.or_
  1446. mask = join_op(lop(start_micros, time_micros),
  1447. rop(time_micros, end_micros))
  1448. return mask.nonzero()[0]
  1449. def to_julian_date(self):
  1450. """
  1451. Convert DatetimeIndex to Float64Index of Julian Dates.
  1452. 0 Julian date is noon January 1, 4713 BC.
  1453. http://en.wikipedia.org/wiki/Julian_day
  1454. """
  1455. # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
  1456. year = self.year
  1457. month = self.month
  1458. day = self.day
  1459. testarr = month < 3
  1460. year[testarr] -= 1
  1461. month[testarr] += 12
  1462. return Float64Index(day +
  1463. np.fix((153*month - 457)/5) +
  1464. 365*year +
  1465. np.floor(year / 4) -
  1466. np.floor(year / 100) +
  1467. np.floor(year / 400) +
  1468. 1721118.5 +
  1469. (self.hour +
  1470. self.minute/60.0 +
  1471. self.second/3600.0 +
  1472. self.microsecond/3600.0/1e+6 +
  1473. self.nanosecond/3600.0/1e+9
  1474. )/24.0)
  1475. def _generate_regular_range(start, end, periods, offset):
  1476. if isinstance(offset, Tick):
  1477. stride = offset.nanos
  1478. if periods is None:
  1479. b = Timestamp(start).value
  1480. e = Timestamp(end).value
  1481. e += stride - e % stride
  1482. # end.tz == start.tz by this point due to _generate implementation
  1483. tz = start.tz
  1484. elif start is not None:
  1485. b = Timestamp(start).value
  1486. e = b + periods * stride
  1487. tz = start.tz
  1488. elif end is not None:
  1489. e = Timestamp(end).value + stride
  1490. b = e - periods * stride
  1491. tz = end.tz
  1492. else:
  1493. raise NotImplementedError
  1494. data = np.arange(b, e, stride, dtype=np.int64)
  1495. data = DatetimeIndex._simple_new(data, None, tz=tz)
  1496. else:
  1497. if isinstance(start, Timestamp):
  1498. start = start.to_pydatetime()
  1499. if isinstance(end, Timestamp):
  1500. end = end.to_pydatetime()
  1501. xdr = generate_range(start=start, end=end,
  1502. periods=periods, offset=offset)
  1503. dates = list(xdr)
  1504. # utc = len(dates) > 0 and dates[0].tzinfo is not None
  1505. data = tools.to_datetime(dates)
  1506. return data
  1507. def date_range(start=None, end=None, periods=None, freq='D', tz=None,
  1508. normalize=False, name=None, closed=None):
  1509. """
  1510. Return a fixed frequency datetime index, with day (calendar) as the default
  1511. frequency
  1512. Parameters
  1513. ----------
  1514. start : string or datetime-like, default None
  1515. Left bound for generating dates
  1516. end : string or datetime-like, default None
  1517. Right bound for generating dates
  1518. periods : integer or None, default None
  1519. If None, must specify start and end
  1520. freq : string or DateOffset, default 'D' (calendar daily)
  1521. Frequency strings can have multiples, e.g. '5H'
  1522. tz : string or None
  1523. Time zone name for returning localized DatetimeIndex, for example
  1524. Asia/Hong_Kong
  1525. normalize : bool, default False
  1526. Normalize start/end dates to midnight before generating date range
  1527. name : str, default None
  1528. Name of the resulting index
  1529. closed : string or None, default None
  1530. Make the interval closed with respect to the given frequency to
  1531. the 'left', 'right', or both sides (None)
  1532. Notes
  1533. -----
  1534. 2 of start, end, or periods must be specified
  1535. Returns
  1536. -------
  1537. rng : DatetimeIndex
  1538. """
  1539. return DatetimeIndex(start=start, end=end, periods=periods,
  1540. freq=freq, tz=tz, normalize=normalize, name=name,
  1541. closed=closed)
  1542. def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
  1543. normalize=True, name=None, closed=None):
  1544. """
  1545. Return a fixed frequency datetime index, with business day as the default
  1546. frequency
  1547. Parameters
  1548. ----------
  1549. start : string or datetime-like, default None
  1550. Left bound for generating dates
  1551. end : string or datetime-like, default None
  1552. Right bound for generating dates
  1553. periods : integer or None, default None
  1554. If None, must specify start and end
  1555. freq : string or DateOffset, default 'B' (business daily)
  1556. Frequency strings can have multiples, e.g. '5H'
  1557. tz : string or None
  1558. Time zone name for returning localized DatetimeIndex, for example
  1559. Asia/Beijing
  1560. normalize : bool, default False
  1561. Normalize start/end dates to midnight before generating date range
  1562. name : str, default None
  1563. Name for the resulting index
  1564. closed : string or None, default None
  1565. Make the interval closed with respect to the given frequency to
  1566. the 'left', 'right', or both sides (None)
  1567. Notes
  1568. -----
  1569. 2 of start, end, or periods must be specified
  1570. Returns
  1571. -------
  1572. rng : DatetimeIndex
  1573. """
  1574. return DatetimeIndex(start=start, end=end, periods=periods,
  1575. freq=freq, tz=tz, normalize=normalize, name=name,
  1576. closed=closed)
  1577. def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
  1578. normalize=True, name=None, closed=None, **kwargs):
  1579. """
  1580. **EXPERIMENTAL** Return a fixed frequency datetime index, with
  1581. CustomBusinessDay as the default frequency
  1582. .. warning:: EXPERIMENTAL
  1583. The CustomBusinessDay class is not officially supported and the API is
  1584. likely to change in future versions. Use this at your own risk.
  1585. Parameters
  1586. ----------
  1587. start : string or datetime-like, default None
  1588. Left bound for generating dates
  1589. end : string or datetime-like, default None
  1590. Right bound for generating dates
  1591. periods : integer or None, default None
  1592. If None, must specify start and end
  1593. freq : string or DateOffset, default 'C' (CustomBusinessDay)
  1594. Frequency strings can have multiples, e.g. '5H'
  1595. tz : string or None
  1596. Time zone name for returning localized DatetimeIndex, for example
  1597. Asia/Beijing
  1598. normalize : bool, default False
  1599. Normalize start/end dates to midnight before generating date range
  1600. name : str, default None
  1601. Name for the resulting index
  1602. weekmask : str, Default 'Mon Tue Wed Thu Fri'
  1603. weekmask of valid business days, passed to ``numpy.busdaycalendar``
  1604. holidays : list
  1605. list/array of dates to exclude from the set of valid business days,
  1606. passed to ``numpy.busdaycalendar``
  1607. closed : string or None, default None
  1608. Make the interval closed with respect to the given frequency to
  1609. the 'left', 'right', or both sides (None)
  1610. Notes
  1611. -----
  1612. 2 of start, end, or periods must be specified
  1613. Returns
  1614. -------
  1615. rng : DatetimeIndex
  1616. """
  1617. if freq=='C':
  1618. holidays = kwargs.pop('holidays', [])
  1619. weekmask = kwargs.pop('weekmask', 'Mon Tue Wed Thu Fri')
  1620. freq = CDay(holidays=holidays, weekmask=weekmask)
  1621. return DatetimeIndex(start=start, end=end, periods=periods, freq=freq,
  1622. tz=tz, normalize=normalize, name=name,
  1623. closed=closed, **kwargs)
  1624. def _to_m8(key, tz=None):
  1625. '''
  1626. Timestamp-like => dt64
  1627. '''
  1628. if not isinstance(key, Timestamp):
  1629. # this also converts strings
  1630. key = Timestamp(key, tz=tz)
  1631. return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE)
  1632. def _str_to_dt_array(arr, offset=None, dayfirst=None, yearfirst=None):
  1633. def parser(x):
  1634. result = parse_time_string(x, offset, dayfirst=dayfirst,
  1635. yearfirst=yearfirst)
  1636. return result[0]
  1637. arr = np.asarray(arr, dtype=object)
  1638. data = _algos.arrmap_object(arr, parser)
  1639. return tools.to_datetime(data)
# Bounds of the window checked by ``_naive_in_cache_range``; a start/end
# pair must fall strictly inside it.
_CACHE_START = Timestamp(datetime(1950, 1, 1))
_CACHE_END = Timestamp(datetime(2030, 1, 1))

# Module-level cache dictionary, initially empty.
# NOTE(review): presumably keyed by offset -- confirm at the use sites
_daterange_cache = {}
  1643. def _naive_in_cache_range(start, end):
  1644. if start is None or end is None:
  1645. return False
  1646. else:
  1647. if start.tzinfo is not None or end.tzinfo is not None:
  1648. return False
  1649. return _in_range(start, end, _CACHE_START, _CACHE_END)
  1650. def _in_range(start, end, rng_start, rng_end):
  1651. return start > rng_start and end < rng_end
  1652. def _use_cached_range(offset, _normalized, start, end):
  1653. return (offset._should_cache() and
  1654. not (offset._normalize_cache and not _normalized) and
  1655. _naive_in_cache_range(start, end))
  1656. def _time_to_micros(time):
  1657. seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
  1658. return 1000000 * seconds + time.microsecond
  1659. def _process_concat_data(to_concat, name):
  1660. klass = Index
  1661. kwargs = {}
  1662. concat = np.concatenate
  1663. all_dti = True
  1664. need_utc_convert = False
  1665. has_naive = False
  1666. tz = None
  1667. for x in to_concat:
  1668. if not isinstance(x, DatetimeIndex):
  1669. all_dti = False
  1670. else:
  1671. if tz is None:
  1672. tz = x.tz
  1673. if x.tz is None:
  1674. has_naive = True
  1675. if x.tz != tz:
  1676. need_utc_convert = True
  1677. tz = 'UTC'
  1678. if all_dti:
  1679. need_obj_convert = False
  1680. if has_naive and tz is not None:
  1681. need_obj_convert = True
  1682. if need_obj_convert:
  1683. to_concat = [x.asobject.values for x in to_concat]
  1684. else:
  1685. if need_utc_convert:
  1686. to_concat = [x.tz_convert('UTC').values for x in to_concat]
  1687. else:
  1688. to_concat = [x.values for x in to_concat]
  1689. # well, technically not a "class" anymore...oh well
  1690. klass = DatetimeIndex._simple_new
  1691. kwargs = {'tz': tz}
  1692. concat = com._concat_compat
  1693. else:
  1694. for i, x in enumerate(to_concat):
  1695. if isinstance(x, DatetimeIndex):
  1696. to_concat[i] = x.asobject.values
  1697. elif isinstance(x, Index):
  1698. to_concat[i] = x.values
  1699. factory_func = lambda x: klass(concat(x), name=name, **kwargs)
  1700. return to_concat, factory_func