PageRenderTime 57ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/tseries/period.py

http://github.com/pydata/pandas
Python | 1337 lines | 1299 code | 11 blank | 27 comment | 10 complexity | e99082e4a78502b6af9ecf0def233f60 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # pylint: disable=E1101,E1103,W0232
  2. import operator
  3. from datetime import datetime, date
  4. import numpy as np
  5. from pandas.core.base import PandasObject
  6. from pandas.tseries.frequencies import (get_freq_code as _gfc,
  7. _month_numbers, FreqGroup)
  8. from pandas.tseries.index import DatetimeIndex, Int64Index, Index
  9. from pandas.core.base import DatetimeIndexOpsMixin
  10. from pandas.tseries.tools import parse_time_string
  11. import pandas.tseries.frequencies as _freq_mod
  12. import pandas.core.common as com
  13. from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box,
  14. _values_from_object, ABCSeries)
  15. from pandas import compat
  16. from pandas.lib import Timestamp
  17. import pandas.lib as lib
  18. import pandas.tslib as tslib
  19. import pandas.algos as _algos
  20. from pandas.compat import zip, u
  21. #---------------
  22. # Period logic
  23. def _period_field_accessor(name, alias):
  24. def f(self):
  25. base, mult = _gfc(self.freq)
  26. return tslib.get_period_field(alias, self.ordinal, base)
  27. f.__name__ = name
  28. return property(f)
  29. def _field_accessor(name, alias):
  30. def f(self):
  31. base, mult = _gfc(self.freq)
  32. return tslib.get_period_field_arr(alias, self.values, base)
  33. f.__name__ = name
  34. return property(f)
  class Period(PandasObject):
      """
      Represents a period of time

      Parameters
      ----------
      value : Period or compat.string_types, default None
          The time period represented (e.g., '4Q2005')
      freq : str, default None
          e.g., 'B' for businessday. Must be a singular rule-code (e.g. 5T is
          not allowed).
      ordinal : int, default None
          Period offset; mutually exclusive with ``value`` and requires
          ``freq``.
      year : int, default None
      month : int, default 1
      quarter : int, default None
      day : int, default 1
      hour : int, default 0
      minute : int, default 0
      second : int, default 0
      """
      __slots__ = ['freq', 'ordinal']
      _comparables = ['name', 'freqstr']

      def __init__(self, value=None, freq=None, ordinal=None,
                   year=None, month=1, quarter=None, day=1,
                   hour=0, minute=0, second=0):
          # freq points to a tuple (base, mult);  base is one of the defined
          # periods such as A, Q, etc. Every five minutes would be, e.g.,
          # ('T', 5) but may be passed in as a string like '5T'
          self.freq = None

          # ordinal is the period offset from the gregorian proleptic epoch
          self.ordinal = None

          if ordinal is not None and value is not None:
              raise ValueError("Only one of value or ordinal should be "
                               "given, but not both")
          elif ordinal is not None:
              if not com.is_integer(ordinal):
                  raise ValueError("Ordinal must be an integer")
              if freq is None:
                  raise ValueError('Must supply freq for ordinal value')
              self.ordinal = ordinal

          elif value is None:
              # Construct from the individual calendar fields.
              if freq is None:
                  raise ValueError("If value is None, freq cannot be None")

              self.ordinal = _ordinal_from_fields(year, month, quarter, day,
                                                  hour, minute, second, freq)

          elif isinstance(value, Period):
              other = value
              if freq is None or _gfc(freq) == _gfc(other.freq):
                  self.ordinal = other.ordinal
                  freq = other.freq
              else:
                  converted = other.asfreq(freq)
                  self.ordinal = converted.ordinal

          elif (com._is_null_datelike_scalar(value) or
                value in tslib._nat_strings):
              self.ordinal = tslib.iNaT
              if freq is None:
                  raise ValueError("If value is NaT, freq cannot be None "
                                   "because it cannot be inferred")

          elif isinstance(value, compat.string_types) or com.is_integer(value):
              if com.is_integer(value):
                  value = str(value)
              value = value.upper()

              dt, _, reso = parse_time_string(value, freq)
              if freq is None:
                  try:
                      freq = _freq_mod.Resolution.get_freq(reso)
                  except KeyError:
                      raise ValueError(
                          "Invalid frequency or could not infer: %s" % reso)

          elif isinstance(value, datetime):
              # Must be tested before ``date``: datetime is a date subclass.
              dt = value
              if freq is None:
                  raise ValueError('Must supply freq for datetime value')
          elif isinstance(value, date):
              dt = datetime(year=value.year, month=value.month, day=value.day)
              if freq is None:
                  raise ValueError('Must supply freq for datetime value')
          else:
              msg = "Value must be Period, string, integer, or datetime"
              raise ValueError(msg)

          base, mult = _gfc(freq)
          if mult != 1:
              # TODO: Better error message - this is slightly confusing
              raise ValueError('Only mult == 1 supported')

          # Branches that produced a datetime leave the ordinal unset; derive
          # it from the datetime's fields here.
          if self.ordinal is None:
              self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day,
                                                  dt.hour, dt.minute,
                                                  dt.second, dt.microsecond,
                                                  0, base)

          self.freq = _freq_mod._get_freq_str(base)

      def __eq__(self, other):
          if isinstance(other, Period):
              if other.freq != self.freq:
                  raise ValueError("Cannot compare non-conforming periods")
              # NaT never compares equal, not even to another NaT.
              if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT:
                  return False
              # Frequencies were already checked equal above.
              return self.ordinal == other.ordinal
          return NotImplemented

      def __ne__(self, other):
          return not self == other

      def __hash__(self):
          return hash((self.ordinal, self.freq))

      def __add__(self, other):
          if com.is_integer(other):
              # NaT is preserved under arithmetic.
              if self.ordinal == tslib.iNaT:
                  ordinal = self.ordinal
              else:
                  ordinal = self.ordinal + other
              return Period(ordinal=ordinal, freq=self.freq)
          else:  # pragma: no cover
              return NotImplemented

      def __sub__(self, other):
          if com.is_integer(other):
              # NaT is preserved under arithmetic.
              if self.ordinal == tslib.iNaT:
                  ordinal = self.ordinal
              else:
                  ordinal = self.ordinal - other
              return Period(ordinal=ordinal, freq=self.freq)
          elif isinstance(other, Period):
              if other.freq != self.freq:
                  raise ValueError("Cannot do arithmetic with "
                                   "non-conforming periods")
              if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT:
                  return Period(ordinal=tslib.iNaT, freq=self.freq)
              # Period - Period yields the integer number of periods apart.
              return self.ordinal - other.ordinal
          else:  # pragma: no cover
              return NotImplemented

      def _comp_method(func, name):
          # Class-body helper (not a method): builds one ordering comparison
          # from the given ``operator`` function.
          def f(self, other):
              if isinstance(other, Period):
                  if other.freq != self.freq:
                      raise ValueError("Cannot compare non-conforming periods")
                  if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT:
                      return False
                  return func(self.ordinal, other.ordinal)
              else:
                  raise TypeError(other)

          f.__name__ = name
          return f

      __lt__ = _comp_method(operator.lt, '__lt__')
      __le__ = _comp_method(operator.le, '__le__')
      __gt__ = _comp_method(operator.gt, '__gt__')
      __ge__ = _comp_method(operator.ge, '__ge__')

      def asfreq(self, freq, how='E'):
          """
          Convert Period to desired frequency, either at the start or end of
          the interval

          Parameters
          ----------
          freq : string
          how : {'E', 'S', 'end', 'start'}, default 'end'
              Start or end of the timespan

          Returns
          -------
          resampled : Period
          """
          how = _validate_end_alias(how)
          base1, mult1 = _gfc(self.freq)
          base2, mult2 = _gfc(freq)

          if mult2 != 1:
              raise ValueError('Only mult == 1 supported')

          end = how == 'E'
          new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end)

          return Period(ordinal=new_ordinal, freq=base2)

      @property
      def start_time(self):
          return self.to_timestamp(how='S')

      @property
      def end_time(self):
          if self.ordinal == tslib.iNaT:
              ordinal = self.ordinal
          else:
              # One unit of Timestamp.value before the start of the next
              # period.
              ordinal = (self + 1).start_time.value - 1
          return Timestamp(ordinal)

      def to_timestamp(self, freq=None, how='start', tz=None):
          """
          Return the Timestamp representation of the Period at the target
          frequency at the specified end (how) of the Period

          Parameters
          ----------
          freq : string or DateOffset, default is 'D' if self.freq is week or
                 longer and 'S' otherwise
              Target frequency
          how: str, default 'S' (start)
              'S', 'E'. Can be aliased as case insensitive
              'Start', 'Finish', 'Begin', 'End'

          Returns
          -------
          Timestamp
          """
          how = _validate_end_alias(how)

          if freq is None:
              base, mult = _gfc(self.freq)
              freq = _freq_mod.get_to_timestamp_base(base)

          base, mult = _gfc(freq)
          val = self.asfreq(freq, how)

          dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base)
          return Timestamp(dt64, tz=tz)

      year = _period_field_accessor('year', 0)
      month = _period_field_accessor('month', 3)
      day = _period_field_accessor('day', 4)
      hour = _period_field_accessor('hour', 5)
      minute = _period_field_accessor('minute', 6)
      second = _period_field_accessor('second', 7)
      weekofyear = _period_field_accessor('week', 8)
      week = weekofyear
      dayofweek = _period_field_accessor('dayofweek', 10)
      weekday = dayofweek
      dayofyear = _period_field_accessor('dayofyear', 9)
      quarter = _period_field_accessor('quarter', 2)
      qyear = _period_field_accessor('qyear', 1)

      @classmethod
      def now(cls, freq=None):
          """Return the period of frequency ``freq`` containing the current
          local time."""
          # Use ``cls`` so subclasses get an instance of their own type.
          return cls(datetime.now(), freq=freq)

      def __repr__(self):
          base, mult = _gfc(self.freq)
          formatted = tslib.period_format(self.ordinal, base)
          freqstr = _freq_mod._reverse_period_code_map[base]

          if not compat.PY3:
              encoding = com.get_option("display.encoding")
              formatted = formatted.encode(encoding)

          return "Period('%s', '%s')" % (formatted, freqstr)

      def __unicode__(self):
          """
          Return a string representation of this Period

          Invoked by unicode(period) in py2 only. Yields a Unicode String in
          both py2/py3.
          """
          base, mult = _gfc(self.freq)
          formatted = tslib.period_format(self.ordinal, base)
          return "%s" % formatted

      def strftime(self, fmt):
          r"""
          Returns the string representation of the :class:`Period`, depending
          on the selected :keyword:`format`. :keyword:`format` must be a string
          containing one or several directives.  The method recognizes the same
          directives as the :func:`time.strftime` function of the standard
          Python distribution, as well as the specific additional directives
          ``%f``, ``%F``, ``%q``. (formatting & docs originally from
          scikits.timeseries)

          +-----------+--------------------------------+-------+
          | Directive | Meaning                        | Notes |
          +===========+================================+=======+
          | ``%a``    | Locale's abbreviated weekday   |       |
          |           | name.                          |       |
          +-----------+--------------------------------+-------+
          | ``%A``    | Locale's full weekday name.    |       |
          +-----------+--------------------------------+-------+
          | ``%b``    | Locale's abbreviated month     |       |
          |           | name.                          |       |
          +-----------+--------------------------------+-------+
          | ``%B``    | Locale's full month name.      |       |
          +-----------+--------------------------------+-------+
          | ``%c``    | Locale's appropriate date and  |       |
          |           | time representation.           |       |
          +-----------+--------------------------------+-------+
          | ``%d``    | Day of the month as a decimal  |       |
          |           | number [01,31].                |       |
          +-----------+--------------------------------+-------+
          | ``%f``    | 'Fiscal' year without a        | \(1)  |
          |           | century  as a decimal number   |       |
          |           | [00,99]                        |       |
          +-----------+--------------------------------+-------+
          | ``%F``    | 'Fiscal' year with a century   | \(2)  |
          |           | as a decimal number            |       |
          +-----------+--------------------------------+-------+
          | ``%H``    | Hour (24-hour clock) as a      |       |
          |           | decimal number [00,23].        |       |
          +-----------+--------------------------------+-------+
          | ``%I``    | Hour (12-hour clock) as a      |       |
          |           | decimal number [01,12].        |       |
          +-----------+--------------------------------+-------+
          | ``%j``    | Day of the year as a decimal   |       |
          |           | number [001,366].              |       |
          +-----------+--------------------------------+-------+
          | ``%m``    | Month as a decimal number      |       |
          |           | [01,12].                       |       |
          +-----------+--------------------------------+-------+
          | ``%M``    | Minute as a decimal number     |       |
          |           | [00,59].                       |       |
          +-----------+--------------------------------+-------+
          | ``%p``    | Locale's equivalent of either  | \(3)  |
          |           | AM or PM.                      |       |
          +-----------+--------------------------------+-------+
          | ``%q``    | Quarter as a decimal number    |       |
          |           | [01,04]                        |       |
          +-----------+--------------------------------+-------+
          | ``%S``    | Second as a decimal number     | \(4)  |
          |           | [00,61].                       |       |
          +-----------+--------------------------------+-------+
          | ``%U``    | Week number of the year        | \(5)  |
          |           | (Sunday as the first day of    |       |
          |           | the week) as a decimal number  |       |
          |           | [00,53].  All days in a new    |       |
          |           | year preceding the first       |       |
          |           | Sunday are considered to be in |       |
          |           | week 0.                        |       |
          +-----------+--------------------------------+-------+
          | ``%w``    | Weekday as a decimal number    |       |
          |           | [0(Sunday),6].                 |       |
          +-----------+--------------------------------+-------+
          | ``%W``    | Week number of the year        | \(5)  |
          |           | (Monday as the first day of    |       |
          |           | the week) as a decimal number  |       |
          |           | [00,53].  All days in a new    |       |
          |           | year preceding the first       |       |
          |           | Monday are considered to be in |       |
          |           | week 0.                        |       |
          +-----------+--------------------------------+-------+
          | ``%x``    | Locale's appropriate date      |       |
          |           | representation.                |       |
          +-----------+--------------------------------+-------+
          | ``%X``    | Locale's appropriate time      |       |
          |           | representation.                |       |
          +-----------+--------------------------------+-------+
          | ``%y``    | Year without century as a      |       |
          |           | decimal number [00,99].        |       |
          +-----------+--------------------------------+-------+
          | ``%Y``    | Year with century as a decimal |       |
          |           | number.                        |       |
          +-----------+--------------------------------+-------+
          | ``%Z``    | Time zone name (no characters  |       |
          |           | if no time zone exists).       |       |
          +-----------+--------------------------------+-------+
          | ``%%``    | A literal ``'%'`` character.   |       |
          +-----------+--------------------------------+-------+

          .. note::

              (1)
                  The ``%f`` directive is the same as ``%y`` if the frequency
                  is not quarterly.
                  Otherwise, it corresponds to the 'fiscal' year, as defined by
                  the :attr:`qyear` attribute.

              (2)
                  The ``%F`` directive is the same as ``%Y`` if the frequency
                  is not quarterly.
                  Otherwise, it corresponds to the 'fiscal' year, as defined by
                  the :attr:`qyear` attribute.

              (3)
                  The ``%p`` directive only affects the output hour field
                  if the ``%I`` directive is used to parse the hour.

              (4)
                  The range really is ``0`` to ``61``; this accounts for leap
                  seconds and the (very rare) double leap seconds.

              (5)
                  The ``%U`` and ``%W`` directives are only used in
                  calculations when the day of the week and the year are
                  specified.

          .. rubric::  Examples

              >>> a = Period(freq='Q@JUL', year=2006, quarter=1)
              >>> a.strftime('%F-Q%q')
              '2006-Q1'
              >>> # Output the last month in the quarter of this date
              >>> a.strftime('%b-%Y')
              'Oct-2005'
              >>>
              >>> a = Period(freq='D', year=2001, month=1, day=1)
              >>> a.strftime('%d-%b-%Y')
              '01-Jan-2001'
              >>> a.strftime('%b. %d, %Y was a %A')
              'Jan. 01, 2001 was a Monday'
          """
          base, mult = _gfc(self.freq)
          return tslib.period_format(self.ordinal, base, fmt)
  394. def _get_ordinals(data, freq):
  395. f = lambda x: Period(x, freq=freq).ordinal
  396. if isinstance(data[0], Period):
  397. return tslib.extract_ordinals(data, freq)
  398. else:
  399. return lib.map_infer(data, f)
  def dt64arr_to_periodarr(data, freq, tz):
      """
      Convert a datetime64[ns] array to period ordinals at ``freq``.

      Raises
      ------
      ValueError
          If ``data.dtype`` is not ``M8[ns]``.
      """
      required = np.dtype('M8[ns]')
      if data.dtype != required:
          raise ValueError('Wrong dtype: %s' % data.dtype)

      freq_base, _ = _gfc(freq)
      # tslib works on the underlying int64 representation.
      as_int64 = data.view('i8')
      return tslib.dt64arr_to_periodarr(as_int64, freq_base, tz)
  405. # --- Period index sketch
  406. def _period_index_cmp(opname, nat_result=False):
  407. """
  408. Wrap comparison operations to convert datetime-like to datetime64
  409. """
  410. def wrapper(self, other):
  411. if isinstance(other, Period):
  412. func = getattr(self.values, opname)
  413. if other.freq != self.freq:
  414. raise AssertionError("Frequencies must be equal")
  415. result = func(other.ordinal)
  416. elif isinstance(other, PeriodIndex):
  417. if other.freq != self.freq:
  418. raise AssertionError("Frequencies must be equal")
  419. result = getattr(self.values, opname)(other.values)
  420. mask = (com.mask_missing(self.values, tslib.iNaT) |
  421. com.mask_missing(other.values, tslib.iNaT))
  422. if mask.any():
  423. result[mask] = nat_result
  424. return result
  425. else:
  426. other = Period(other, freq=self.freq)
  427. func = getattr(self.values, opname)
  428. result = func(other.ordinal)
  429. if other.ordinal == tslib.iNaT:
  430. result.fill(nat_result)
  431. mask = self.values == tslib.iNaT
  432. if mask.any():
  433. result[mask] = nat_result
  434. return result
  435. return wrapper
  436. class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
  437. """
  438. Immutable ndarray holding ordinal values indicating regular periods in
  439. time such as particular years, quarters, months, etc. A value of 1 is the
  440. period containing the Gregorian proleptic datetime Jan 1, 0001 00:00:00.
  441. This ordinal representation is from the scikits.timeseries project.
  442. For instance,
  443. # construct period for day 1/1/1 and get the first second
  444. i = Period(year=1,month=1,day=1,freq='D').asfreq('S', 'S')
  445. i.ordinal
  446. ===> 1
  447. Index keys are boxed to Period objects which carries the metadata (eg,
  448. frequency information).
  449. Parameters
  450. ----------
  451. data : array-like (1-dimensional), optional
  452. Optional period-like data to construct index with
  453. dtype : NumPy dtype (default: i8)
  454. copy : bool
  455. Make a copy of input ndarray
  456. freq : string or period object, optional
  457. One of pandas period strings or corresponding objects
  458. start : starting value, period-like, optional
  459. If data is None, used as the start point in generating regular
  460. period data.
  461. periods : int, optional, > 0
  462. Number of periods to generate, if generating index. Takes precedence
  463. over end argument
  464. end : end value, period-like, optional
  465. If periods is none, generated index will extend to first conforming
  466. period on or just past end argument
  467. year : int, array, or Series, default None
  468. month : int, array, or Series, default None
  469. quarter : int, array, or Series, default None
  470. day : int, array, or Series, default None
  471. hour : int, array, or Series, default None
  472. minute : int, array, or Series, default None
  473. second : int, array, or Series, default None
  474. tz : object, default None
  475. Timezone for converting datetime64 data to Periods
  476. Examples
  477. --------
  478. >>> idx = PeriodIndex(year=year_arr, quarter=q_arr)
  479. >>> idx2 = PeriodIndex(start='2000', end='2010', freq='A')
  480. """
  481. _box_scalars = True
  482. _allow_period_index_ops = True
  483. __eq__ = _period_index_cmp('__eq__')
  484. __ne__ = _period_index_cmp('__ne__', nat_result=True)
  485. __lt__ = _period_index_cmp('__lt__')
  486. __gt__ = _period_index_cmp('__gt__')
  487. __le__ = _period_index_cmp('__le__')
  488. __ge__ = _period_index_cmp('__ge__')
  def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
              periods=None, copy=False, name=None, year=None, month=None,
              quarter=None, day=None, hour=None, minute=None, second=None,
              tz=None):
      """
      Create a PeriodIndex from array-like ``data``, from raw ``ordinal``
      values, or from a generated range (endpoints or calendar fields).
      """
      freq = _freq_mod.get_standard_freq(freq)

      if periods is not None:
          if com.is_float(periods):
              periods = int(periods)
          elif not com.is_integer(periods):
              raise ValueError('Periods must be a number, got %s' %
                               str(periods))

      if data is not None:
          ordinal, freq = cls._from_arraylike(data, freq, tz)
          data = np.array(ordinal, dtype=np.int64, copy=False)
      elif ordinal is not None:
          data = np.asarray(ordinal, dtype=np.int64)
      else:
          field_values = [year, month, quarter, day, hour, minute, second]
          data, freq = cls._generate_range(start, end, periods,
                                           freq, field_values)

      result = data.view(cls)
      result.name = name
      result.freq = freq
      return result
  @classmethod
  def _generate_range(cls, start, end, periods, freq, fields):
      """
      Produce ordinals for a range given either endpoints (``start``/``end``)
      or calendar ``fields``, but not both.

      Returns
      -------
      (ordinals, freq)
      """
      n_fields = com._count_not_none(*fields)
      has_endpoint = com._count_not_none(start, end) > 0

      if has_endpoint:
          if n_fields > 0:
              raise ValueError('Can either instantiate from fields '
                               'or endpoints, but not both')
          return _get_ordinal_range(start, end, periods, freq)

      if n_fields > 0:
          yr, mth, qtr, dy, hr, mnt, sec = fields
          return _range_from_fields(year=yr, month=mth, quarter=qtr,
                                    day=dy, hour=hr, minute=mnt,
                                    second=sec, freq=freq)

      raise ValueError('Not enough parameters to construct '
                       'Period range')
  @classmethod
  def _from_arraylike(cls, data, freq, tz):
      """
      Convert array-like ``data`` to period ordinals.

      Returns
      -------
      (data, freq) : the converted values and the frequency that was
          supplied or taken from the input.

      Raises
      ------
      ValueError
          If ``data`` is a scalar or a single Period, or if ``freq`` is None
          and cannot be taken from the first element.
      """
      if not isinstance(data, np.ndarray):
          if np.isscalar(data) or isinstance(data, Period):
              raise ValueError('PeriodIndex() must be called with a '
                               'collection of some kind, %s was passed'
                               % repr(data))

          # other iterable of some kind
          if not isinstance(data, (list, tuple)):
              data = list(data)

          try:
              # Integer-like input: each value is passed through Period()
              # at the given freq.
              data = com._ensure_int64(data)
              if freq is None:
                  # Raised on purpose so the except branch below can try to
                  # take the frequency from the first element.
                  raise ValueError('freq not specified')
              data = np.array([Period(x, freq=freq).ordinal for x in data],
                              dtype=np.int64)
          except (TypeError, ValueError):
              # Fallback: treat the input as an object array.
              data = com._ensure_object(data)

              if freq is None and len(data) > 0:
                  freq = getattr(data[0], 'freq', None)

              if freq is None:
                  raise ValueError('freq not specified and cannot be '
                                   'inferred from first element')

              data = _get_ordinals(data, freq)
      else:
          if isinstance(data, PeriodIndex):
              if freq is None or freq == data.freq:
                  # Same frequency: reuse the underlying values as-is.
                  freq = data.freq
                  data = data.values
              else:
                  # Different frequency: convert the ordinals (the trailing
                  # 1 is the ``end`` flag passed to period_asfreq_arr).
                  base1, _ = _gfc(data.freq)
                  base2, _ = _gfc(freq)
                  data = tslib.period_asfreq_arr(data.values, base1,
                                                 base2, 1)
          else:
              if freq is None and len(data) > 0:
                  freq = getattr(data[0], 'freq', None)

              if freq is None:
                  raise ValueError('freq not specified and cannot be '
                                   'inferred from first element')

              # int64 arrays are taken to be ordinals already and are
              # returned unchanged.
              if data.dtype != np.int64:
                  if np.issubdtype(data.dtype, np.datetime64):
                      data = dt64arr_to_periodarr(data, freq, tz)
                  else:
                      try:
                          data = com._ensure_int64(data)
                      except (TypeError, ValueError):
                          data = com._ensure_object(data)
                          data = _get_ordinals(data, freq)

      return data, freq
  @classmethod
  def _simple_new(cls, values, name, freq=None, **kwargs):
      """
      View ``values`` as ``cls`` and attach ``name`` and ``freq`` without
      any validation or conversion. Extra keyword arguments are ignored.
      """
      new_index = values.view(cls)
      new_index.name = name
      new_index.freq = freq
      return new_index
  @property
  def _na_value(self):
      """The boxed NaT value for this index."""
      box = self._box_func
      return box(tslib.iNaT)
  def __contains__(self, key):
      """
      Membership test. A Period with this index's frequency is looked up by
      ordinal; a string is accepted if ``get_loc`` can resolve it; anything
      else is reported as absent.
      """
      if isinstance(key, Period) and key.freq == self.freq:
          return key.ordinal in self._engine

      if not isinstance(key, compat.string_types):
          return False

      # Best-effort string lookup: any failure means "not contained".
      try:
          self.get_loc(key)
      except Exception:
          return False
      return True
  @property
  def _box_func(self):
      """Callable that wraps a raw ordinal in a Period at this index's
      frequency."""
      def box(x):
          return Period(ordinal=x, freq=self.freq)
      return box
  def asof_locs(self, where, mask):
      """
      where : array of timestamps
      mask : array of booleans where data is not NA

      Returns an array of positions into this index, with -1 where the
      requested value falls before the first unmasked entry.
      """
      targets = where
      if isinstance(targets, DatetimeIndex):
          targets = PeriodIndex(targets.values, freq=self.freq)
      target_vals = targets.values

      # Position of the last unmasked entry that is <= each target.
      valid_vals = self.values[mask]
      pos = valid_vals.searchsorted(target_vals, side='right')
      pos = np.where(pos > 0, pos - 1, 0)

      # Map positions within the masked subset back to the full index.
      valid_positions = np.arange(len(self))[mask]
      result = valid_positions.take(pos)

      first_valid = mask.argmax()
      before_start = (pos == 0) & (target_vals < self.values[first_valid])
      result[before_start] = -1
      return result
  def _array_values(self):
      """Return this index's ``asobject`` form."""
      as_objects = self.asobject
      return as_objects
  def astype(self, dtype):
      """
      Cast to an Index of Period objects (object dtype) or of raw ordinals
      (int64). Any other dtype raises ValueError.
      """
      target = np.dtype(dtype)

      if target == np.object_:
          boxed = np.array(list(self), target)
          return Index(boxed, target)

      if target == _INT64_DTYPE:
          return Index(self.values, target)

      raise ValueError('Cannot cast PeriodIndex to dtype %s' % target)
  def __iter__(self):
      """Yield each entry boxed as a Period at this index's frequency."""
      for ordinal in self.values:
          yield Period(ordinal=ordinal, freq=self.freq)
  def searchsorted(self, key, side='left'):
      """
      Find the insertion position of ``key`` among the ordinal values.
      A string key is first converted to the ordinal of the matching Period.
      """
      if isinstance(key, compat.string_types):
          as_period = Period(key, freq=self.freq)
          key = as_period.ordinal

      ordinals = self.values
      return ordinals.searchsorted(key, side=side)
  @property
  def is_all_dates(self):
      """Always True for a PeriodIndex."""
      return True
  @property
  def is_full(self):
      """
      Returns True if there are no missing periods from start to end, i.e.
      every pair of consecutive ordinals differs by less than 2. An empty
      index is considered full.

      Raises
      ------
      ValueError
          If the index is not monotonic.
      """
      if len(self) == 0:
          return True
      if not self.is_monotonic:
          raise ValueError('Index is not monotonic')
      values = self.values
      # A step of 2 or more between neighbours means a period was skipped.
      return ((values[1:] - values[:-1]) < 2).all()
  @property
  def freqstr(self):
      """The index's ``freq`` attribute, returned unchanged."""
      return self.freq
  def asfreq(self, freq=None, how='E'):
      """
      Convert every period to ``freq``, anchored at the start or end of the
      original span according to ``how``.

      Raises
      ------
      ValueError
          If the target frequency has a multiple other than 1.
      """
      how = _validate_end_alias(how)
      freq = _freq_mod.get_standard_freq(freq)

      source_base, _ = _gfc(self.freq)
      target_base, target_mult = _gfc(freq)
      if target_mult != 1:
          raise ValueError('Only mult == 1 supported')

      at_end = (how == 'E')
      converted = tslib.period_asfreq_arr(self.values, source_base,
                                          target_base, at_end)
      return self._simple_new(converted, self.name, freq=freq)
  def to_datetime(self, dayfirst=False):
      """Equivalent to ``to_timestamp()``; ``dayfirst`` is accepted but not
      used."""
      stamps = self.to_timestamp()
      return stamps
  # Array-valued field accessors. The integer passed to _field_accessor is
  # the field code forwarded to tslib.get_period_field_arr.
  _year = _field_accessor('year', 0)
  _month = _field_accessor('month', 3)
  _day = _field_accessor('day', 4)
  _hour = _field_accessor('hour', 5)
  _minute = _field_accessor('minute', 6)
  _second = _field_accessor('second', 7)
  _weekofyear = _field_accessor('week', 8)
  # Alias of _weekofyear.
  _week = _weekofyear
  _dayofweek = _field_accessor('dayofweek', 10)
  # Alias of _dayofweek.
  _weekday = _dayofweek
  # Exposed under both the private name and the public ``day_of_year``.
  _dayofyear = day_of_year = _field_accessor('dayofyear', 9)
  _quarter = _field_accessor('quarter', 2)
  _qyear = _field_accessor('qyear', 1)
  # Try to run function on index first, and then on elements of index
  # Especially important for group-by functionality
  def map(self, f):
      """
      Apply ``f`` to the whole index; if that fails or does not return an
      ndarray, apply it to each element of ``self.asobject`` instead.
      """
      try:
          whole = f(self)
      except Exception:
          return _algos.arrmap_object(self.asobject, f)

      if isinstance(whole, np.ndarray):
          return whole
      return _algos.arrmap_object(self.asobject, f)
  def _get_object_array(self):
      """Return an object array of Period instances built from the
      ordinals."""
      freq = self.freq

      def to_period(ordinal):
          return Period(ordinal=ordinal, freq=freq)

      vectorized = np.frompyfunc(to_period, 1, 1)
      return vectorized(self.values)
  def _mpl_repr(self):
      """Representation handed to matplotlib: an object array of Periods."""
      # how to represent ourselves to matplotlib
      periods = self._get_object_array()
      return periods
  def equals(self, other):
      """
      Determines if two Index objects contain the same elements.

      Identity is checked first; otherwise the ``asi8`` arrays of both
      objects are compared.
      """
      if not self.is_(other):
          return np.array_equal(self.asi8, other.asi8)
      return True
  def to_timestamp(self, freq=None, how='start'):
      """
      Cast to DatetimeIndex

      Parameters
      ----------
      freq : string or DateOffset, default 'D' for week or longer, 'S'
             otherwise
          Target frequency
      how : {'s', 'e', 'start', 'end'}

      Returns
      -------
      DatetimeIndex
      """
      how = _validate_end_alias(how)

      if freq is None:
          # No target given: pick the default timestamp frequency for this
          # index's own base.
          own_base, _ = _gfc(self.freq)
          freq = _freq_mod.get_to_timestamp_base(own_base)

      target_base, _ = _gfc(freq)
      converted = self.asfreq(freq, how)
      stamps = tslib.periodarr_to_dt64arr(converted.values, target_base)
      return DatetimeIndex(stamps, freq='infer', name=self.name)
  def shift(self, n):
      """
      Specialized shift which produces a PeriodIndex

      Parameters
      ----------
      n : int
          Periods to shift by

      Returns
      -------
      shifted : PeriodIndex
          Same name and frequency; NaT entries stay NaT.
      """
      nat_positions = self.values == tslib.iNaT
      shifted = self.values + n
      # Adding to iNaT would produce a bogus ordinal; restore the marker.
      shifted[nat_positions] = tslib.iNaT
      return PeriodIndex(data=shifted, name=self.name, freq=self.freq)
  def __add__(self, other):
      """Shift forward by ``other`` periods; defer to the other operand if
      the addition raises TypeError."""
      try:
          shifted = self.shift(other)
      except TypeError:
          # self.values + other raises TypeError for invalid input
          return NotImplemented
      return shifted
  def __sub__(self, other):
      """Shift backward by ``other`` periods; defer to the other operand if
      negation or the shift raises TypeError."""
      try:
          shifted = self.shift(-other)
      except TypeError:
          return NotImplemented
      return shifted
  @property
  def inferred_type(self):
      """Always ``'period'``."""
      # b/c data is represented as ints make sure we can't have ambiguous
      # indexing
      kind = 'period'
      return kind
  def get_value(self, series, key):
      """
      Fast lookup of value from 1-dimensional ndarray. Only use this if you
      know what you're doing

      The parent-class lookup is tried first. On KeyError/IndexError the key
      is parsed as a time string: a key coarser than the index frequency
      returns a slice of ``series``, a key of the same resolution is looked
      up by ordinal, and a finer key raises KeyError. Keys that cannot be
      parsed (TypeError) are converted with ``Period(key, self.freq)``.
      """
      s = _values_from_object(series)
      try:
          return _maybe_box(self, super(PeriodIndex, self).get_value(s, key), series, key)
      except (KeyError, IndexError):
          try:
              asdt, parsed, reso = parse_time_string(key, self.freq)
              grp = _freq_mod._infer_period_group(reso)
              freqn = _freq_mod._period_group(self.freq)

              vals = self.values

              # if our data is higher resolution than requested key, slice
              if grp < freqn:
                  iv = Period(asdt, freq=(grp, 1))
                  # First and last ordinal at the index frequency covered by
                  # the requested span.
                  ord1 = iv.asfreq(self.freq, how='S').ordinal
                  ord2 = iv.asfreq(self.freq, how='E').ordinal

                  # Span lies entirely outside the index's first/last values.
                  if ord2 < vals[0] or ord1 > vals[-1]:
                      raise KeyError(key)

                  pos = np.searchsorted(self.values, [ord1, ord2])
                  key = slice(pos[0], pos[1] + 1)
                  return series[key]
              elif grp == freqn:
                  key = Period(asdt, freq=self.freq).ordinal
                  return _maybe_box(self, self._engine.get_value(s, key), series, key)
              else:
                  raise KeyError(key)
          except TypeError:
              # Key was not parseable as a time string; fall through to the
              # generic Period conversion below.
              pass

          key = Period(key, self.freq).ordinal
          return _maybe_box(self, self._engine.get_value(s, key), series, key)
  def get_loc(self, key):
      """
      Get integer location for requested label

      Returns
      -------
      loc : int

      Raises
      ------
      KeyError
          If the key (after conversion to a Period) is not in the index.
      """
      try:
          # Fast path: the key is looked up in the engine as-is.
          return self._engine.get_loc(key)
      except KeyError:
          try:
              asdt, parsed, reso = parse_time_string(key, self.freq)
              key = asdt
          except TypeError:
              # Not a parseable string; let Period() handle the raw key.
              pass

          key = Period(key, self.freq)
          try:
              return self._engine.get_loc(key.ordinal)
          except KeyError:
              # Re-raise with the Period rather than the bare ordinal.
              raise KeyError(key)
  def slice_locs(self, start=None, end=None):
      """
      Index.slice_locs, customized to handle partial ISO-8601 string slicing

      String endpoints are resolved through ``_get_string_slice``; a pair of
      datetimes is resolved by ordinal search; everything else (including a
      string lookup that raised KeyError) goes to ``Int64Index.slice_locs``.
      """
      string_types = compat.string_types
      if isinstance(start, string_types) or isinstance(end, string_types):
          try:
              start_loc = self._get_string_slice(start).start if start else 0
              end_loc = (self._get_string_slice(end).stop if end
                         else len(self))
              return start_loc, end_loc
          except KeyError:
              pass

      if isinstance(start, datetime) and isinstance(end, datetime):
          ordinals = self.values
          first = Period(start, freq=self.freq)
          last = Period(end, freq=self.freq)

          lo = ordinals.searchsorted(first.ordinal, side='left')
          hi = ordinals.searchsorted(last.ordinal, side='right')
          return lo, hi

      return Int64Index.slice_locs(self, start, end)
  831. def _get_string_slice(self, key):
  832. if not self.is_monotonic:
  833. raise ValueError('Partial indexing only valid for '
  834. 'ordered time series')
  835. key, parsed, reso = parse_time_string(key, self.freq)
  836. grp = _freq_mod._infer_period_group(reso)
  837. freqn = _freq_mod._period_group(self.freq)
  838. if reso == 'year':
  839. t1 = Period(year=parsed.year, freq='A')
  840. elif reso == 'month':
  841. t1 = Period(year=parsed.year, month=parsed.month, freq='M')
  842. elif reso == 'quarter':
  843. q = (parsed.month - 1) // 3 + 1
  844. t1 = Period(year=parsed.year, quarter=q, freq='Q-DEC')
  845. elif reso == 'day' and grp < freqn:
  846. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  847. freq='D')
  848. elif reso == 'hour' and grp < freqn:
  849. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  850. hour=parsed.hour, freq='H')
  851. elif reso == 'minute' and grp < freqn:
  852. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  853. hour=parsed.hour, minute=parsed.minute, freq='T')
  854. elif reso == 'second' and grp < freqn:
  855. t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
  856. hour=parsed.hour, minute=parsed.minute, second=parsed.second,
  857. freq='S')
  858. else:
  859. raise KeyError(key)
  860. ordinals = self.values
  861. t2 = t1.asfreq(self.freq, how='end')
  862. t1 = t1.asfreq(self.freq, how='start')
  863. left = ordinals.searchsorted(t1.ordinal, side='left')
  864. right = ordinals.searchsorted(t2.ordinal, side='right')
  865. return slice(left, right)
  866. def join(self, other, how='left', level=None, return_indexers=False):
  867. """
  868. See Index.join
  869. """
  870. self._assert_can_do_setop(other)
  871. result = Int64Index.join(self, other, how=how, level=level,
  872. return_indexers=return_indexers)
  873. if return_indexers:
  874. result, lidx, ridx = result
  875. return self._apply_meta(result), lidx, ridx
  876. return self._apply_meta(result)
  877. def _assert_can_do_setop(self, other):
  878. if not isinstance(other, PeriodIndex):
  879. raise ValueError('can only call with other PeriodIndex-ed objects')
  880. if self.freq != other.freq:
  881. raise ValueError('Only like-indexed PeriodIndexes compatible '
  882. 'for join (for now)')
  883. def _wrap_union_result(self, other, result):
  884. name = self.name if self.name == other.name else None
  885. result = self._apply_meta(result)
  886. result.name = name
  887. return result
  888. def _apply_meta(self, rawarr):
  889. if not isinstance(rawarr, PeriodIndex):
  890. rawarr = rawarr.view(PeriodIndex)
  891. rawarr.freq = self.freq
  892. return rawarr
  893. def __getitem__(self, key):
  894. """Override numpy.ndarray's __getitem__ method to work as desired"""
  895. arr_idx = self.view(np.ndarray)
  896. if np.isscalar(key):
  897. val = arr_idx[key]
  898. return Period(ordinal=val, freq=self.freq)
  899. else:
  900. if com._is_bool_indexer(key):
  901. key = np.asarray(key)
  902. result = arr_idx[key]
  903. if result.ndim > 1:
  904. # MPL kludge
  905. # values = np.asarray(list(values), dtype=object)
  906. # return values.reshape(result.shape)
  907. return PeriodIndex(result, name=self.name, freq=self.freq)
  908. return PeriodIndex(result, name=self.name, freq=self.freq)
  909. def _format_with_header(self, header, **kwargs):
  910. return header + self._format_native_types(**kwargs)
  911. def _format_native_types(self, na_rep=u('NaT'), **kwargs):
  912. values = np.array(list(self), dtype=object)
  913. mask = isnull(self.values)
  914. values[mask] = na_rep
  915. imask = ~mask
  916. values[imask] = np.array([u('%s') % dt for dt in values[imask]])
  917. return values.tolist()
  918. def __array_finalize__(self, obj):
  919. if not self.ndim: # pragma: no cover
  920. return self.item()
  921. self.freq = getattr(obj, 'freq', None)
  922. self.name = getattr(obj, 'name', None)
  923. self._reset_identity()
  924. def _format_footer(self):
  925. tagline = 'Length: %d, Freq: %s'
  926. return tagline % (len(self), self.freqstr)
  927. def take(self, indices, axis=None):
  928. """
  929. Analogous to ndarray.take
  930. """
  931. indices = com._ensure_platform_int(indices)
  932. taken = self.values.take(indices, axis=axis)
  933. return self._simple_new(taken, self.name, freq=self.freq)
  934. def append(self, other):
  935. """
  936. Append a collection of Index options together
  937. Parameters
  938. ----------
  939. other : Index or list/tuple of indices
  940. Returns
  941. -------
  942. appended : Index
  943. """
  944. name = self.name
  945. to_concat = [self]
  946. if isinstance(other, (list, tuple)):
  947. to_concat = to_concat + list(other)
  948. else:
  949. to_concat.append(other)
  950. for obj in to_concat:
  951. if isinstance(obj, Index) and obj.name != name:
  952. name = None
  953. break
  954. to_concat = self._ensure_compat_concat(to_concat)
  955. if isinstance(to_concat[0], PeriodIndex):
  956. if len(set([x.freq for x in to_concat])) > 1:
  957. # box
  958. to_concat = [x.asobject for x in to_concat]
  959. else:
  960. cat_values = np.concatenate([x.values for x in to_concat])
  961. return PeriodIndex(cat_values, freq=self.freq, name=name)
  962. to_concat = [x.values if isinstance(x, Index) else x
  963. for x in to_concat]
  964. return Index(com._concat_compat(to_concat), name=name)
  965. def __reduce__(self):
  966. """Necessary for making this object picklable"""
  967. object_state = list(np.ndarray.__reduce__(self))
  968. subclass_state = (self.name, self.freq)
  969. object_state[2] = (object_state[2], subclass_state)
  970. return tuple(object_state)
  971. def __setstate__(self, state):
  972. """Necessary for making this object picklable"""
  973. if len(state) == 2:
  974. nd_state, own_state = state
  975. np.ndarray.__setstate__(self, nd_state)
  976. self.name = own_state[0]
  977. try: # backcompat
  978. self.freq = own_state[1]
  979. except:
  980. pass
  981. else: # pragma: no cover
  982. np.ndarray.__setstate__(self, state)
  983. def _get_ordinal_range(start, end, periods, freq):
  984. if com._count_not_none(start, end, periods) < 2:
  985. raise ValueError('Must specify 2 of start, end, periods')
  986. if start is not None:
  987. start = Period(start, freq)
  988. if end is not None:
  989. end = Period(end, freq)
  990. is_start_per = isinstance(start, Period)
  991. is_end_per = isinstance(end, Period)
  992. if is_start_per and is_end_per and start.freq != end.freq:
  993. raise ValueError('Start and end must have same freq')
  994. if ((is_start_per and start.ordinal == tslib.iNaT) or
  995. (is_end_per and end.ordinal == tslib.iNaT)):
  996. raise ValueError('Start and end must not be NaT')
  997. if freq is None:
  998. if is_start_per:
  999. freq = start.freq
  1000. elif is_end_per:
  1001. freq = end.freq
  1002. else: # pragma: no cover
  1003. raise ValueError('Could not infer freq from start/end')
  1004. if periods is not None:
  1005. if start is None:
  1006. data = np.arange(end.ordinal - periods + 1,
  1007. end.ordinal + 1,
  1008. dtype=np.int64)
  1009. else:
  1010. data = np.arange(start.ordinal, start.ordinal + periods,
  1011. dtype=np.int64)
  1012. else:
  1013. data = np.arange(start.ordinal, end.ordinal + 1, dtype=np.int64)
  1014. return data, freq
  1015. def _range_from_fields(year=None, month=None, quarter=None, day=None,
  1016. hour=None, minute=None, second=None, freq=None):
  1017. if hour is None:
  1018. hour = 0
  1019. if minute is None:
  1020. minute = 0
  1021. if second is None:
  1022. second = 0
  1023. if day is None:
  1024. day = 1
  1025. ordinals = []
  1026. if quarter is not None:
  1027. if freq is None:
  1028. freq = 'Q'
  1029. base = FreqGroup.FR_QTR
  1030. else:
  1031. base, mult = _gfc(freq)
  1032. if mult != 1:
  1033. raise ValueError('Only mult == 1 supported')
  1034. if base != FreqGroup.FR_QTR:
  1035. raise AssertionError("base must equal FR_QTR")
  1036. year, quarter = _make_field_arrays(year, quarter)
  1037. for y, q in zip(year, quarter):
  1038. y, m = _quarter_to_myear(y, q, freq)
  1039. val = tslib.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base)
  1040. ordinals.append(val)
  1041. else:
  1042. base, mult = _gfc(freq)
  1043. if mult != 1:
  1044. raise ValueError('Only mult == 1 supported')
  1045. arrays = _make_field_arrays(year, month, day, hour, minute, second)
  1046. for y, mth, d, h, mn, s in zip(*arrays):
  1047. ordinals.append(tslib.period_ordinal(y, mth, d, h, mn, s, 0, 0, base))
  1048. return np.array(ordinals, dtype=np.int64), freq
  1049. def _make_field_arrays(*fields):
  1050. length = None
  1051. for x in fields:
  1052. if isinstance(x, (list, np.ndarray, ABCSeries)):
  1053. if length is not None and len(x) != length:
  1054. raise ValueError('Mismatched Period array lengths')
  1055. elif length is None:
  1056. length = len(x)
  1057. arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries))
  1058. else np.repeat(x, length) for x in fields]
  1059. return arrays
  1060. def _ordinal_from_fields(year, month, quarter, day, hour, minute,
  1061. second, freq):
  1062. base, mult = _gfc(freq)
  1063. if mult != 1:
  1064. raise ValueError('Only mult == 1 supported')
  1065. if quarter is not None:
  1066. year, month = _quarter_to_myear(year, quarter, freq)
  1067. return tslib.period_ordinal(year, month, day, hour, minute, second, 0, 0, base)
  1068. def _quarter_to_myear(year, quarter, freq):
  1069. if quarter is not None:
  1070. if quarter <= 0 or quarter > 4:
  1071. raise ValueError('Quarter must be 1 <= q <= 4')
  1072. mnum = _month_numbers[_freq_mod._get_rule_month(freq)] + 1
  1073. month = (mnum + (quarter - 1) * 3) % 12 + 1
  1074. if month > mnum:
  1075. year -= 1
  1076. return year, month
  1077. def _validate_end_alias(how):
  1078. how_dict = {'S': 'S', 'E': 'E',
  1079. 'START': 'S', 'FINISH': 'E',
  1080. 'BEGIN': 'S', 'END': 'E'}
  1081. how = how_dict.get(str(how).upper())
  1082. if how not in set(['S', 'E']):
  1083. raise ValueError('How must be one of S or E')
  1084. return how
  1085. def pnow(freq=None):
  1086. return Period(datetime.now(), freq=freq)
  1087. def period_range(start=None, end=None, periods=None, freq='D', name=None):
  1088. """
  1089. Return a fixed frequency datetime index, with day (calendar) as the default
  1090. frequency
  1091. Parameters
  1092. ----------
  1093. start :
  1094. end :
  1095. periods : int, default None
  1096. Number of periods in the index
  1097. freq : str/DateOffset, default 'D'
  1098. Frequency alias
  1099. name : str, default None
  1100. Name for the resulting PeriodIndex
  1101. Returns
  1102. -------
  1103. prng : PeriodIndex
  1104. """
  1105. return PeriodIndex(start=start, end=end, periods=periods,
  1106. freq=freq, name=name)