PageRenderTime 27ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/core/daterange.py

https://github.com/rkabir/pandas
Python | 490 lines | 414 code | 47 blank | 29 comment | 26 complexity | 5b2db1f5af5a391b098b0df5723644f7 MD5 | raw file
  1. # pylint: disable=E1101,E1103
  2. from datetime import datetime
  3. import operator
  4. import numpy as np
  5. from pandas.core.index import Index
  6. import pandas.core.datetools as datetools
  7. __all__ = ['DateRange']
  8. #-------------------------------------------------------------------------------
  9. # DateRange class
  10. def _bin_op(op):
  11. def f(self, other):
  12. return op(self.view(np.ndarray), other)
  13. return f
  14. _CACHE_START = datetime(1950, 1, 1)
  15. _CACHE_END = datetime(2030, 1, 1)
  16. class DateRange(Index):
  17. """
  18. Fixed frequency date range according to input parameters.
  19. Input dates satisfy:
  20. begin <= d <= end, where d lies on the given offset
  21. Parameters
  22. ----------
  23. start : {datetime, None}
  24. left boundary for range
  25. end : {datetime, None}
  26. right boundary for range
  27. periods : int
  28. Number of periods to generate.
  29. offset : DateOffset, default is 1 BusinessDay
  30. Used to determine the dates returned
  31. time_rule : time_rule to use
  32. tzinfo : pytz.timezone
  33. To endow DateRange with time zone information
  34. """
  35. _cache = {}
  36. def __new__(cls, start=None, end=None, periods=None,
  37. offset=datetools.bday, time_rule=None,
  38. tzinfo=None, **kwds):
  39. time_rule = kwds.get('timeRule', time_rule)
  40. if time_rule is not None:
  41. offset = datetools.getOffset(time_rule)
  42. if time_rule is None:
  43. if offset in datetools._offsetNames:
  44. time_rule = datetools._offsetNames[offset]
  45. # Cachable
  46. if not start:
  47. start = kwds.get('begin')
  48. if not periods:
  49. periods = kwds.get('nPeriods')
  50. start = datetools.to_datetime(start)
  51. end = datetools.to_datetime(end)
  52. # inside cache range. Handle UTC case
  53. useCache = (offset.isAnchored() and
  54. isinstance(offset, datetools.CacheableOffset))
  55. start, end, tzinfo = _figure_out_timezone(start, end, tzinfo)
  56. useCache = useCache and _naive_in_cache_range(start, end)
  57. if useCache:
  58. index = cls._cached_range(start, end, periods=periods,
  59. offset=offset, time_rule=time_rule)
  60. if tzinfo is None:
  61. return index
  62. else:
  63. xdr = generate_range(start=start, end=end, periods=periods,
  64. offset=offset, time_rule=time_rule)
  65. index = list(xdr)
  66. if tzinfo is not None:
  67. index = [d.replace(tzinfo=tzinfo) for d in index]
  68. index = np.array(index, dtype=object, copy=False)
  69. index = index.view(cls)
  70. index.offset = offset
  71. index.tzinfo = tzinfo
  72. return index
  73. def __reduce__(self):
  74. """Necessary for making this object picklable"""
  75. a, b, state = Index.__reduce__(self)
  76. aug_state = state, self.offset, self.tzinfo
  77. return a, b, aug_state
  78. def __setstate__(self, aug_state):
  79. """Necessary for making this object picklable"""
  80. index_state = aug_state[:1]
  81. offset = aug_state[1]
  82. # for backwards compatibility
  83. if len(aug_state) > 2:
  84. tzinfo = aug_state[2]
  85. else: # pragma: no cover
  86. tzinfo = None
  87. self.offset = offset
  88. self.tzinfo = tzinfo
  89. Index.__setstate__(self, *index_state)
  90. def equals(self, other):
  91. if self is other:
  92. return True
  93. if not isinstance(other, Index):
  94. return False
  95. return Index.equals(self.view(Index), other)
  96. def is_all_dates(self):
  97. return True
  98. @classmethod
  99. def _cached_range(cls, start=None, end=None, periods=None, offset=None,
  100. time_rule=None):
  101. # HACK: fix this dependency later
  102. if time_rule is not None:
  103. offset = datetools.getOffset(time_rule)
  104. if offset is None:
  105. raise Exception('Must provide a DateOffset!')
  106. if offset not in cls._cache:
  107. xdr = generate_range(_CACHE_START, _CACHE_END, offset=offset)
  108. arr = np.array(list(xdr), dtype=object, copy=False)
  109. cachedRange = arr.view(DateRange)
  110. cachedRange.offset = offset
  111. cachedRange.tzinfo = None
  112. cls._cache[offset] = cachedRange
  113. else:
  114. cachedRange = cls._cache[offset]
  115. if start is None:
  116. if end is None:
  117. raise Exception('Must provide start or end date!')
  118. if periods is None:
  119. raise Exception('Must provide number of periods!')
  120. assert(isinstance(end, datetime))
  121. end = offset.rollback(end)
  122. endLoc = cachedRange.indexMap[end] + 1
  123. startLoc = endLoc - periods
  124. elif end is None:
  125. assert(isinstance(start, datetime))
  126. start = offset.rollforward(start)
  127. startLoc = cachedRange.indexMap[start]
  128. if periods is None:
  129. raise Exception('Must provide number of periods!')
  130. endLoc = startLoc + periods
  131. else:
  132. start = offset.rollforward(start)
  133. end = offset.rollback(end)
  134. startLoc = cachedRange.indexMap[start]
  135. endLoc = cachedRange.indexMap[end] + 1
  136. indexSlice = cachedRange[startLoc:endLoc]
  137. return indexSlice
  138. def __array_finalize__(self, obj):
  139. if self.ndim == 0: # pragma: no cover
  140. return self.item()
  141. self.offset = getattr(obj, 'offset', None)
  142. __lt__ = _bin_op(operator.lt)
  143. __le__ = _bin_op(operator.le)
  144. __gt__ = _bin_op(operator.gt)
  145. __ge__ = _bin_op(operator.ge)
  146. __eq__ = _bin_op(operator.eq)
  147. def __getslice__(self, i, j):
  148. return self.__getitem__(slice(i, j))
  149. def __getitem__(self, key):
  150. """Override numpy.ndarray's __getitem__ method to work as desired"""
  151. result = self.view(np.ndarray)[key]
  152. if isinstance(key, (int, np.integer)):
  153. return result
  154. elif isinstance(key, slice):
  155. new_index = result.view(DateRange)
  156. if key.step is not None:
  157. new_index.offset = key.step * self.offset
  158. else:
  159. new_index.offset = self.offset
  160. new_index.tzinfo = self.tzinfo
  161. return new_index
  162. else:
  163. return Index(result)
  164. def summary(self):
  165. if len(self) > 0:
  166. index_summary = ', %s to %s' % (self[0], self[-1])
  167. else:
  168. index_summary = ''
  169. sum_line = 'DateRange: %s entries%s' % (len(self), index_summary)
  170. sum_line += '\noffset: %s' % self.offset
  171. if self.tzinfo is not None:
  172. sum_line += ', tzinfo: %s' % self.tzinfo
  173. return sum_line
  174. def __repr__(self):
  175. output = str(self.__class__) + '\n'
  176. output += 'offset: %s, tzinfo: %s\n' % (self.offset, self.tzinfo)
  177. if len(self) > 0:
  178. output += '[%s, ..., %s]\n' % (self[0], self[-1])
  179. output += 'length: %d' % len(self)
  180. return output
  181. __str__ = __repr__
  182. def shift(self, n, offset=None):
  183. """
  184. Specialized shift which produces a DateRange
  185. Parameters
  186. ----------
  187. n : int
  188. Periods to shift by
  189. offset : DateOffset or timedelta-like, optional
  190. Returns
  191. -------
  192. shifted : DateRange
  193. """
  194. if offset is not None and offset != self.offset:
  195. return Index.shift(self, n, offset)
  196. if n == 0:
  197. # immutable so OK
  198. return self
  199. start = self[0] + n * self.offset
  200. end = self[-1] + n * self.offset
  201. return DateRange(start, end, offset=self.offset)
  202. def union(self, other):
  203. """
  204. Specialized union for DateRange objects. If combine
  205. overlapping ranges with the same DateOffset, will be much
  206. faster than Index.union
  207. Parameters
  208. ----------
  209. other : DateRange or array-like
  210. Returns
  211. -------
  212. y : Index or DateRange
  213. """
  214. if not isinstance(other, DateRange) or other.offset != self.offset:
  215. return Index.union(self.view(Index), other)
  216. offset = self.offset
  217. # to make our life easier, "sort" the two ranges
  218. if self[0] <= other[0]:
  219. left, right = self, other
  220. else:
  221. left, right = other, self
  222. left_start, left_end = left[0], left[-1]
  223. right_start, right_end = right[0], right[-1]
  224. # Only need to "adjoin", not overlap
  225. if (left_end + offset) >= right_start:
  226. return DateRange(left_start, max(left_end, right_end),
  227. offset=offset)
  228. else:
  229. return Index.union(self, other)
  230. def tz_normalize(self, tz):
  231. """
  232. Convert DateRange from one time zone to another (using pytz)
  233. Returns
  234. -------
  235. normalized : DateRange
  236. """
  237. new_dates = np.array([tz.normalize(x) for x in self])
  238. new_dates = new_dates.view(DateRange)
  239. new_dates.offset = self.offset
  240. new_dates.tzinfo = tz
  241. return new_dates
  242. def tz_localize(self, tz):
  243. """
  244. Localize tzinfo-naive DateRange to given time zone (using pytz)
  245. Returns
  246. -------
  247. localized : DateRange
  248. """
  249. new_dates = np.array([tz.localize(x) for x in self])
  250. new_dates = new_dates.view(DateRange)
  251. new_dates.offset = self.offset
  252. new_dates.tzinfo = tz
  253. return new_dates
  254. def tz_validate(self):
  255. """
  256. For a localized time zone, verify that there are no DST ambiguities
  257. Returns
  258. -------
  259. result : boolean
  260. True if there are no DST ambiguities
  261. """
  262. import pytz
  263. tz = self.tzinfo
  264. if tz is None or tz is pytz.utc:
  265. return True
  266. # See if there are any DST resolution problems
  267. for date in self:
  268. try:
  269. tz.utcoffset(date.replace(tzinfo=None))
  270. except pytz.InvalidTimeError:
  271. return False
  272. return True
  273. def generate_range(start=None, end=None, periods=None,
  274. offset=datetools.BDay(), time_rule=None):
  275. """
  276. Generates a sequence of dates corresponding to the specified time
  277. offset. Similar to dateutil.rrule except uses pandas DateOffset
  278. objects to represent time increments
  279. Parameters
  280. ----------
  281. start : datetime (default None)
  282. end : datetime (default None)
  283. periods : int, optional
  284. Note
  285. ----
  286. * This method is faster for generating weekdays than dateutil.rrule
  287. * At least two of (start, end, periods) must be specified.
  288. * If both start and end are specified, the returned dates will
  289. satisfy start <= date <= end.
  290. Returns
  291. -------
  292. dates : generator object
  293. See also
  294. --------
  295. DateRange, dateutil.rrule
  296. """
  297. if time_rule is not None:
  298. offset = datetools.getOffset(time_rule)
  299. if time_rule is None:
  300. if offset in datetools._offsetNames:
  301. time_rule = datetools._offsetNames[offset]
  302. start = datetools.to_datetime(start)
  303. end = datetools.to_datetime(end)
  304. if start and not offset.onOffset(start):
  305. start = offset.rollforward(start)
  306. if end and not offset.onOffset(end):
  307. end = offset.rollback(end)
  308. if periods is None and end < start:
  309. end = None
  310. periods = 0
  311. if end is None:
  312. end = start + (periods - 1) * offset
  313. if start is None:
  314. start = end - (periods - 1) * offset
  315. cur = start
  316. if offset._normalizeFirst:
  317. cur = datetools.normalize_date(cur)
  318. while cur <= end:
  319. yield cur
  320. # faster than cur + offset
  321. cur = offset.apply(cur)
  322. # Do I want to cache UTC dates? Can't decide...
  323. # def _utc_in_cache_range(start, end):
  324. # import pytz
  325. # if start is None or end is None:
  326. # return False
  327. # _CACHE_START = datetime(1950, 1, 1, tzinfo=pytz.utc)
  328. # _CACHE_END = datetime(2030, 1, 1, tzinfo=pytz.utc)
  329. # try:
  330. # assert(_isutc(start))
  331. # assert(_isutc(end))
  332. # except AssertionError:
  333. # raise Exception('To use localized time zone, create '
  334. # 'DateRange with pytz.UTC then call '
  335. # 'tz_normalize')
  336. # return _in_range(start, end, _CACHE_START, _CACHE_END)
  337. # def _isutc(dt):
  338. # import pytz
  339. # return dt.tzinfo is pytz.utc
  340. # def _hastz(dt):
  341. # return dt is not None and dt.tzinfo is not None
  342. # def _have_pytz():
  343. # try:
  344. # import pytz
  345. # return True
  346. # except ImportError:
  347. # return False
  348. def _in_range(start, end, rng_start, rng_end):
  349. return start > rng_start and end < rng_end
  350. def _naive_in_cache_range(start, end):
  351. if start is None or end is None:
  352. return False
  353. else:
  354. return _in_range(start, end, _CACHE_START, _CACHE_END)
  355. def _figure_out_timezone(start, end, tzinfo):
  356. inferred_tz = _infer_tzinfo(start, end)
  357. tz = inferred_tz
  358. if inferred_tz is None and tzinfo is not None:
  359. tz = tzinfo
  360. elif tzinfo is not None:
  361. assert(inferred_tz == tzinfo)
  362. # make tz naive for now
  363. start = start if start is None else start.replace(tzinfo=None)
  364. end = end if end is None else end.replace(tzinfo=None)
  365. return start, end, tz
  366. def _infer_tzinfo(start, end):
  367. def _infer(a, b):
  368. tz = a.tzinfo
  369. if b and b.tzinfo:
  370. assert(tz == b.tzinfo)
  371. return tz
  372. tz = None
  373. if start is not None:
  374. tz = _infer(start, end)
  375. elif end is not None:
  376. tz = _infer(end, start)
  377. return tz
  378. if __name__ == '__main__':
  379. import pytz
  380. # just want it to work
  381. tz = pytz.timezone('US/Eastern')
  382. dr = DateRange(datetime(2011, 3, 12, tzinfo=pytz.utc),
  383. periods=50, offset=datetools.Hour())
  384. dr2 = dr.tz_normalize(tz)