PageRenderTime 48ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tseries/util.py

https://github.com/thouis/pandas
Python | 94 lines | 72 code | 4 blank | 18 comment | 0 complexity | 305da021dc6b3f9231198907053328e5 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import numpy as np
  2. import pandas as pd
  3. import pandas.core.common as com
  4. from pandas.core.frame import DataFrame
  5. import pandas.core.nanops as nanops
  6. def pivot_annual(series, freq=None):
  7. """
  8. Group a series by years, taking leap years into account.
  9. The output has as many rows as distinct years in the original series,
  10. and as many columns as the length of a leap year in the units corresponding
  11. to the original frequency (366 for daily frequency, 366*24 for hourly...).
  12. The fist column of the output corresponds to Jan. 1st, 00:00:00,
  13. while the last column corresponds to Dec, 31st, 23:59:59.
  14. Entries corresponding to Feb. 29th are masked for non-leap years.
  15. For example, if the initial series has a daily frequency, the 59th column
  16. of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st,
  17. and the 60th column is masked for non-leap years.
  18. With a hourly initial frequency, the (59*24)th column of the output always
  19. correspond to Feb. 28th 23:00, the (61*24)th column to Mar. 1st, 00:00, and
  20. the 24 columns between (59*24) and (61*24) are masked.
  21. If the original frequency is less than daily, the output is equivalent to
  22. ``series.convert('A', func=None)``.
  23. Parameters
  24. ----------
  25. series : TimeSeries
  26. freq : string or None, default None
  27. Returns
  28. -------
  29. annual : DataFrame
  30. """
  31. index = series.index
  32. year = index.year
  33. years = nanops.unique1d(year)
  34. if freq is not None:
  35. freq = freq.upper()
  36. else:
  37. freq = series.index.freq
  38. if freq == 'D':
  39. width = 366
  40. offset = index.dayofyear - 1
  41. # adjust for leap year
  42. offset[(-isleapyear(year)) & (offset >= 59)] += 1
  43. columns = range(1, 367)
  44. # todo: strings like 1/1, 1/25, etc.?
  45. elif freq in ('M', 'BM'):
  46. width = 12
  47. offset = index.month - 1
  48. columns = range(1, 13)
  49. elif freq == 'H':
  50. width = 8784
  51. grouped = series.groupby(series.index.year)
  52. defaulted = grouped.apply(lambda x: x.reset_index(drop=True))
  53. defaulted.index = defaulted.index.droplevel(0)
  54. offset = np.asarray(defaulted.index)
  55. offset[-isleapyear(year) & (offset >= 1416)] += 24
  56. columns = range(1, 8785)
  57. else:
  58. raise NotImplementedError(freq)
  59. flat_index = (year - years.min()) * width + offset
  60. flat_index = com._ensure_platform_int(flat_index)
  61. values = np.empty((len(years), width))
  62. values.fill(np.nan)
  63. values.put(flat_index, series.values)
  64. return DataFrame(values, index=years, columns=columns)
  65. def isleapyear(year):
  66. """
  67. Returns true if year is a leap year.
  68. Parameters
  69. ----------
  70. year : integer / sequence
  71. A given (list of) year(s).
  72. """
  73. year = np.asarray(year)
  74. return np.logical_or(year % 400 == 0,
  75. np.logical_and(year % 4 == 0, year % 100 > 0))