/pandas/tseries/util.py
Python | 94 lines | 72 code | 4 blank | 18 comment | 0 complexity | 305da021dc6b3f9231198907053328e5 MD5 | raw file
Possible License(s): BSD-3-Clause
- import numpy as np
- import pandas as pd
- import pandas.core.common as com
- from pandas.core.frame import DataFrame
- import pandas.core.nanops as nanops
def pivot_annual(series, freq=None):
    """
    Group a series by years, taking leap years into account.

    The output has one row per distinct year found in the index of the
    original series (sorted in ascending order), and as many columns as a
    leap year has periods at the requested frequency: 366 for daily data,
    366 * 24 = 8784 for hourly data and 12 for monthly data. Columns are
    labelled starting from 1. Cells without a matching observation are NaN.

    The first column corresponds to the start of Jan. 1st. For daily and
    hourly data the slots of Feb. 29th are always reserved, so a given
    calendar date lands in the same column every year; those slots stay NaN
    for non-leap years.

    For example, if the initial series has a daily frequency, column 59 of
    the output always corresponds to Feb. 28th, column 61 to Mar. 1st, and
    column 60 is NaN for non-leap years. With an hourly frequency, column
    59 * 24 always corresponds to Feb. 28th 23:00, column 60 * 24 + 1 to
    Mar. 1st 00:00, and the 24 columns in between are NaN for non-leap years.

    Parameters
    ----------
    series : TimeSeries
        Its index must expose ``year`` and, depending on the frequency,
        ``dayofyear``, ``month`` and ``hour`` (plus ``freq`` when the
        ``freq`` argument is None).
    freq : string or None, default None
        One of 'D', 'M', 'BM' or 'H' (case-insensitive). When None,
        ``series.index.freq`` is used instead.

    Returns
    -------
    annual : DataFrame

    Raises
    ------
    NotImplementedError
        If the frequency is not one of the supported ones.
    """
    index = series.index
    year = np.asarray(index.year)
    # ``years`` holds the sorted distinct years; ``row`` maps every
    # observation to the position of its year in ``years``. Using the
    # position (instead of ``year - years.min()``) keeps the flat index in
    # bounds when some years are missing and keeps the row labels aligned
    # with the data when the series is not sorted.
    years, row = np.unique(year, return_inverse=True)

    if freq is not None:
        freq = freq.upper()
    else:
        freq = index.freq

    if freq == 'D':
        width = 366
        # np.array makes a private, writable copy that can be adjusted below
        offset = np.array(index.dayofyear) - 1

        # adjust for leap year: skip the Feb. 29th slot in non-leap years
        offset[~isleapyear(year) & (offset >= 59)] += 1

        columns = range(1, 367)
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.array(index.month) - 1
        columns = range(1, 13)
    elif freq == 'H':
        width = 8784
        # Hour of the year derived from the timestamps themselves, so that
        # years which start late or have holes still land in the right column.
        offset = (np.array(index.dayofyear) - 1) * 24 + np.asarray(index.hour)

        # skip the 24 slots of Feb. 29th (59 * 24 == 1416) in non-leap years
        offset[~isleapyear(year) & (offset >= 1416)] += 24

        columns = range(1, 8785)
    else:
        raise NotImplementedError(freq)

    # position of every observation in the flattened (years x width) table
    flat_index = np.asarray(row * width + offset, dtype=np.intp)

    values = np.empty((len(years), width))
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)
def isleapyear(year):
    """
    Tell whether the given year(s) are leap years.

    A year counts as a leap year when it is divisible by 400, or when it is
    divisible by 4 without being divisible by 100.

    Parameters
    ----------
    year : integer / sequence
        A given (list of) year(s).

    Returns
    -------
    Boolean result of the test, element-wise when a sequence is given.
    """
    yr = np.asarray(year)

    every_400 = yr % 400 == 0
    every_4 = yr % 4 == 0
    not_century = yr % 100 > 0

    regular_leap = np.logical_and(every_4, not_century)
    return np.logical_or(every_400, regular_leap)