util.py | searchcode

/pandas/tseries/util.py

https://github.com/thouis/pandas · Python · 94 lines · 72 code · 4 blank · 18 comment · 0 complexity · 305da021dc6b3f9231198907053328e5 MD5 · raw file

import numpy as np

import pandas as pd

import pandas.core.common as com
from pandas.core.frame import DataFrame
import pandas.core.nanops as nanops


def pivot_annual(series, freq=None):
    """
    Group a series by years, taking leap years into account.

    The output has as many rows as distinct years in the original series,
    and as many columns as the length of a leap year in the units corresponding
    to the original frequency (366 for daily frequency, 366*24 for hourly...).
    The first column of the output corresponds to Jan. 1st, 00:00:00,
    while the last column corresponds to Dec. 31st, 23:59:59.
    Entries corresponding to Feb. 29th are NaN for non-leap years.

    For example, if the initial series has a daily frequency, the 59th column
    of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st,
    and the 60th column is NaN for non-leap years.
    With an hourly initial frequency, the (59*24)th column of the output always
    corresponds to Feb. 28th 23:00, the (60*24 + 1)th column to Mar. 1st 00:00,
    and the 24 columns in between are NaN for non-leap years.

    With a monthly frequency ('M' or 'BM') the output has 12 columns, one per
    calendar month.

    Parameters
    ----------
    series : TimeSeries
        Its index must expose ``year`` and, depending on the frequency,
        ``dayofyear``, ``month`` and ``hour``.
    freq : string or None, default None
        One of 'D', 'M', 'BM' or 'H' (case-insensitive). When None, the
        frequency attached to ``series.index`` is used.

    Returns
    -------
    annual : DataFrame
        One row per distinct year (sorted ascending); cells with no
        observation are NaN.

    Raises
    ------
    NotImplementedError
        If the frequency is not one of the supported ones.
    """
    index = series.index
    # Work on plain ndarrays: the offsets below are adjusted in place through
    # a boolean mask, which immutable index objects do not allow.
    year = np.asarray(index.year)
    # Sorted so each observation's row can be found with searchsorted.
    years = np.sort(nanops.unique1d(year))

    if freq is not None:
        freq = freq.upper()
    else:
        freq = series.index.freq

    # `~` is the boolean complement; unary minus on a boolean array is
    # rejected by current NumPy.
    nonleap = ~isleapyear(year)

    if freq == 'D':
        width = 366
        offset = np.asarray(index.dayofyear) - 1

        # adjust for leap year: leave the Feb. 29th slot empty
        offset[nonleap & (offset >= 59)] += 1

        columns = range(1, 367)
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.asarray(index.month) - 1
        columns = range(1, 13)
    elif freq == 'H':
        width = 8784
        # Hour-of-year taken from the calendar rather than from the position
        # inside each year, so a year that starts late or has missing hours
        # still lands in the right columns.
        offset = ((np.asarray(index.dayofyear) - 1) * 24 +
                  np.asarray(index.hour))
        # adjust for leap year: leave the 24 Feb. 29th slots empty
        offset[nonleap & (offset >= 1416)] += 24
        columns = range(1, 8785)
    else:
        raise NotImplementedError(freq)

    # Row = rank of the year among the distinct years present. Subtracting
    # the minimum year instead would index past the end of `values` whenever
    # a year is missing from the series.
    row = np.searchsorted(years, year)
    flat_index = row * width + offset
    flat_index = com._ensure_platform_int(flat_index)

    values = np.empty((len(years), width))
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)


def isleapyear(year):
    """
    Tell whether each given year is a leap year.

    A year qualifies when it is divisible by 400, or when it is divisible
    by 4 but leaves a positive remainder modulo 100.

    Parameters
    ----------
    year : integer / sequence
        A single year or a collection of years.

    Returns
    -------
    Boolean result with the same shape as ``np.asarray(year)``.
    """
    yrs = np.asarray(year)
    every_400 = (yrs % 400) == 0
    every_4 = (yrs % 4) == 0
    not_century = (yrs % 100) > 0
    return every_400 | (every_4 & not_century)