PageRenderTime 40ms CodeModel.GetById 22ms app.highlight 15ms RepoModel.GetById 1ms app.codeStats 0ms

/statsmodels/tsa/seasonal.py

https://github.com/chatcannon/statsmodels
Python | 162 lines | 131 code | 18 blank | 13 comment | 13 complexity | fa4c5e50ad569896c90d7efc72047507 MD5 | raw file
  1"""
  2Seasonal Decomposition by Moving Averages
  3"""
  4from statsmodels.compat.python import lmap, range, iteritems
  5import numpy as np
  6from pandas.core.nanops import nanmean as pd_nanmean
  7from .filters._utils import _maybe_get_pandas_wrapper_freq
  8from .filters.filtertools import convolution_filter
  9from statsmodels.tsa.tsatools import freq_to_period
 10
 11
 12def seasonal_mean(x, freq):
 13    """
 14    Return means for each period in x. freq is an int that gives the
 15    number of periods per cycle. E.g., 12 for monthly. NaNs are ignored
 16    in the mean.
 17    """
 18    return np.array([pd_nanmean(x[i::freq]) for i in range(freq)])
 19
 20
 21def seasonal_decompose(x, model="additive", filt=None, freq=None):
 22    """
 23    Parameters
 24    ----------
 25    x : array-like
 26        Time series
 27    model : str {"additive", "multiplicative"}
 28        Type of seasonal component. Abbreviations are accepted.
 29    filt : array-like
 30        The filter coefficients for filtering out the seasonal component.
 31        The default is a symmetric moving average.
 32    freq : int, optional
 33        Frequency of the series. Must be used if x is not a pandas
 34        object with a timeseries index.
 35
 36    Returns
 37    -------
 38    results : obj
 39        A object with seasonal, trend, and resid attributes.
 40
 41    Notes
 42    -----
 43    This is a naive decomposition. More sophisticated methods should
 44    be preferred.
 45
 46    The additive model is Y[t] = T[t] + S[t] + e[t]
 47
 48    The multiplicative model is Y[t] = T[t] * S[t] * e[t]
 49
 50    The seasonal component is first removed by applying a convolution
 51    filter to the data. The average of this smoothed series for each
 52    period is the returned seasonal component.
 53
 54    See Also
 55    --------
 56    statsmodels.tsa.filters.convolution_filter
 57    """
 58    _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
 59    x = np.asanyarray(x).squeeze()
 60    nobs = len(x)
 61
 62    if not np.all(np.isfinite(x)):
 63        raise ValueError("This function does not handle missing values")
 64    if model.startswith('m'):
 65        if np.any(x <= 0):
 66            raise ValueError("Multiplicative seasonality is not appropriate "
 67                             "for zero and negative values")
 68
 69    if pfreq is not None:
 70        pfreq = freq_to_period(pfreq)
 71        if freq and pfreq != freq:
 72            raise ValueError("Inferred frequency of index and frequency "
 73                             "don't match. This function does not re-sample")
 74        else:
 75            freq = pfreq
 76
 77    elif freq is None:
 78        raise ValueError("You must specify a freq or x must be a "
 79                         "pandas object with a timeseries index")
 80
 81
 82    if filt is None:
 83        if freq % 2 == 0:  # split weights at ends
 84            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
 85        else:
 86            filt = np.repeat(1./freq, freq)
 87
 88    trend = convolution_filter(x, filt)
 89
 90    # nan pad for conformability - convolve doesn't do it
 91    if model.startswith('m'):
 92        detrended = x / trend
 93    else:
 94        detrended = x - trend
 95
 96    period_averages = seasonal_mean(detrended, freq)
 97
 98    if model.startswith('m'):
 99        period_averages /= np.mean(period_averages)
100    else:
101        period_averages -= np.mean(period_averages)
102
103    seasonal = np.tile(period_averages, nobs // freq + 1)[:nobs]
104
105    if model.startswith('m'):
106        resid = x / seasonal / trend
107    else:
108        resid = detrended - seasonal
109
110    results = lmap(_pandas_wrapper, [seasonal, trend, resid, x])
111    return DecomposeResult(seasonal=results[0], trend=results[1],
112                           resid=results[2], observed=results[3])
113
114
115class DecomposeResult(object):
116    def __init__(self, **kwargs):
117        for key, value in iteritems(kwargs):
118            setattr(self, key, value)
119        self.nobs = len(self.observed)
120
121    def plot(self):
122        from statsmodels.graphics.utils import _import_mpl
123        plt = _import_mpl()
124        fig, axes = plt.subplots(4, 1, sharex=True)
125        if hasattr(self.observed, 'plot'):  # got pandas use it
126            self.observed.plot(ax=axes[0], legend=False)
127            axes[0].set_ylabel('Observed')
128            self.trend.plot(ax=axes[1], legend=False)
129            axes[1].set_ylabel('Trend')
130            self.seasonal.plot(ax=axes[2], legend=False)
131            axes[2].set_ylabel('Seasonal')
132            self.resid.plot(ax=axes[3], legend=False)
133            axes[3].set_ylabel('Residual')
134        else:
135            axes[0].plot(self.observed)
136            axes[0].set_ylabel('Observed')
137            axes[1].plot(self.trend)
138            axes[1].set_ylabel('Trend')
139            axes[2].plot(self.seasonal)
140            axes[2].set_ylabel('Seasonal')
141            axes[3].plot(self.resid)
142            axes[3].set_ylabel('Residual')
143            axes[3].set_xlabel('Time')
144            axes[3].set_xlim(0, self.nobs)
145
146        fig.tight_layout()
147        return fig
148
149
150if __name__ == "__main__":
151    x = np.array([-50, 175, 149, 214, 247, 237, 225, 329, 729, 809,
152                  530, 489, 540, 457, 195, 176, 337, 239, 128, 102,
153                  232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184])
154    results = seasonal_decompose(x, freq=4)
155
156    from pandas import DataFrame, DatetimeIndex
157    data = DataFrame(x, DatetimeIndex(start='1/1/1951',
158                                      periods=len(x),
159                                      freq='Q'))
160
161    res = seasonal_decompose(data)
162