PageRenderTime 20ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/statsmodels/tsa/seasonal.py

https://github.com/chatcannon/statsmodels
Python | 162 lines | 131 code | 18 blank | 13 comment | 13 complexity | fa4c5e50ad569896c90d7efc72047507 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. """
  2. Seasonal Decomposition by Moving Averages
  3. """
  4. from statsmodels.compat.python import lmap, range, iteritems
  5. import numpy as np
  6. from pandas.core.nanops import nanmean as pd_nanmean
  7. from .filters._utils import _maybe_get_pandas_wrapper_freq
  8. from .filters.filtertools import convolution_filter
  9. from statsmodels.tsa.tsatools import freq_to_period
  10. def seasonal_mean(x, freq):
  11. """
  12. Return means for each period in x. freq is an int that gives the
  13. number of periods per cycle. E.g., 12 for monthly. NaNs are ignored
  14. in the mean.
  15. """
  16. return np.array([pd_nanmean(x[i::freq]) for i in range(freq)])
  17. def seasonal_decompose(x, model="additive", filt=None, freq=None):
  18. """
  19. Parameters
  20. ----------
  21. x : array-like
  22. Time series
  23. model : str {"additive", "multiplicative"}
  24. Type of seasonal component. Abbreviations are accepted.
  25. filt : array-like
  26. The filter coefficients for filtering out the seasonal component.
  27. The default is a symmetric moving average.
  28. freq : int, optional
  29. Frequency of the series. Must be used if x is not a pandas
  30. object with a timeseries index.
  31. Returns
  32. -------
  33. results : obj
  34. A object with seasonal, trend, and resid attributes.
  35. Notes
  36. -----
  37. This is a naive decomposition. More sophisticated methods should
  38. be preferred.
  39. The additive model is Y[t] = T[t] + S[t] + e[t]
  40. The multiplicative model is Y[t] = T[t] * S[t] * e[t]
  41. The seasonal component is first removed by applying a convolution
  42. filter to the data. The average of this smoothed series for each
  43. period is the returned seasonal component.
  44. See Also
  45. --------
  46. statsmodels.tsa.filters.convolution_filter
  47. """
  48. _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
  49. x = np.asanyarray(x).squeeze()
  50. nobs = len(x)
  51. if not np.all(np.isfinite(x)):
  52. raise ValueError("This function does not handle missing values")
  53. if model.startswith('m'):
  54. if np.any(x <= 0):
  55. raise ValueError("Multiplicative seasonality is not appropriate "
  56. "for zero and negative values")
  57. if pfreq is not None:
  58. pfreq = freq_to_period(pfreq)
  59. if freq and pfreq != freq:
  60. raise ValueError("Inferred frequency of index and frequency "
  61. "don't match. This function does not re-sample")
  62. else:
  63. freq = pfreq
  64. elif freq is None:
  65. raise ValueError("You must specify a freq or x must be a "
  66. "pandas object with a timeseries index")
  67. if filt is None:
  68. if freq % 2 == 0: # split weights at ends
  69. filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
  70. else:
  71. filt = np.repeat(1./freq, freq)
  72. trend = convolution_filter(x, filt)
  73. # nan pad for conformability - convolve doesn't do it
  74. if model.startswith('m'):
  75. detrended = x / trend
  76. else:
  77. detrended = x - trend
  78. period_averages = seasonal_mean(detrended, freq)
  79. if model.startswith('m'):
  80. period_averages /= np.mean(period_averages)
  81. else:
  82. period_averages -= np.mean(period_averages)
  83. seasonal = np.tile(period_averages, nobs // freq + 1)[:nobs]
  84. if model.startswith('m'):
  85. resid = x / seasonal / trend
  86. else:
  87. resid = detrended - seasonal
  88. results = lmap(_pandas_wrapper, [seasonal, trend, resid, x])
  89. return DecomposeResult(seasonal=results[0], trend=results[1],
  90. resid=results[2], observed=results[3])
  91. class DecomposeResult(object):
  92. def __init__(self, **kwargs):
  93. for key, value in iteritems(kwargs):
  94. setattr(self, key, value)
  95. self.nobs = len(self.observed)
  96. def plot(self):
  97. from statsmodels.graphics.utils import _import_mpl
  98. plt = _import_mpl()
  99. fig, axes = plt.subplots(4, 1, sharex=True)
  100. if hasattr(self.observed, 'plot'): # got pandas use it
  101. self.observed.plot(ax=axes[0], legend=False)
  102. axes[0].set_ylabel('Observed')
  103. self.trend.plot(ax=axes[1], legend=False)
  104. axes[1].set_ylabel('Trend')
  105. self.seasonal.plot(ax=axes[2], legend=False)
  106. axes[2].set_ylabel('Seasonal')
  107. self.resid.plot(ax=axes[3], legend=False)
  108. axes[3].set_ylabel('Residual')
  109. else:
  110. axes[0].plot(self.observed)
  111. axes[0].set_ylabel('Observed')
  112. axes[1].plot(self.trend)
  113. axes[1].set_ylabel('Trend')
  114. axes[2].plot(self.seasonal)
  115. axes[2].set_ylabel('Seasonal')
  116. axes[3].plot(self.resid)
  117. axes[3].set_ylabel('Residual')
  118. axes[3].set_xlabel('Time')
  119. axes[3].set_xlim(0, self.nobs)
  120. fig.tight_layout()
  121. return fig
  122. if __name__ == "__main__":
  123. x = np.array([-50, 175, 149, 214, 247, 237, 225, 329, 729, 809,
  124. 530, 489, 540, 457, 195, 176, 337, 239, 128, 102,
  125. 232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184])
  126. results = seasonal_decompose(x, freq=4)
  127. from pandas import DataFrame, DatetimeIndex
  128. data = DataFrame(x, DatetimeIndex(start='1/1/1951',
  129. periods=len(x),
  130. freq='Q'))
  131. res = seasonal_decompose(data)