PageRenderTime 49ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/statsmodels/tsa/vector_ar/dynamic.py

http://github.com/statsmodels/statsmodels
Python | 373 lines | 353 code | 10 blank | 10 comment | 9 complexity | 22d56453037dcde51afdf32903690f86 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. # pylint: disable=W0201
  2. import numpy as np
  3. from statsmodels.tools.decorators import cache_readonly
  4. import var_model as _model
  5. import util
  6. import plotting
  7. FULL_SAMPLE = 0
  8. ROLLING = 1
  9. EXPANDING = 2
  10. try:
  11. import pandas as pn
  12. except ImportError:
  13. pass
  14. def _get_window_type(window_type):
  15. if window_type in (FULL_SAMPLE, ROLLING, EXPANDING):
  16. return window_type
  17. elif isinstance(window_type, basestring):
  18. window_type_up = window_type.upper()
  19. if window_type_up in ('FULL SAMPLE', 'FULL_SAMPLE'):
  20. return FULL_SAMPLE
  21. elif window_type_up == 'ROLLING':
  22. return ROLLING
  23. elif window_type_up == 'EXPANDING':
  24. return EXPANDING
  25. raise Exception('Unrecognized window type: %s' % window_type)
  26. def require_pandas():
  27. try:
  28. import pandas as pn
  29. except ImportError:
  30. raise ImportError('pandas is required to use this code (for now)')
  31. class DynamicVAR(object):
  32. """
  33. Estimates time-varying vector autoregression (VAR(p)) using
  34. equation-by-equation least squares
  35. Parameters
  36. ----------
  37. data : pandas.{DataFrame, DataMatrix}
  38. lag_order : int, default 1
  39. window : int
  40. window_type : {'expanding', 'rolling'}
  41. min_periods : int or None
  42. Minimum number of observations to require in window, defaults to window
  43. size if None specified
  44. trend : {'c', 'nc', 'ct', 'ctt'}
  45. TODO
  46. Returns
  47. -------
  48. **Attributes**:
  49. coefs : WidePanel
  50. items : coefficient names
  51. major_axis : dates
  52. minor_axis : VAR equation names
  53. """
  54. def __init__(self, data, lag_order=1, window=None, window_type='expanding',
  55. trend='c', min_periods=None):
  56. require_pandas()
  57. self.lag_order = lag_order
  58. self.names = list(data.columns)
  59. self.neqs = len(self.names)
  60. self._y_orig = data
  61. # TODO: deal with trend
  62. self._x_orig = _make_lag_matrix(data, lag_order)
  63. self._x_orig['intercept'] = 1
  64. (self.y, self.x, self.x_filtered, self._index,
  65. self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)
  66. self.lag_order = lag_order
  67. self.trendorder = util.get_trendorder(trend)
  68. self._set_window(window_type, window, min_periods)
  69. def _set_window(self, window_type, window, min_periods):
  70. self._window_type = _get_window_type(window_type)
  71. if self._is_rolling:
  72. if window is None:
  73. raise Exception('Must pass window when doing rolling '
  74. 'regression')
  75. if min_periods is None:
  76. min_periods = window
  77. else:
  78. window = len(self.x)
  79. if min_periods is None:
  80. min_periods = 1
  81. self._window = int(window)
  82. self._min_periods = min_periods
  83. @cache_readonly
  84. def T(self):
  85. """
  86. Number of time periods in results
  87. """
  88. return len(self.result_index)
  89. @property
  90. def nobs(self):
  91. # Stub, do I need this?
  92. data = dict((eq, r.nobs) for eq, r in self.equations.iteritems())
  93. return pn.DataMatrix(data)
  94. @cache_readonly
  95. def equations(self):
  96. eqs = {}
  97. for col, ts in self.y.iteritems():
  98. model = pn.ols(y=ts, x=self.x, window=self._window,
  99. window_type=self._window_type,
  100. min_periods=self._min_periods)
  101. eqs[col] = model
  102. return eqs
  103. @cache_readonly
  104. def coefs(self):
  105. """
  106. Return dynamic regression coefficients as WidePanel
  107. """
  108. data = {}
  109. for eq, result in self.equations.iteritems():
  110. data[eq] = result.beta
  111. panel = pn.WidePanel.fromDict(data)
  112. # Coefficient names become items
  113. return panel.swapaxes('items', 'minor')
  114. @property
  115. def result_index(self):
  116. return self.coefs.major_axis
  117. @cache_readonly
  118. def _coefs_raw(self):
  119. """
  120. Reshape coefficients to be more amenable to dynamic calculations
  121. Returns
  122. -------
  123. coefs : (time_periods x lag_order x neqs x neqs)
  124. """
  125. coef_panel = self.coefs.copy()
  126. del coef_panel['intercept']
  127. coef_values = coef_panel.swapaxes('items', 'major').values
  128. coef_values = coef_values.reshape((len(coef_values),
  129. self.lag_order,
  130. self.neqs, self.neqs))
  131. return coef_values
  132. @cache_readonly
  133. def _intercepts_raw(self):
  134. """
  135. Similar to _coefs_raw, return intercept values in easy-to-use matrix
  136. form
  137. Returns
  138. -------
  139. intercepts : (T x K)
  140. """
  141. return self.coefs['intercept'].values
  142. @cache_readonly
  143. def resid(self):
  144. data = {}
  145. for eq, result in self.equations.iteritems():
  146. data[eq] = result.resid
  147. return pn.DataMatrix(data)
  148. def forecast(self, steps=1):
  149. """
  150. Produce dynamic forecast
  151. Parameters
  152. ----------
  153. steps
  154. Returns
  155. -------
  156. forecasts : pandas.DataMatrix
  157. """
  158. output = np.empty((self.T - steps, self.neqs))
  159. y_values = self.y.values
  160. y_index_map = self.y.index.indexMap
  161. result_index_map = self.result_index.indexMap
  162. coefs = self._coefs_raw
  163. intercepts = self._intercepts_raw
  164. # can only produce this many forecasts
  165. forc_index = self.result_index[steps:]
  166. for i, date in enumerate(forc_index):
  167. # TODO: check that this does the right thing in weird cases...
  168. idx = y_index_map[date] - steps
  169. result_idx = result_index_map[date] - steps
  170. y_slice = y_values[:idx]
  171. forcs = _model.forecast(y_slice, coefs[result_idx],
  172. intercepts[result_idx], steps)
  173. output[i] = forcs[-1]
  174. return pn.DataMatrix(output, index=forc_index, columns=self.names)
  175. def plot_forecast(self, steps=1, figsize=(10, 10)):
  176. """
  177. Plot h-step ahead forecasts against actual realizations of time
  178. series. Note that forecasts are lined up with their respective
  179. realizations.
  180. Parameters
  181. ----------
  182. steps :
  183. """
  184. import matplotlib.pyplot as plt
  185. fig, axes = plt.subplots(figsize=figsize, nrows=self.neqs,
  186. sharex=True)
  187. forc = self.forecast(steps=steps)
  188. dates = forc.index
  189. y_overlay = self.y.reindex(dates)
  190. for i, col in enumerate(forc.columns):
  191. ax = axes[i]
  192. y_ts = y_overlay[col]
  193. forc_ts = forc[col]
  194. y_handle = ax.plot(dates, y_ts.values, 'k.', ms=2)
  195. forc_handle = ax.plot(dates, forc_ts.values, 'k-')
  196. fig.legend((y_handle, forc_handle), ('Y', 'Forecast'))
  197. fig.autofmt_xdate()
  198. fig.suptitle('Dynamic %d-step forecast' % steps)
  199. # pretty things up a bit
  200. plotting.adjust_subplots(bottom=0.15, left=0.10)
  201. plt.draw_if_interactive()
  202. @property
  203. def _is_rolling(self):
  204. return self._window_type == ROLLING
  205. @cache_readonly
  206. def r2(self):
  207. """Returns the r-squared values."""
  208. data = dict((eq, r.r2) for eq, r in self.equations.iteritems())
  209. return pn.DataMatrix(data)
  210. class DynamicPanelVAR(DynamicVAR):
  211. """
  212. Dynamic (time-varying) panel vector autoregression using panel ordinary
  213. least squares
  214. Parameters
  215. ----------
  216. """
  217. def __init__(self, data, lag_order=1, window=None, window_type='expanding',
  218. trend='c', min_periods=None):
  219. self.lag_order = lag_order
  220. self.neqs = len(data.columns)
  221. self._y_orig = data
  222. # TODO: deal with trend
  223. self._x_orig = _make_lag_matrix(data, lag_order)
  224. self._x_orig['intercept'] = 1
  225. (self.y, self.x, self.x_filtered, self._index,
  226. self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)
  227. self.lag_order = lag_order
  228. self.trendorder = util.get_trendorder(trend)
  229. self._set_window(window_type, window, min_periods)
  230. def _filter_data(lhs, rhs):
  231. """
  232. Data filtering routine for dynamic VAR
  233. lhs : DataFrame
  234. original data
  235. rhs : DataFrame
  236. lagged variables
  237. Returns
  238. -------
  239. """
  240. def _has_all_columns(df):
  241. return np.isfinite(df.values).sum(1) == len(df.columns)
  242. rhs_valid = _has_all_columns(rhs)
  243. if not rhs_valid.all():
  244. pre_filtered_rhs = rhs[rhs_valid]
  245. else:
  246. pre_filtered_rhs = rhs
  247. index = lhs.index.union(rhs.index)
  248. if not index.equals(rhs.index) or not index.equals(lhs.index):
  249. rhs = rhs.reindex(index)
  250. lhs = lhs.reindex(index)
  251. rhs_valid = _has_all_columns(rhs)
  252. lhs_valid = _has_all_columns(lhs)
  253. valid = rhs_valid & lhs_valid
  254. if not valid.all():
  255. filt_index = rhs.index[valid]
  256. filtered_rhs = rhs.reindex(filt_index)
  257. filtered_lhs = lhs.reindex(filt_index)
  258. else:
  259. filtered_rhs, filtered_lhs = rhs, lhs
  260. return filtered_lhs, filtered_rhs, pre_filtered_rhs, index, valid
  261. def _make_lag_matrix(x, lags):
  262. data = {}
  263. columns = []
  264. for i in range(1, 1 + lags):
  265. lagstr = 'L%d.'% i
  266. lag = x.shift(i).rename(columns=lambda c: lagstr + c)
  267. data.update(lag._series)
  268. columns.extend(lag.columns)
  269. return pn.DataMatrix(data, columns=columns)
  270. class Equation(object):
  271. """
  272. Stub, estimate one equation
  273. """
  274. def __init__(self, y, x):
  275. pass
  276. if __name__ == '__main__':
  277. import pandas.util.testing as ptest
  278. ptest.N = 500
  279. data = ptest.makeTimeDataFrame().cumsum(0)
  280. var = DynamicVAR(data, lag_order=2, window_type='expanding')
  281. var2 = DynamicVAR(data, lag_order=2, window=10,
  282. window_type='rolling')