PageRenderTime 3ms CodeModel.GetById 33ms app.highlight 16ms RepoModel.GetById 1ms app.codeStats 1ms

/statsmodels/tsa/vector_ar/dynamic.py

http://github.com/statsmodels/statsmodels
Python | 373 lines | 353 code | 10 blank | 10 comment | 4 complexity | 22d56453037dcde51afdf32903690f86 MD5 | raw file
  1# pylint: disable=W0201
  2
  3import numpy as np
  4
  5from statsmodels.tools.decorators import cache_readonly
  6
  7import var_model as _model
  8import util
  9import plotting
 10
 11FULL_SAMPLE = 0
 12ROLLING = 1
 13EXPANDING = 2
 14
 15try:
 16    import pandas as pn
 17except ImportError:
 18    pass
 19
 20def _get_window_type(window_type):
 21    if window_type in (FULL_SAMPLE, ROLLING, EXPANDING):
 22        return window_type
 23    elif isinstance(window_type, basestring):
 24        window_type_up = window_type.upper()
 25
 26        if window_type_up in ('FULL SAMPLE', 'FULL_SAMPLE'):
 27            return FULL_SAMPLE
 28        elif window_type_up == 'ROLLING':
 29            return ROLLING
 30        elif window_type_up == 'EXPANDING':
 31            return EXPANDING
 32
 33    raise Exception('Unrecognized window type: %s' % window_type)
 34
 35def require_pandas():
 36    try:
 37        import pandas as pn
 38    except ImportError:
 39        raise ImportError('pandas is required to use this code (for now)')
 40
 41class DynamicVAR(object):
 42    """
 43    Estimates time-varying vector autoregression (VAR(p)) using
 44    equation-by-equation least squares
 45
 46    Parameters
 47    ----------
 48    data : pandas.{DataFrame, DataMatrix}
 49    lag_order : int, default 1
 50    window : int
 51    window_type : {'expanding', 'rolling'}
 52    min_periods : int or None
 53        Minimum number of observations to require in window, defaults to window
 54        size if None specified
 55    trend : {'c', 'nc', 'ct', 'ctt'}
 56        TODO
 57
 58    Returns
 59    -------
 60    **Attributes**:
 61
 62    coefs : WidePanel
 63        items : coefficient names
 64        major_axis : dates
 65        minor_axis : VAR equation names
 66    """
 67    def __init__(self, data, lag_order=1, window=None, window_type='expanding',
 68                 trend='c', min_periods=None):
 69        require_pandas()
 70
 71        self.lag_order = lag_order
 72
 73        self.names = list(data.columns)
 74        self.neqs = len(self.names)
 75
 76        self._y_orig = data
 77
 78        # TODO: deal with trend
 79        self._x_orig = _make_lag_matrix(data, lag_order)
 80        self._x_orig['intercept'] = 1
 81
 82        (self.y, self.x, self.x_filtered, self._index,
 83         self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)
 84
 85        self.lag_order = lag_order
 86        self.trendorder = util.get_trendorder(trend)
 87
 88        self._set_window(window_type, window, min_periods)
 89
 90    def _set_window(self, window_type, window, min_periods):
 91        self._window_type = _get_window_type(window_type)
 92
 93        if self._is_rolling:
 94            if window is None:
 95                raise Exception('Must pass window when doing rolling '
 96                                'regression')
 97
 98            if min_periods is None:
 99                min_periods = window
100        else:
101            window = len(self.x)
102            if min_periods is None:
103                min_periods = 1
104
105        self._window = int(window)
106        self._min_periods = min_periods
107
108    @cache_readonly
109    def T(self):
110        """
111        Number of time periods in results
112        """
113        return len(self.result_index)
114
115    @property
116    def nobs(self):
117        # Stub, do I need this?
118        data = dict((eq, r.nobs) for eq, r in self.equations.iteritems())
119        return pn.DataMatrix(data)
120
121    @cache_readonly
122    def equations(self):
123        eqs = {}
124        for col, ts in self.y.iteritems():
125            model = pn.ols(y=ts, x=self.x, window=self._window,
126                           window_type=self._window_type,
127                           min_periods=self._min_periods)
128
129            eqs[col] = model
130
131        return eqs
132
133    @cache_readonly
134    def coefs(self):
135        """
136        Return dynamic regression coefficients as WidePanel
137        """
138        data = {}
139        for eq, result in self.equations.iteritems():
140            data[eq] = result.beta
141
142        panel = pn.WidePanel.fromDict(data)
143
144        # Coefficient names become items
145        return panel.swapaxes('items', 'minor')
146
147    @property
148    def result_index(self):
149        return self.coefs.major_axis
150
151    @cache_readonly
152    def _coefs_raw(self):
153        """
154        Reshape coefficients to be more amenable to dynamic calculations
155
156        Returns
157        -------
158        coefs : (time_periods x lag_order x neqs x neqs)
159        """
160        coef_panel = self.coefs.copy()
161        del coef_panel['intercept']
162
163        coef_values = coef_panel.swapaxes('items', 'major').values
164        coef_values = coef_values.reshape((len(coef_values),
165                                           self.lag_order,
166                                           self.neqs, self.neqs))
167
168        return coef_values
169
170    @cache_readonly
171    def _intercepts_raw(self):
172        """
173        Similar to _coefs_raw, return intercept values in easy-to-use matrix
174        form
175
176        Returns
177        -------
178        intercepts : (T x K)
179        """
180        return self.coefs['intercept'].values
181
182    @cache_readonly
183    def resid(self):
184        data = {}
185        for eq, result in self.equations.iteritems():
186            data[eq] = result.resid
187
188        return pn.DataMatrix(data)
189
190    def forecast(self, steps=1):
191        """
192        Produce dynamic forecast
193
194        Parameters
195        ----------
196        steps
197
198        Returns
199        -------
200        forecasts : pandas.DataMatrix
201        """
202        output = np.empty((self.T - steps, self.neqs))
203
204        y_values = self.y.values
205        y_index_map = self.y.index.indexMap
206        result_index_map = self.result_index.indexMap
207
208        coefs = self._coefs_raw
209        intercepts = self._intercepts_raw
210
211        # can only produce this many forecasts
212        forc_index = self.result_index[steps:]
213        for i, date in enumerate(forc_index):
214            # TODO: check that this does the right thing in weird cases...
215            idx = y_index_map[date] - steps
216            result_idx = result_index_map[date] - steps
217
218            y_slice = y_values[:idx]
219
220            forcs = _model.forecast(y_slice, coefs[result_idx],
221                                    intercepts[result_idx], steps)
222
223            output[i] = forcs[-1]
224
225        return pn.DataMatrix(output, index=forc_index, columns=self.names)
226
227    def plot_forecast(self, steps=1, figsize=(10, 10)):
228        """
229        Plot h-step ahead forecasts against actual realizations of time
230        series. Note that forecasts are lined up with their respective
231        realizations.
232
233        Parameters
234        ----------
235        steps :
236        """
237        import matplotlib.pyplot as plt
238
239        fig, axes = plt.subplots(figsize=figsize, nrows=self.neqs,
240                                 sharex=True)
241
242        forc = self.forecast(steps=steps)
243        dates = forc.index
244
245        y_overlay = self.y.reindex(dates)
246
247        for i, col in enumerate(forc.columns):
248            ax = axes[i]
249
250            y_ts = y_overlay[col]
251            forc_ts = forc[col]
252
253            y_handle = ax.plot(dates, y_ts.values, 'k.', ms=2)
254            forc_handle = ax.plot(dates, forc_ts.values, 'k-')
255
256        fig.legend((y_handle, forc_handle), ('Y', 'Forecast'))
257        fig.autofmt_xdate()
258
259        fig.suptitle('Dynamic %d-step forecast' % steps)
260
261        # pretty things up a bit
262        plotting.adjust_subplots(bottom=0.15, left=0.10)
263        plt.draw_if_interactive()
264
265    @property
266    def _is_rolling(self):
267        return self._window_type == ROLLING
268
269    @cache_readonly
270    def r2(self):
271        """Returns the r-squared values."""
272        data = dict((eq, r.r2) for eq, r in self.equations.iteritems())
273        return pn.DataMatrix(data)
274
275class DynamicPanelVAR(DynamicVAR):
276    """
277    Dynamic (time-varying) panel vector autoregression using panel ordinary
278    least squares
279
280    Parameters
281    ----------
282    """
283    def __init__(self, data, lag_order=1, window=None, window_type='expanding',
284                 trend='c', min_periods=None):
285        self.lag_order = lag_order
286        self.neqs = len(data.columns)
287
288        self._y_orig = data
289
290        # TODO: deal with trend
291        self._x_orig = _make_lag_matrix(data, lag_order)
292        self._x_orig['intercept'] = 1
293
294        (self.y, self.x, self.x_filtered, self._index,
295         self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)
296
297        self.lag_order = lag_order
298        self.trendorder = util.get_trendorder(trend)
299
300        self._set_window(window_type, window, min_periods)
301
302
303def _filter_data(lhs, rhs):
304    """
305    Data filtering routine for dynamic VAR
306
307    lhs : DataFrame
308        original data
309    rhs : DataFrame
310        lagged variables
311
312    Returns
313    -------
314
315    """
316    def _has_all_columns(df):
317        return np.isfinite(df.values).sum(1) == len(df.columns)
318
319    rhs_valid = _has_all_columns(rhs)
320    if not rhs_valid.all():
321        pre_filtered_rhs = rhs[rhs_valid]
322    else:
323        pre_filtered_rhs = rhs
324
325    index = lhs.index.union(rhs.index)
326    if not index.equals(rhs.index) or not index.equals(lhs.index):
327        rhs = rhs.reindex(index)
328        lhs = lhs.reindex(index)
329
330        rhs_valid = _has_all_columns(rhs)
331
332    lhs_valid = _has_all_columns(lhs)
333    valid = rhs_valid & lhs_valid
334
335    if not valid.all():
336        filt_index = rhs.index[valid]
337        filtered_rhs = rhs.reindex(filt_index)
338        filtered_lhs = lhs.reindex(filt_index)
339    else:
340        filtered_rhs, filtered_lhs = rhs, lhs
341
342    return filtered_lhs, filtered_rhs, pre_filtered_rhs, index, valid
343
344def _make_lag_matrix(x, lags):
345    data = {}
346    columns = []
347    for i in range(1, 1 + lags):
348        lagstr = 'L%d.'% i
349        lag = x.shift(i).rename(columns=lambda c: lagstr + c)
350        data.update(lag._series)
351        columns.extend(lag.columns)
352
353    return pn.DataMatrix(data, columns=columns)
354
355class Equation(object):
356    """
357    Stub, estimate one equation
358    """
359
360    def __init__(self, y, x):
361        pass
362
363if __name__ == '__main__':
364    import pandas.util.testing as ptest
365
366    ptest.N = 500
367    data = ptest.makeTimeDataFrame().cumsum(0)
368
369    var = DynamicVAR(data, lag_order=2, window_type='expanding')
370    var2 = DynamicVAR(data, lag_order=2, window=10,
371                      window_type='rolling')
372
373