PageRenderTime 57ms CodeModel.GetById 43ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/statsmodels/tsa/vector_ar/util.py

http://github.com/statsmodels/statsmodels
Python | 330 lines | 312 code | 4 blank | 14 comment | 0 complexity | c34db9546696f1c06c702542446c69e3 MD5 | raw file
  1# -*- coding: utf-8 -*-
  2"""
  3Miscellaneous utility code for VAR estimation
  4"""
  5from statsmodels.compat.python import asbytes
  6from statsmodels.compat.pandas import frequencies
  7import numpy as np
  8import scipy.stats as stats
  9import scipy.linalg.decomp as decomp
 10import pandas as pd
 11
 12import statsmodels.tsa.tsatools as tsa
 13
 14
 15#-------------------------------------------------------------------------------
 16# Auxiliary functions for estimation
 17
 18def get_var_endog(y, lags, trend='c', has_constant='skip'):
 19    """
 20    Make predictor matrix for VAR(p) process
 21
 22    Z := (Z_0, ..., Z_T).T (T x Kp)
 23    Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)
 24
 25    Ref: L├╝tkepohl p.70 (transposed)
 26
 27    has_constant can be 'raise', 'add', or 'skip'. See add_constant.
 28    """
 29    nobs = len(y)
 30    # Ravel C order, need to put in descending order
 31    Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)])
 32
 33    # Add constant, trend, etc.
 34    if trend != 'nc':
 35        Z = tsa.add_trend(Z, prepend=True, trend=trend,
 36                          has_constant=has_constant)
 37
 38    return Z
 39
 40
 41def get_trendorder(trend='c'):
 42    # Handle constant, etc.
 43    if trend == 'c':
 44        trendorder = 1
 45    elif trend in ('n', 'nc'):
 46        trendorder = 0
 47    elif trend == 'ct':
 48        trendorder = 2
 49    elif trend == 'ctt':
 50        trendorder = 3
 51    return trendorder
 52
 53
 54def make_lag_names(names, lag_order, trendorder=1, exog=None):
 55    """
 56    Produce list of lag-variable names. Constant / trends go at the beginning
 57
 58    Examples
 59    --------
 60    >>> make_lag_names(['foo', 'bar'], 2, 1)
 61    ['const', 'L1.foo', 'L1.bar', 'L2.foo', 'L2.bar']
 62    """
 63    lag_names = []
 64    if isinstance(names, str):
 65        names = [names]
 66
 67    # take care of lagged endogenous names
 68    for i in range(1, lag_order + 1):
 69        for name in names:
 70            if not isinstance(name, str):
 71                name = str(name) # will need consistent unicode handling
 72            lag_names.append('L'+str(i)+'.'+name)
 73
 74    # handle the constant name
 75    if trendorder != 0:
 76        lag_names.insert(0, 'const')
 77    if trendorder > 1:
 78        lag_names.insert(1, 'trend')
 79    if trendorder > 2:
 80        lag_names.insert(2, 'trend**2')
 81    if exog is not None:
 82        if isinstance(exog, pd.Series):
 83            exog = pd.DataFrame(exog)
 84        elif not hasattr(exog, 'ndim'):
 85            exog = np.asarray(exog)
 86        if exog.ndim == 1:
 87            exog = exog[:, None]
 88        for i in range(exog.shape[1]):
 89            if isinstance(exog, pd.DataFrame):
 90                exog_name = str(exog.columns[i])
 91            else:
 92                exog_name = "exog" + str(i)
 93            lag_names.insert(trendorder + i, exog_name)
 94    return lag_names
 95
 96
 97def comp_matrix(coefs):
 98    """
 99    Return compansion matrix for the VAR(1) representation for a VAR(p) process
100    (companion form)
101
102    A = [A_1 A_2 ... A_p-1 A_p
103         I_K 0       0     0
104         0   I_K ... 0     0
105         0 ...       I_K   0]
106    """
107    p, k1, k2 = coefs.shape
108    if k1 != k2:
109        raise ValueError('coefs must be 3-d with shape (p, k, k).')
110
111    kp = k1 * p
112
113    result = np.zeros((kp, kp))
114    result[:k1] = np.concatenate(coefs, axis=1)
115
116    # Set I_K matrices
117    if p > 1:
118        result[np.arange(k1, kp), np.arange(kp-k1)] = 1
119
120    return result
121
122#-------------------------------------------------------------------------------
123# Miscellaneous stuff
124
125
126def parse_lutkepohl_data(path): # pragma: no cover
127    """
128    Parse data files from L├╝tkepohl (2005) book
129
130    Source for data files: www.jmulti.de
131    """
132
133    from collections import deque
134    from datetime import datetime
135    import re
136
137    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))
138    with open(path, 'rb') as f:
139        lines = deque(f)
140
141    to_skip = 0
142    while asbytes('*/') not in lines.popleft():
143        #while '*/' not in lines.popleft():
144        to_skip += 1
145
146    while True:
147        to_skip += 1
148        line = lines.popleft()
149        m = regex.match(line)
150        if m:
151            year, freq, start_point = m.groups()
152            break
153
154    data = (pd.read_csv(path, delimiter=r"\s+", header=to_skip+1)
155            .to_records(index=False))
156
157    n = len(data)
158
159    # generate the corresponding date range (using pandas for now)
160    start_point = int(start_point)
161    year = int(year)
162
163    offsets = {
164        asbytes('Q'): frequencies.BQuarterEnd(),
165        asbytes('M'): frequencies.BMonthEnd(),
166        asbytes('A'): frequencies.BYearEnd()
167    }
168
169    # create an instance
170    offset = offsets[freq]
171
172    inc = offset * (start_point - 1)
173    start_date = offset.rollforward(datetime(year, 1, 1)) + inc
174
175    offset = offsets[freq]
176    date_range = pd.date_range(start=start_date, freq=offset, periods=n)
177
178    return data, date_range
179
180
181def norm_signif_level(alpha=0.05):
182    return stats.norm.ppf(1 - alpha / 2)
183
184
185def acf_to_acorr(acf):
186    diag = np.diag(acf[0])
187    # numpy broadcasting sufficient
188    return acf / np.sqrt(np.outer(diag, diag))
189
190
191def varsim(coefs, intercept, sig_u, steps=100, initvalues=None, seed=None):
192    """
193    Simulate VAR(p) process, given coefficients and assuming Gaussian noise
194
195    Parameters
196    ----------
197    coefs : ndarray
198        Coefficients for the VAR lags of endog.
199    intercept : None or ndarray 1-D (neqs,) or (steps, neqs)
200        This can be either the intercept for each equation or an offset.
201        If None, then the VAR process has a zero intercept.
202        If intercept is 1-D, then the same (endog specific) intercept is added
203        to all observations.
204        If intercept is 2-D, then it is treated as an offset and is added as
205        an observation specific intercept to the autoregression. In this case,
206        the intercept/offset should have same number of rows as steps, and the
207        same number of columns as endogenous variables (neqs).
208    sig_u : ndarray
209        Covariance matrix of the residuals or innovations.
210        If sig_u is None, then an identity matrix is used.
211    steps : {None, int}
212        number of observations to simulate, this includes the initial
213        observations to start the autoregressive process.
214        If offset is not None, then exog of the model are used if they were
215        provided in the model
216    seed : {None, int}
217        If seed is not None, then it will be used with for the random
218        variables generated by numpy.random.
219
220    Returns
221    -------
222    endog_simulated : nd_array
223        Endog of the simulated VAR process
224    """
225    rs = np.random.RandomState(seed=seed)
226    rmvnorm = rs.multivariate_normal
227    p, k, k = coefs.shape
228    if sig_u is None:
229        sig_u = np.eye(k)
230    ugen = rmvnorm(np.zeros(len(sig_u)), sig_u, steps)
231    result = np.zeros((steps, k))
232    if intercept is not None:
233        # intercept can be 2-D like an offset variable
234        if np.ndim(intercept) > 1:
235            if not len(intercept) == len(ugen):
236                raise ValueError('2-D intercept needs to have length `steps`')
237        # add intercept/offset also to intial values
238        result += intercept
239        result[p:] += ugen[p:]
240    else:
241        result[p:] = ugen[p:]
242
243    # add in AR terms
244    for t in range(p, steps):
245        ygen = result[t]
246        for j in range(p):
247            ygen += np.dot(coefs[j], result[t-j-1])
248
249    return result
250
251
252def get_index(lst, name):
253    try:
254        result = lst.index(name)
255    except Exception:
256        if not isinstance(name, int):
257            raise
258        result = name
259    return result
260
261
262#method used repeatedly in Sims-Zha error bands
263def eigval_decomp(sym_array):
264    """
265    Returns
266    -------
267    W: array of eigenvectors
268    eigva: list of eigenvalues
269    k: largest eigenvector
270    """
271    #check if symmetric, do not include shock period
272    eigva, W = decomp.eig(sym_array, left=True, right=False)
273    k = np.argmax(eigva)
274    return W, eigva, k
275
276
277def vech(A):
278    """
279    Simple vech operator
280    Returns
281    -------
282    vechvec: vector of all elements on and below diagonal
283    """
284
285    length=A.shape[1]
286    vechvec=[]
287    for i in range(length):
288        b=i
289        while b < length:
290            vechvec.append(A[b,i])
291            b=b+1
292    vechvec=np.asarray(vechvec)
293    return vechvec
294
295
296def seasonal_dummies(n_seasons, len_endog, first_period=0, centered=False):
297    """
298
299    Parameters
300    ----------
301    n_seasons : int >= 0
302        Number of seasons (e.g. 12 for monthly data and 4 for quarterly data).
303    len_endog : int >= 0
304        Total number of observations.
305    first_period : int, default: 0
306        Season of the first observation. As an example, suppose we have monthly
307        data and the first observation is in March (third month of the year).
308        In this case we pass 2 as first_period. (0 for the first season,
309        1 for the second, ..., n_seasons-1 for the last season).
310        An integer greater than n_seasons-1 are treated in the same way as the
311        integer modulo n_seasons.
312    centered : bool, default: False
313        If True, center (demean) the dummy variables. That is useful in order
314        to get seasonal dummies that are orthogonal to the vector of constant
315        dummy variables (a vector of ones).
316
317    Returns
318    -------
319    seasonal_dummies : ndarray (len_endog x n_seasons-1)
320    """
321    if n_seasons == 0:
322        return np.empty((len_endog, 0))
323    if n_seasons > 0:
324        season_exog = np.zeros((len_endog, n_seasons - 1))
325        for i in range(n_seasons - 1):
326            season_exog[(i-first_period) % n_seasons::n_seasons, i] = 1
327
328        if centered:
329            season_exog -= 1 / n_seasons
330        return season_exog