#### /EChanBook2/johansen_test.py

Python | 448 lines | 235 code | 49 blank | 164 comment | 16 complexity | 1514ee96d19c1862e87ec31e28c637a7 MD5 | raw file
```  1
'''
function result = johansen(x,p,k)
% PURPOSE: perform Johansen cointegration tests
% -------------------------------------------------------
% USAGE: result = johansen(x,p,k)
% where:      x = input matrix of time-series in levels, (nobs x m)
%             p = order of time polynomial in the null-hypothesis
%                 p = -1, no deterministic part
%                 p =  0, for constant term
%                 p =  1, for constant plus time-trend
%                 p >  1, for higher order polynomial
%             k = number of lagged difference terms used when
%                 computing the estimator
% -------------------------------------------------------
% RETURNS: a results structure:
%          result.eig  = eigenvalues  (m x 1)
%          result.evec = eigenvectors (m x m), where first
%                        r columns are normalized coint vectors
%          result.lr1  = likelihood ratio trace statistic for r=0 to m-1
%                        (m x 1) vector
%          result.lr2  = maximum eigenvalue statistic for r=0 to m-1
%                        (m x 1) vector
%          result.cvt  = critical values for trace statistic
%                        (m x 3) vector [90% 95% 99%]
%          result.cvm  = critical values for max eigen value statistic
%                        (m x 3) vector [90% 95% 99%]
%          result.ind  = index of co-integrating variables ordered by
%                        size of the eigenvalues from large to small
% -------------------------------------------------------
% NOTE: c_sja(), c_sjt() provide critical values generated using
%       a method of MacKinnon (1994, 1996).
%       critical values are available for n<=12 and -1 <= p <= 1,
%       zeros are returned for other cases.
% -------------------------------------------------------
% References: Johansen (1988), 'Statistical Analysis of Co-integration
% vectors', Journal of Economic Dynamics and Control, 12, pp. 231-254.
% MacKinnon, Haug, Michelis (1996) 'Numerical distribution
% functions of likelihood ratio tests for cointegration',
% Queen's University Institute for Economic Research Discussion paper.
% -------------------------------------------------------

% written by:
% James P. LeSage, Dept of Economics
% University of Toledo
% 2801 W. Bancroft St,
% Toledo, OH 43606
% jlesage@spatial-econometrics.com

% ****************************************************************
% NOTE: Adina Enache provided some bug fixes and corrections that
%       she notes below in comments. 4/10/2000
% ****************************************************************
'''
58
import numpy as np
from numpy import zeros, ones, flipud, log
from numpy.linalg import inv, eig, cholesky as chol

from statsmodels.regression.linear_model import OLS
63
64
65tdiff = np.diff
66
67class Holder(object):
68    pass
69
70def rows(x):
71    return x.shape[0]
72
73def trimr(x, front, end):
74    if end > 0:
75        return x[front:-end]
76    else:
77        return x[front:]
78
79import statsmodels.tsa.tsatools as tsat
80mlag = tsat.lagmat
81
82def mlag_(x, maxlag):
83    '''return all lags up to maxlag
84    '''
85    return x[:-lag]
86
87def lag(x, lag):
88    return x[:-lag]
89
90def detrend(y, order):
91    if order == -1:
92        return y
93    return OLS(y, np.vander(np.linspace(-1, 1, len(y)), order + 1)).fit().resid
94
95def resid(y, x):
96    r = y - np.dot(x, np.dot(np.linalg.pinv(x), y))
97    return r
98
99
100
101
102def coint_johansen(x, p, k, print_on_console=True):
103
104    #    % error checking on inputs
105    #    if (nargin ~= 3)
106    #     error('Wrong # of inputs to johansen')
107    #    end
108    nobs, m = x.shape
109
110    # why this?  f is detrend transformed series, p is detrend data
111    if (p > -1):
112        f = 0
113    else:
114        f = p
115
116    x = detrend(x, p)
117    dx = tdiff(x, 1, axis=0)
118    # dx    = trimr(dx,1,0)
119    z = mlag(dx, k)  # [k-1:]
120#    print z.shape
121    z = trimr(z, k, 0)
122    z = detrend(z, f)
123#    print dx.shape
124    dx = trimr(dx, k, 0)
125
126    dx = detrend(dx, f)
127    # r0t   = dx - z*(z\dx)
128    r0t = resid(dx, z)  # diff on lagged diffs
129    # lx = trimr(lag(x,k),k,0)
130    lx = lag(x, k)
131    lx = trimr(lx, 1, 0)
132    dx = detrend(lx, f)
133#    print 'rkt', dx.shape, z.shape
134    # rkt   = dx - z*(z\dx)
135    rkt = resid(dx, z)  # level on lagged diffs
136    skk = np.dot(rkt.T, rkt) / rows(rkt)
137    sk0 = np.dot(rkt.T, r0t) / rows(rkt)
138    s00 = np.dot(r0t.T, r0t) / rows(r0t)
139    sig = np.dot(sk0, np.dot(inv(s00), (sk0.T)))
140    tmp = inv(skk)
141    # du, au = eig(np.dot(tmp, sig))
142    au, du = eig(np.dot(tmp, sig))  # au is eval, du is evec
143    # orig = np.dot(tmp, sig)
144
145    # % Normalize the eigen vectors such that (du'skk*du) = I
146    temp = inv(chol(np.dot(du.T, np.dot(skk, du))))
147    dt = np.dot(du, temp)
148
149
150    # JP: the next part can be done much  easier
151
152    # %      NOTE: At this point, the eigenvectors are aligned by column. To
153    # %            physically move the column elements using the MATLAB sort,
154    # %            take the transpose to put the eigenvectors across the row
155
156    # dt = transpose(dt)
157
158    # % sort eigenvalues and vectors
159
160    # au, auind = np.sort(diag(au))
161    auind = np.argsort(au)
162    # a = flipud(au)
163    aind = flipud(auind)
164    a = au[aind]
165    # d = dt[aind,:]
166    d = dt[:, aind]
167
168    # %NOTE: The eigenvectors have been sorted by row based on auind and moved to array "d".
169    # %      Put the eigenvectors back in column format after the sort by taking the
170    # %      transpose of "d". Since the eigenvectors have been physically moved, there is
171    # %      no need for aind at all. To preserve existing programming, aind is reset back to
172    # %      1, 2, 3, ....
173
174    # d  =  transpose(d)
175    # test = np.dot(transpose(d), np.dot(skk, d))
176
177    # %EXPLANATION:  The MATLAB sort function sorts from low to high. The flip realigns
178    # %auind to go from the largest to the smallest eigenvalue (now aind). The original procedure
179    # %physically moved the rows of dt (to d) based on the alignment in aind and then used
180    # %aind as a column index to address the eigenvectors from high to low. This is a double
181    # %sort. If you wanted to extract the eigenvector corresponding to the largest eigenvalue by,
182    # %using aind as a reference, you would get the correct eigenvector, but with sorted
183    # %coefficients and, therefore, any follow-on calculation would seem to be in error.
184    # %If alternative programming methods are used to evaluate the eigenvalues, e.g. Frame method
185    # %followed by a root extraction on the characteristic equation, then the roots can be
186    # %quickly sorted. One by one, the corresponding eigenvectors can be generated. The resultant
187    # %array can be operated on using the Cholesky transformation, which enables a unit
188    # %diagonalization of skk. But nowhere along the way are the coefficients within the
189    # %eigenvector array ever changed. The final value of the "beta" array using either method
190    # %should be the same.
191
192
193    # % Compute the trace and max eigenvalue statistics */
194    lr1 = zeros(m)
195    lr2 = zeros(m)
196    cvm = zeros((m, 3))
197    cvt = zeros((m, 3))
198    iota = ones(m)
199    t, junk = rkt.shape
200    for i in range(0, m):
201        tmp = trimr(log(iota - a), i , 0)
202        lr1[i] = -t * np.sum(tmp, 0)  # columnsum ?
203        # tmp = np.log(1-a)
204        # lr1[i] = -t * np.sum(tmp[i:])
205        lr2[i] = -t * log(1 - a[i])
206        cvm[i, :] = c_sja(m - i, p)
207        cvt[i, :] = c_sjt(m - i, p)
208        aind[i] = i
209    # end
210
211    result = Holder()
212    # % set up results structure
213    # estimation results, residuals
214    result.rkt = rkt
215    result.r0t = r0t
216    result.eig = a
217    result.evec = d  # transposed compared to matlab ?
218    result.lr1 = lr1
219    result.lr2 = lr2
220    result.cvt = cvt
221    result.cvm = cvm
222    result.ind = aind
223    result.meth = 'johansen'
224
225    if print_on_console == True:
226        print ('--------------------------------------------------')
227        print ('--> Trace Statistics')
228        print ('variable statistic Crit-90% Crit-95%  Crit-99%')
229        for i in range(len(result.lr1)):
230            print ('r =', i, '\t', round(result.lr1[i], 4), result.cvt[i, 0], result.cvt[i, 1], result.cvt[i, 2])
231        print ('--------------------------------------------------')
232        print ('--> Eigen Statistics')
233        print ('variable statistic Crit-90% Crit-95%  Crit-99%')
234        for i in range(len(result.lr2)):
235            print ('r =', i, '\t', round(result.lr2[i], 4), result.cvm[i, 0], result.cvm[i, 1], result.cvm[i, 2])
236        print ('--------------------------------------------------')
237        print ('eigenvectors:\n', result.evec)
238        print ('--------------------------------------------------')
239        print ('eigenvalues:\n', result.eig)
240        print ('--------------------------------------------------')
241
242
243    return result
244
245def c_sjt(n, p):
246
247# PURPOSE: find critical values for Johansen trace statistic
248# ------------------------------------------------------------
249# USAGE:  jc = c_sjt(n,p)
250# where:    n = dimension of the VAR system
251#               NOTE: routine doesn't work for n > 12
252#           p = order of time polynomial in the null-hypothesis
253#                 p = -1, no deterministic part
254#                 p =  0, for constant term
255#                 p =  1, for constant plus time-trend
256#                 p >  1  returns no critical values
257# ------------------------------------------------------------
258# RETURNS: a (3x1) vector of percentiles for the trace
259#          statistic for [90# 95# 99#]
260# ------------------------------------------------------------
261# NOTES: for n > 12, the function returns a (3x1) vector of zeros.
262#        The values returned by the function were generated using
263#        a method described in MacKinnon (1996), using his FORTRAN
264#        program johdist.f
265# ------------------------------------------------------------
267# ------------------------------------------------------------
268# # References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution
269# functions of likelihood ratio tests for cointegration',
270# Queen's University Institute for Economic Research Discussion paper.
271# -------------------------------------------------------
272
273# written by:
274# James P. LeSage, Dept of Economics
275# University of Toledo
276# 2801 W. Bancroft St,
277# Toledo, OH 43606
278# jlesage@spatial-econometrics.com
279#
280# Ported to Python by Javier Garcia
282
283# these are the values from Johansen's 1995 book
284# for comparison to the MacKinnon values
285# jcp0 = [ 2.98   4.14   7.02
286#        10.35  12.21  16.16
287#        21.58  24.08  29.19
288#        36.58  39.71  46.00
289#        55.54  59.24  66.71
290#        78.30  86.36  91.12
291#       104.93 109.93 119.58
292#       135.16 140.74 151.70
293#       169.30 175.47 187.82
294#       207.21 214.07 226.95
295#       248.77 256.23 270.47
296#       293.83 301.95 318.14];
297
298
299
300
301    jcp0 = ((2.9762, 4.1296, 6.9406),
302            (10.4741, 12.3212, 16.3640),
303            (21.7781, 24.2761, 29.5147),
304            (37.0339, 40.1749, 46.5716),
305            (56.2839, 60.0627, 67.6367),
306            (79.5329, 83.9383, 92.7136),
307            (106.7351, 111.7797, 121.7375),
308            (137.9954, 143.6691, 154.7977),
309            (173.2292, 179.5199, 191.8122),
310            (212.4721, 219.4051, 232.8291),
311            (255.6732, 263.2603, 277.9962),
312            (302.9054, 311.1288, 326.9716))
313
314
315    jcp1 = ((2.7055, 3.8415, 6.6349),
316            (13.4294, 15.4943, 19.9349),
317            (27.0669, 29.7961, 35.4628),
318            (44.4929, 47.8545, 54.6815),
319            (65.8202, 69.8189, 77.8202),
320            (91.1090, 95.7542, 104.9637),
321            (120.3673, 125.6185, 135.9825),
322            (153.6341, 159.5290, 171.0905),
323            (190.8714, 197.3772, 210.0366),
324            (232.1030, 239.2468, 253.2526),
325            (277.3740, 285.1402, 300.2821),
326            (326.5354, 334.9795, 351.2150))
327
328    jcp2 = ((2.7055, 3.8415, 6.6349),
329            (16.1619, 18.3985, 23.1485),
330            (32.0645, 35.0116, 41.0815),
331            (51.6492, 55.2459, 62.5202),
332            (75.1027, 79.3422, 87.7748),
333            (102.4674, 107.3429, 116.9829),
334            (133.7852, 139.2780, 150.0778),
335            (169.0618, 175.1584, 187.1891),
336            (208.3582, 215.1268, 228.2226),
337            (251.6293, 259.0267, 273.3838),
338            (298.8836, 306.8988, 322.4264),
339            (350.1125, 358.7190, 375.3203))
340
341
342
343    if (p > 1) or (p < -1):
344        jc = (0, 0, 0)
345    elif (n > 12) or (n < 1):
346        jc = (0, 0, 0)
347    elif p == -1:
348        jc = jcp0[n - 1]
349    elif p == 0:
350        jc = jcp1[n - 1]
351    elif p == 1:
352        jc = jcp2[n - 1]
353
354
355
356    return jc
357
358def c_sja(n, p):
359
360# PURPOSE: find critical values for Johansen maximum eigenvalue statistic
361# ------------------------------------------------------------
362# USAGE:  jc = c_sja(n,p)
363# where:    n = dimension of the VAR system
364#           p = order of time polynomial in the null-hypothesis
365#                 p = -1, no deterministic part
366#                 p =  0, for constant term
367#                 p =  1, for constant plus time-trend
368#                 p >  1  returns no critical values
369# ------------------------------------------------------------
370# RETURNS: a (3x1) vector of percentiles for the maximum eigenvalue
371#          statistic for: [90# 95# 99#]
372# ------------------------------------------------------------
373# NOTES: for n > 12, the function returns a (3x1) vector of zeros.
374#        The values returned by the function were generated using
375#        a method described in MacKinnon (1996), using his FORTRAN
376#        program johdist.f
377# ------------------------------------------------------------
379# ------------------------------------------------------------
380# References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution
381# functions of likelihood ratio tests for cointegration',
382# Queen's University Institute for Economic Research Discussion paper.
383# -------------------------------------------------------
384
385# written by:
386# James P. LeSage, Dept of Economics
387# University of Toledo
388# 2801 W. Bancroft St,
389# Toledo, OH 43606
390# jlesage@spatial-econometrics.com
391# Ported to Python by Javier Garcia
393
394
395    jcp0 = ((2.9762, 4.1296, 6.9406),
396            (9.4748, 11.2246, 15.0923),
397            (15.7175, 17.7961, 22.2519),
398            (21.8370, 24.1592, 29.0609),
399            (27.9160, 30.4428, 35.7359),
400            (33.9271, 36.6301, 42.2333),
401            (39.9085, 42.7679, 48.6606),
402            (45.8930, 48.8795, 55.0335),
403            (51.8528, 54.9629, 61.3449),
404            (57.7954, 61.0404, 67.6415),
405            (63.7248, 67.0756, 73.8856),
406            (69.6513, 73.0946, 80.0937))
407
408    jcp1 = ((2.7055, 3.8415, 6.6349),
409            (12.2971, 14.2639, 18.5200),
410            (18.8928, 21.1314, 25.8650),
411            (25.1236, 27.5858, 32.7172),
412            (31.2379, 33.8777, 39.3693),
413            (37.2786, 40.0763, 45.8662),
414            (43.2947, 46.2299, 52.3069),
415            (49.2855, 52.3622, 58.6634),
416            (55.2412, 58.4332, 64.9960),
417            (61.2041, 64.5040, 71.2525),
418            (67.1307, 70.5392, 77.4877),
419            (73.0563, 76.5734, 83.7105))
420
421    jcp2 = ((2.7055, 3.8415, 6.6349),
422            (15.0006, 17.1481, 21.7465),
423            (21.8731, 24.2522, 29.2631),
424            (28.2398, 30.8151, 36.1930),
425            (34.4202, 37.1646, 42.8612),
426            (40.5244, 43.4183, 49.4095),
427            (46.5583, 49.5875, 55.8171),
428            (52.5858, 55.7302, 62.1741),
429            (58.5316, 61.8051, 68.5030),
430            (64.5292, 67.9040, 74.7434),
431            (70.4630, 73.9355, 81.0678),
432            (76.4081, 79.9878, 87.2395))
433
434
435    if (p > 1) or (p < -1):
436        jc = (0, 0, 0)
437    elif (n > 12) or (n < 1):
438        jc = (0, 0, 0)
439    elif p == -1:
440        jc = jcp0[n - 1]
441    elif p == 0:
442        jc = jcp1[n - 1]
443    elif p == 1:
444        jc = jcp2[n - 1]
445
446
447    return jc
448
```