# Johansen cointegration test
# Ported to Python from James P. LeSage's MATLAB econometrics toolbox.
# Source: https://github.com/JavierGarciaD/AlgoRepo (EChanBook2/johansen_test.py)
'''
function result = johansen(x,p,k)
% PURPOSE: perform Johansen cointegration tests
% -------------------------------------------------------
% USAGE: result = johansen(x,p,k)
% where:      x = input matrix of time-series in levels, (nobs x m)
%             p = order of time polynomial in the null-hypothesis
%                 p = -1, no deterministic part
%                 p =  0, for constant term
%                 p =  1, for constant plus time-trend
%                 p >  1, for higher order polynomial
%             k = number of lagged difference terms used when
%                 computing the estimator
% -------------------------------------------------------
% RETURNS: a results structure:
%          result.eig  = eigenvalues  (m x 1)
%          result.evec = eigenvectors (m x m), where first
%                        r columns are normalized coint vectors
%          result.lr1  = likelihood ratio trace statistic for r=0 to m-1
%                        (m x 1) vector
%          result.lr2  = maximum eigenvalue statistic for r=0 to m-1
%                        (m x 1) vector
%          result.cvt  = critical values for trace statistic
%                        (m x 3) vector [90% 95% 99%]
%          result.cvm  = critical values for max eigen value statistic
%                        (m x 3) vector [90% 95% 99%]
%          result.ind  = index of co-integrating variables ordered by
%                        size of the eigenvalues from large to small
% -------------------------------------------------------
% NOTE: c_sja(), c_sjt() provide critical values generated using
%       a method of MacKinnon (1994, 1996).
%       critical values are available for n<=12 and -1 <= p <= 1,
%       zeros are returned for other cases.
% -------------------------------------------------------
% SEE ALSO: prt_coint, a function that prints results
% -------------------------------------------------------
% References: Johansen (1988), 'Statistical Analysis of Co-integration
% vectors', Journal of Economic Dynamics and Control, 12, pp. 231-254.
% MacKinnon, Haug, Michelis (1996) 'Numerical distribution
% functions of likelihood ratio tests for cointegration',
% Queen's University Institute for Economic Research Discussion paper.
% (see also: MacKinnon's JBES 1994 article)
% -------------------------------------------------------

% written by:
% James P. LeSage, Dept of Economics
% University of Toledo
% 2801 W. Bancroft St,
% Toledo, OH 43606
% jlesage@spatial-econometrics.com

% ****************************************************************
% NOTE: Adina Enache provided some bug fixes and corrections that
%       she notes below in comments. 4/10/2000
% ****************************************************************
'''
import numpy as np
from numpy import zeros, ones, flipud, log
from numpy.linalg import inv, eig, cholesky as chol
from statsmodels.regression.linear_model import OLS


# tdiff(x, 1, axis=0) takes first differences of each column (series).
tdiff = np.diff
 66
 67class Holder(object):
 68    pass
 69
 70def rows(x):
 71    return x.shape[0]
 72
 73def trimr(x, front, end):
 74    if end > 0:
 75        return x[front:-end]
 76    else:
 77        return x[front:]
 78
# statsmodels' lagmat provides the MATLAB-style mlag (matrix of lagged values).
import statsmodels.tsa.tsatools as tsat
mlag = tsat.lagmat
 81
 82def mlag_(x, maxlag):
 83    '''return all lags up to maxlag

 84    '''
 85    return x[:-lag]
 86
 87def lag(x, lag):
 88    return x[:-lag]
 89
 90def detrend(y, order):
 91    if order == -1:
 92        return y
 93    return OLS(y, np.vander(np.linspace(-1, 1, len(y)), order + 1)).fit().resid
 94
 95def resid(y, x):
 96    r = y - np.dot(x, np.dot(np.linalg.pinv(x), y))
 97    return r
 98
 99
100
101
def coint_johansen(x, p, k, print_on_console=True):
    """Johansen cointegration test (port of LeSage's MATLAB ``johansen``).

    Parameters
    ----------
    x : (nobs x m) array of time series in levels.
    p : order of the time polynomial in the null hypothesis
        (-1 = no deterministic part, 0 = constant, 1 = constant + trend).
    k : number of lagged difference terms in the auxiliary regressions.
    print_on_console : if True, print the trace and max-eigenvalue
        statistics alongside their 90/95/99% critical values.

    Returns
    -------
    Holder with attributes: eig (sorted eigenvalues), evec (matching
    eigenvectors), lr1 (trace statistics), lr2 (max-eigenvalue
    statistics), cvt / cvm (critical-value tables), ind, rkt / r0t
    (auxiliary-regression residuals) and meth = 'johansen'.
    """
    nobs, m = x.shape

    # f is the detrend order applied to the *differenced* data: when any
    # deterministic term is included (p > -1) the differences are only
    # demeaned (f = 0); only the levels get the full order p.
    if (p > -1):
        f = 0
    else:
        f = p

    # Detrend the levels, first-difference them, and build the matrix of
    # k lagged differences (the regressors of both auxiliary regressions).
    x = detrend(x, p)
    dx = tdiff(x, 1, axis=0)
    z = mlag(dx, k)
    z = trimr(z, k, 0)
    z = detrend(z, f)

    dx = trimr(dx, k, 0)
    dx = detrend(dx, f)
    # Residuals of the differences on the lagged differences.
    r0t = resid(dx, z)

    # Lagged levels, trimmed to align with the differenced sample.
    lx = lag(x, k)
    lx = trimr(lx, 1, 0)
    # NOTE(review): dx is deliberately reused here to hold the detrended
    # lagged *levels* — kept exactly as in the original MATLAB port.
    dx = detrend(lx, f)
    # Residuals of the lagged levels on the lagged differences.
    rkt = resid(dx, z)

    # Product-moment matrices; the eigenvalues of skk^-1 * sk0 * s00^-1
    # * sk0' drive both test statistics.
    skk = np.dot(rkt.T, rkt) / rows(rkt)
    sk0 = np.dot(rkt.T, r0t) / rows(rkt)
    s00 = np.dot(r0t.T, r0t) / rows(r0t)
    sig = np.dot(sk0, np.dot(inv(s00), (sk0.T)))
    tmp = inv(skk)
    au, du = eig(np.dot(tmp, sig))  # au = eigenvalues, du = eigenvectors

    # Normalize the eigenvectors so that du' * skk * du = I.
    temp = inv(chol(np.dot(du.T, np.dot(skk, du))))
    dt = np.dot(du, temp)

    # Sort eigenvalues (and their eigenvectors, by column) from largest
    # to smallest; np.linalg.eig returns them in no particular order.
    auind = np.argsort(au)
    aind = flipud(auind)
    a = au[aind]
    d = dt[:, aind]

    # Trace (lr1) and maximum-eigenvalue (lr2) statistics for each rank
    # hypothesis r = 0 .. m-1, with MacKinnon critical values.
    lr1 = zeros(m)
    lr2 = zeros(m)
    cvm = zeros((m, 3))
    cvt = zeros((m, 3))
    iota = ones(m)
    t, junk = rkt.shape  # t = effective sample size
    for i in range(0, m):
        tmp = trimr(log(iota - a), i , 0)
        lr1[i] = -t * np.sum(tmp, 0)
        lr2[i] = -t * log(1 - a[i])
        cvm[i, :] = c_sja(m - i, p)
        cvt[i, :] = c_sjt(m - i, p)
        aind[i] = i  # reset index to 0,1,2,... (see original MATLAB notes)

    # Package everything into a results structure.
    result = Holder()
    result.rkt = rkt
    result.r0t = r0t
    result.eig = a
    result.evec = d  # transposed relative to the MATLAB original
    result.lr1 = lr1
    result.lr2 = lr2
    result.cvt = cvt
    result.cvm = cvm
    result.ind = aind
    result.meth = 'johansen'

    if print_on_console == True:
        print ('--------------------------------------------------')
        print ('--> Trace Statistics')
        print ('variable statistic Crit-90% Crit-95%  Crit-99%')
        for i in range(len(result.lr1)):
            print ('r =', i, '\t', round(result.lr1[i], 4), result.cvt[i, 0], result.cvt[i, 1], result.cvt[i, 2])
        print ('--------------------------------------------------')
        print ('--> Eigen Statistics')
        print ('variable statistic Crit-90% Crit-95%  Crit-99%')
        for i in range(len(result.lr2)):
            print ('r =', i, '\t', round(result.lr2[i], 4), result.cvm[i, 0], result.cvm[i, 1], result.cvm[i, 2])
        print ('--------------------------------------------------')
        print ('eigenvectors:\n', result.evec)
        print ('--------------------------------------------------')
        print ('eigenvalues:\n', result.eig)
        print ('--------------------------------------------------')

    return result
244
def c_sjt(n, p):
    """Critical values for the Johansen trace statistic.

    Parameters
    ----------
    n : dimension of the VAR system (1 <= n <= 12).
    p : order of the time polynomial in the null hypothesis
        (-1 = none, 0 = constant, 1 = constant + trend).

    Returns
    -------
    (90%, 95%, 99%) percentile tuple for the trace statistic; a tuple
    of zeros for any n or p outside the tabulated range.

    Values generated with the method of MacKinnon (1996) via his
    FORTRAN program johdist.f (MacKinnon, Haug, Michelis 1996).

    Fix: the original left ``jc`` unassigned for non-integer p inside
    (-1, 1) (e.g. p = 0.5), raising UnboundLocalError; all unsupported
    p now return zeros, as the documentation promises.
    """
    # Rows are n = 1..12, columns are the 90/95/99% percentiles.
    jcp0 = ((2.9762, 4.1296, 6.9406),      # p = -1: no deterministic part
            (10.4741, 12.3212, 16.3640),
            (21.7781, 24.2761, 29.5147),
            (37.0339, 40.1749, 46.5716),
            (56.2839, 60.0627, 67.6367),
            (79.5329, 83.9383, 92.7136),
            (106.7351, 111.7797, 121.7375),
            (137.9954, 143.6691, 154.7977),
            (173.2292, 179.5199, 191.8122),
            (212.4721, 219.4051, 232.8291),
            (255.6732, 263.2603, 277.9962),
            (302.9054, 311.1288, 326.9716))

    jcp1 = ((2.7055, 3.8415, 6.6349),      # p = 0: constant term
            (13.4294, 15.4943, 19.9349),
            (27.0669, 29.7961, 35.4628),
            (44.4929, 47.8545, 54.6815),
            (65.8202, 69.8189, 77.8202),
            (91.1090, 95.7542, 104.9637),
            (120.3673, 125.6185, 135.9825),
            (153.6341, 159.5290, 171.0905),
            (190.8714, 197.3772, 210.0366),
            (232.1030, 239.2468, 253.2526),
            (277.3740, 285.1402, 300.2821),
            (326.5354, 334.9795, 351.2150))

    jcp2 = ((2.7055, 3.8415, 6.6349),      # p = 1: constant plus trend
            (16.1619, 18.3985, 23.1485),
            (32.0645, 35.0116, 41.0815),
            (51.6492, 55.2459, 62.5202),
            (75.1027, 79.3422, 87.7748),
            (102.4674, 107.3429, 116.9829),
            (133.7852, 139.2780, 150.0778),
            (169.0618, 175.1584, 187.1891),
            (208.3582, 215.1268, 228.2226),
            (251.6293, 259.0267, 273.3838),
            (298.8836, 306.8988, 322.4264),
            (350.1125, 358.7190, 375.3203))

    if (n > 12) or (n < 1):
        return (0, 0, 0)
    if p == -1:
        return jcp0[n - 1]
    if p == 0:
        return jcp1[n - 1]
    if p == 1:
        return jcp2[n - 1]
    # Unsupported deterministic-trend order: no critical values tabulated.
    return (0, 0, 0)
357
def c_sja(n, p):
    """Critical values for the Johansen maximum eigenvalue statistic.

    Parameters
    ----------
    n : dimension of the VAR system (1 <= n <= 12).
    p : order of the time polynomial in the null hypothesis
        (-1 = none, 0 = constant, 1 = constant + trend).

    Returns
    -------
    (90%, 95%, 99%) percentile tuple for the max-eigenvalue statistic;
    a tuple of zeros for any n or p outside the tabulated range.

    Values generated with the method of MacKinnon (1996) via his
    FORTRAN program johdist.f (MacKinnon, Haug, Michelis 1996).

    Fix: the original left ``jc`` unassigned for non-integer p inside
    (-1, 1) (e.g. p = 0.5), raising UnboundLocalError; all unsupported
    p now return zeros, consistent with c_sjt.
    """
    # Rows are n = 1..12, columns are the 90/95/99% percentiles.
    jcp0 = ((2.9762, 4.1296, 6.9406),      # p = -1: no deterministic part
            (9.4748, 11.2246, 15.0923),
            (15.7175, 17.7961, 22.2519),
            (21.8370, 24.1592, 29.0609),
            (27.9160, 30.4428, 35.7359),
            (33.9271, 36.6301, 42.2333),
            (39.9085, 42.7679, 48.6606),
            (45.8930, 48.8795, 55.0335),
            (51.8528, 54.9629, 61.3449),
            (57.7954, 61.0404, 67.6415),
            (63.7248, 67.0756, 73.8856),
            (69.6513, 73.0946, 80.0937))

    jcp1 = ((2.7055, 3.8415, 6.6349),      # p = 0: constant term
            (12.2971, 14.2639, 18.5200),
            (18.8928, 21.1314, 25.8650),
            (25.1236, 27.5858, 32.7172),
            (31.2379, 33.8777, 39.3693),
            (37.2786, 40.0763, 45.8662),
            (43.2947, 46.2299, 52.3069),
            (49.2855, 52.3622, 58.6634),
            (55.2412, 58.4332, 64.9960),
            (61.2041, 64.5040, 71.2525),
            (67.1307, 70.5392, 77.4877),
            (73.0563, 76.5734, 83.7105))

    jcp2 = ((2.7055, 3.8415, 6.6349),      # p = 1: constant plus trend
            (15.0006, 17.1481, 21.7465),
            (21.8731, 24.2522, 29.2631),
            (28.2398, 30.8151, 36.1930),
            (34.4202, 37.1646, 42.8612),
            (40.5244, 43.4183, 49.4095),
            (46.5583, 49.5875, 55.8171),
            (52.5858, 55.7302, 62.1741),
            (58.5316, 61.8051, 68.5030),
            (64.5292, 67.9040, 74.7434),
            (70.4630, 73.9355, 81.0678),
            (76.4081, 79.9878, 87.2395))

    if (n > 12) or (n < 1):
        return (0, 0, 0)
    if p == -1:
        return jcp0[n - 1]
    if p == 0:
        return jcp1[n - 1]
    if p == 1:
        return jcp2[n - 1]
    # Unsupported deterministic-trend order: no critical values tabulated.
    return (0, 0, 0)
448