# /EChanBook2/johansen_test.py
# (scraper metadata removed: "Python | 448 lines | 235 code | 49 blank | 164 comment")
1 2''' 3function result = johansen(x,p,k) 4% PURPOSE: perform Johansen cointegration tests 5% ------------------------------------------------------- 6% USAGE: result = johansen(x,p,k) 7% where: x = input matrix of time-series in levels, (nobs x m) 8% p = order of time polynomial in the null-hypothesis 9% p = -1, no deterministic part 10% p = 0, for constant term 11% p = 1, for constant plus time-trend 12% p > 1, for higher order polynomial 13% k = number of lagged difference terms used when 14% computing the estimator 15% ------------------------------------------------------- 16% RETURNS: a results structure: 17% result.eig = eigenvalues (m x 1) 18% result.evec = eigenvectors (m x m), where first 19% r columns are normalized coint vectors 20% result.lr1 = likelihood ratio trace statistic for r=0 to m-1 21% (m x 1) vector 22% result.lr2 = maximum eigenvalue statistic for r=0 to m-1 23% (m x 1) vector 24% result.cvt = critical values for trace statistic 25% (m x 3) vector [90% 95% 99%] 26% result.cvm = critical values for max eigen value statistic 27% (m x 3) vector [90% 95% 99%] 28% result.ind = index of co-integrating variables ordered by 29% size of the eigenvalues from large to small 30% ------------------------------------------------------- 31% NOTE: c_sja(), c_sjt() provide critical values generated using 32% a method of MacKinnon (1994, 1996). 33% critical values are available for n<=12 and -1 <= p <= 1, 34% zeros are returned for other cases. 35% ------------------------------------------------------- 36% SEE ALSO: prt_coint, a function that prints results 37% ------------------------------------------------------- 38% References: Johansen (1988), 'Statistical Analysis of Co-integration 39% vectors', Journal of Economic Dynamics and Control, 12, pp. 231-254. 40% MacKinnon, Haug, Michelis (1996) 'Numerical distribution 41% functions of likelihood ratio tests for cointegration', 42% Queen's University Institute for Economic Research Discussion paper. 
43% (see also: MacKinnon's JBES 1994 article 44% ------------------------------------------------------- 45 46% written by: 47% James P. LeSage, Dept of Economics 48% University of Toledo 49% 2801 W. Bancroft St, 50% Toledo, OH 43606 51% jlesage@spatial-econometrics.com 52 53% **************************************************************** 54% NOTE: Adina Enache provided some bug fixes and corrections that 55% she notes below in comments. 4/10/2000 56% **************************************************************** 57''' 58 59import numpy as np 60from numpy import zeros, ones, flipud, log 61from numpy.linalg import inv, eig, cholesky as chol 62from statsmodels.regression.linear_model import OLS 63 64 65tdiff = np.diff 66 67class Holder(object): 68 pass 69 70def rows(x): 71 return x.shape[0] 72 73def trimr(x, front, end): 74 if end > 0: 75 return x[front:-end] 76 else: 77 return x[front:] 78 79import statsmodels.tsa.tsatools as tsat 80mlag = tsat.lagmat 81 82def mlag_(x, maxlag): 83 '''return all lags up to maxlag 84 ''' 85 return x[:-lag] 86 87def lag(x, lag): 88 return x[:-lag] 89 90def detrend(y, order): 91 if order == -1: 92 return y 93 return OLS(y, np.vander(np.linspace(-1, 1, len(y)), order + 1)).fit().resid 94 95def resid(y, x): 96 r = y - np.dot(x, np.dot(np.linalg.pinv(x), y)) 97 return r 98 99 100 101 102def coint_johansen(x, p, k, print_on_console=True): 103 104 # % error checking on inputs 105 # if (nargin ~= 3) 106 # error('Wrong # of inputs to johansen') 107 # end 108 nobs, m = x.shape 109 110 # why this? 
f is detrend transformed series, p is detrend data 111 if (p > -1): 112 f = 0 113 else: 114 f = p 115 116 x = detrend(x, p) 117 dx = tdiff(x, 1, axis=0) 118 # dx = trimr(dx,1,0) 119 z = mlag(dx, k) # [k-1:] 120# print z.shape 121 z = trimr(z, k, 0) 122 z = detrend(z, f) 123# print dx.shape 124 dx = trimr(dx, k, 0) 125 126 dx = detrend(dx, f) 127 # r0t = dx - z*(z\dx) 128 r0t = resid(dx, z) # diff on lagged diffs 129 # lx = trimr(lag(x,k),k,0) 130 lx = lag(x, k) 131 lx = trimr(lx, 1, 0) 132 dx = detrend(lx, f) 133# print 'rkt', dx.shape, z.shape 134 # rkt = dx - z*(z\dx) 135 rkt = resid(dx, z) # level on lagged diffs 136 skk = np.dot(rkt.T, rkt) / rows(rkt) 137 sk0 = np.dot(rkt.T, r0t) / rows(rkt) 138 s00 = np.dot(r0t.T, r0t) / rows(r0t) 139 sig = np.dot(sk0, np.dot(inv(s00), (sk0.T))) 140 tmp = inv(skk) 141 # du, au = eig(np.dot(tmp, sig)) 142 au, du = eig(np.dot(tmp, sig)) # au is eval, du is evec 143 # orig = np.dot(tmp, sig) 144 145 # % Normalize the eigen vectors such that (du'skk*du) = I 146 temp = inv(chol(np.dot(du.T, np.dot(skk, du)))) 147 dt = np.dot(du, temp) 148 149 150 # JP: the next part can be done much easier 151 152 # % NOTE: At this point, the eigenvectors are aligned by column. To 153 # % physically move the column elements using the MATLAB sort, 154 # % take the transpose to put the eigenvectors across the row 155 156 # dt = transpose(dt) 157 158 # % sort eigenvalues and vectors 159 160 # au, auind = np.sort(diag(au)) 161 auind = np.argsort(au) 162 # a = flipud(au) 163 aind = flipud(auind) 164 a = au[aind] 165 # d = dt[aind,:] 166 d = dt[:, aind] 167 168 # %NOTE: The eigenvectors have been sorted by row based on auind and moved to array "d". 169 # % Put the eigenvectors back in column format after the sort by taking the 170 # % transpose of "d". Since the eigenvectors have been physically moved, there is 171 # % no need for aind at all. To preserve existing programming, aind is reset back to 172 # % 1, 2, 3, .... 
173 174 # d = transpose(d) 175 # test = np.dot(transpose(d), np.dot(skk, d)) 176 177 # %EXPLANATION: The MATLAB sort function sorts from low to high. The flip realigns 178 # %auind to go from the largest to the smallest eigenvalue (now aind). The original procedure 179 # %physically moved the rows of dt (to d) based on the alignment in aind and then used 180 # %aind as a column index to address the eigenvectors from high to low. This is a double 181 # %sort. If you wanted to extract the eigenvector corresponding to the largest eigenvalue by, 182 # %using aind as a reference, you would get the correct eigenvector, but with sorted 183 # %coefficients and, therefore, any follow-on calculation would seem to be in error. 184 # %If alternative programming methods are used to evaluate the eigenvalues, e.g. Frame method 185 # %followed by a root extraction on the characteristic equation, then the roots can be 186 # %quickly sorted. One by one, the corresponding eigenvectors can be generated. The resultant 187 # %array can be operated on using the Cholesky transformation, which enables a unit 188 # %diagonalization of skk. But nowhere along the way are the coefficients within the 189 # %eigenvector array ever changed. The final value of the "beta" array using either method 190 # %should be the same. 191 192 193 # % Compute the trace and max eigenvalue statistics */ 194 lr1 = zeros(m) 195 lr2 = zeros(m) 196 cvm = zeros((m, 3)) 197 cvt = zeros((m, 3)) 198 iota = ones(m) 199 t, junk = rkt.shape 200 for i in range(0, m): 201 tmp = trimr(log(iota - a), i , 0) 202 lr1[i] = -t * np.sum(tmp, 0) # columnsum ? 
203 # tmp = np.log(1-a) 204 # lr1[i] = -t * np.sum(tmp[i:]) 205 lr2[i] = -t * log(1 - a[i]) 206 cvm[i, :] = c_sja(m - i, p) 207 cvt[i, :] = c_sjt(m - i, p) 208 aind[i] = i 209 # end 210 211 result = Holder() 212 # % set up results structure 213 # estimation results, residuals 214 result.rkt = rkt 215 result.r0t = r0t 216 result.eig = a 217 result.evec = d # transposed compared to matlab ? 218 result.lr1 = lr1 219 result.lr2 = lr2 220 result.cvt = cvt 221 result.cvm = cvm 222 result.ind = aind 223 result.meth = 'johansen' 224 225 if print_on_console == True: 226 print ('--------------------------------------------------') 227 print ('--> Trace Statistics') 228 print ('variable statistic Crit-90% Crit-95% Crit-99%') 229 for i in range(len(result.lr1)): 230 print ('r =', i, '\t', round(result.lr1[i], 4), result.cvt[i, 0], result.cvt[i, 1], result.cvt[i, 2]) 231 print ('--------------------------------------------------') 232 print ('--> Eigen Statistics') 233 print ('variable statistic Crit-90% Crit-95% Crit-99%') 234 for i in range(len(result.lr2)): 235 print ('r =', i, '\t', round(result.lr2[i], 4), result.cvm[i, 0], result.cvm[i, 1], result.cvm[i, 2]) 236 print ('--------------------------------------------------') 237 print ('eigenvectors:\n', result.evec) 238 print ('--------------------------------------------------') 239 print ('eigenvalues:\n', result.eig) 240 print ('--------------------------------------------------') 241 242 243 return result 244 245def c_sjt(n, p): 246 247# PURPOSE: find critical values for Johansen trace statistic 248# ------------------------------------------------------------ 249# USAGE: jc = c_sjt(n,p) 250# where: n = dimension of the VAR system 251# NOTE: routine doesn't work for n > 12 252# p = order of time polynomial in the null-hypothesis 253# p = -1, no deterministic part 254# p = 0, for constant term 255# p = 1, for constant plus time-trend 256# p > 1 returns no critical values 257# 
------------------------------------------------------------ 258# RETURNS: a (3x1) vector of percentiles for the trace 259# statistic for [90# 95# 99#] 260# ------------------------------------------------------------ 261# NOTES: for n > 12, the function returns a (3x1) vector of zeros. 262# The values returned by the function were generated using 263# a method described in MacKinnon (1996), using his FORTRAN 264# program johdist.f 265# ------------------------------------------------------------ 266# SEE ALSO: johansen() 267# ------------------------------------------------------------ 268# # References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution 269# functions of likelihood ratio tests for cointegration', 270# Queen's University Institute for Economic Research Discussion paper. 271# ------------------------------------------------------- 272 273# written by: 274# James P. LeSage, Dept of Economics 275# University of Toledo 276# 2801 W. Bancroft St, 277# Toledo, OH 43606 278# jlesage@spatial-econometrics.com 279# 280# Ported to Python by Javier Garcia 281# javier.macro.trader@gmail.com 282 283# these are the values from Johansen's 1995 book 284# for comparison to the MacKinnon values 285# jcp0 = [ 2.98 4.14 7.02 286# 10.35 12.21 16.16 287# 21.58 24.08 29.19 288# 36.58 39.71 46.00 289# 55.54 59.24 66.71 290# 78.30 86.36 91.12 291# 104.93 109.93 119.58 292# 135.16 140.74 151.70 293# 169.30 175.47 187.82 294# 207.21 214.07 226.95 295# 248.77 256.23 270.47 296# 293.83 301.95 318.14]; 297 298 299 300 301 jcp0 = ((2.9762, 4.1296, 6.9406), 302 (10.4741, 12.3212, 16.3640), 303 (21.7781, 24.2761, 29.5147), 304 (37.0339, 40.1749, 46.5716), 305 (56.2839, 60.0627, 67.6367), 306 (79.5329, 83.9383, 92.7136), 307 (106.7351, 111.7797, 121.7375), 308 (137.9954, 143.6691, 154.7977), 309 (173.2292, 179.5199, 191.8122), 310 (212.4721, 219.4051, 232.8291), 311 (255.6732, 263.2603, 277.9962), 312 (302.9054, 311.1288, 326.9716)) 313 314 315 jcp1 = ((2.7055, 3.8415, 
6.6349), 316 (13.4294, 15.4943, 19.9349), 317 (27.0669, 29.7961, 35.4628), 318 (44.4929, 47.8545, 54.6815), 319 (65.8202, 69.8189, 77.8202), 320 (91.1090, 95.7542, 104.9637), 321 (120.3673, 125.6185, 135.9825), 322 (153.6341, 159.5290, 171.0905), 323 (190.8714, 197.3772, 210.0366), 324 (232.1030, 239.2468, 253.2526), 325 (277.3740, 285.1402, 300.2821), 326 (326.5354, 334.9795, 351.2150)) 327 328 jcp2 = ((2.7055, 3.8415, 6.6349), 329 (16.1619, 18.3985, 23.1485), 330 (32.0645, 35.0116, 41.0815), 331 (51.6492, 55.2459, 62.5202), 332 (75.1027, 79.3422, 87.7748), 333 (102.4674, 107.3429, 116.9829), 334 (133.7852, 139.2780, 150.0778), 335 (169.0618, 175.1584, 187.1891), 336 (208.3582, 215.1268, 228.2226), 337 (251.6293, 259.0267, 273.3838), 338 (298.8836, 306.8988, 322.4264), 339 (350.1125, 358.7190, 375.3203)) 340 341 342 343 if (p > 1) or (p < -1): 344 jc = (0, 0, 0) 345 elif (n > 12) or (n < 1): 346 jc = (0, 0, 0) 347 elif p == -1: 348 jc = jcp0[n - 1] 349 elif p == 0: 350 jc = jcp1[n - 1] 351 elif p == 1: 352 jc = jcp2[n - 1] 353 354 355 356 return jc 357 358def c_sja(n, p): 359 360# PURPOSE: find critical values for Johansen maximum eigenvalue statistic 361# ------------------------------------------------------------ 362# USAGE: jc = c_sja(n,p) 363# where: n = dimension of the VAR system 364# p = order of time polynomial in the null-hypothesis 365# p = -1, no deterministic part 366# p = 0, for constant term 367# p = 1, for constant plus time-trend 368# p > 1 returns no critical values 369# ------------------------------------------------------------ 370# RETURNS: a (3x1) vector of percentiles for the maximum eigenvalue 371# statistic for: [90# 95# 99#] 372# ------------------------------------------------------------ 373# NOTES: for n > 12, the function returns a (3x1) vector of zeros. 
374# The values returned by the function were generated using 375# a method described in MacKinnon (1996), using his FORTRAN 376# program johdist.f 377# ------------------------------------------------------------ 378# SEE ALSO: johansen() 379# ------------------------------------------------------------ 380# References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution 381# functions of likelihood ratio tests for cointegration', 382# Queen's University Institute for Economic Research Discussion paper. 383# ------------------------------------------------------- 384 385# written by: 386# James P. LeSage, Dept of Economics 387# University of Toledo 388# 2801 W. Bancroft St, 389# Toledo, OH 43606 390# jlesage@spatial-econometrics.com 391# Ported to Python by Javier Garcia 392# javier.macro.trader@gmail.com 393 394 395 jcp0 = ((2.9762, 4.1296, 6.9406), 396 (9.4748, 11.2246, 15.0923), 397 (15.7175, 17.7961, 22.2519), 398 (21.8370, 24.1592, 29.0609), 399 (27.9160, 30.4428, 35.7359), 400 (33.9271, 36.6301, 42.2333), 401 (39.9085, 42.7679, 48.6606), 402 (45.8930, 48.8795, 55.0335), 403 (51.8528, 54.9629, 61.3449), 404 (57.7954, 61.0404, 67.6415), 405 (63.7248, 67.0756, 73.8856), 406 (69.6513, 73.0946, 80.0937)) 407 408 jcp1 = ((2.7055, 3.8415, 6.6349), 409 (12.2971, 14.2639, 18.5200), 410 (18.8928, 21.1314, 25.8650), 411 (25.1236, 27.5858, 32.7172), 412 (31.2379, 33.8777, 39.3693), 413 (37.2786, 40.0763, 45.8662), 414 (43.2947, 46.2299, 52.3069), 415 (49.2855, 52.3622, 58.6634), 416 (55.2412, 58.4332, 64.9960), 417 (61.2041, 64.5040, 71.2525), 418 (67.1307, 70.5392, 77.4877), 419 (73.0563, 76.5734, 83.7105)) 420 421 jcp2 = ((2.7055, 3.8415, 6.6349), 422 (15.0006, 17.1481, 21.7465), 423 (21.8731, 24.2522, 29.2631), 424 (28.2398, 30.8151, 36.1930), 425 (34.4202, 37.1646, 42.8612), 426 (40.5244, 43.4183, 49.4095), 427 (46.5583, 49.5875, 55.8171), 428 (52.5858, 55.7302, 62.1741), 429 (58.5316, 61.8051, 68.5030), 430 (64.5292, 67.9040, 74.7434), 431 (70.4630, 73.9355, 
81.0678), 432 (76.4081, 79.9878, 87.2395)) 433 434 435 if (p > 1) or (p < -1): 436 jc = (0, 0, 0) 437 elif (n > 12) or (n < 1): 438 jc = (0, 0, 0) 439 elif p == -1: 440 jc = jcp0[n - 1] 441 elif p == 0: 442 jc = jcp1[n - 1] 443 elif p == 1: 444 jc = jcp2[n - 1] 445 446 447 return jc 448