PageRenderTime 29ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/EChanBook2/johansen_test.py

https://github.com/JavierGarciaD/AlgoRepo
Python | 448 lines | 235 code | 49 blank | 164 comment | 22 complexity | 1514ee96d19c1862e87ec31e28c637a7 MD5 | raw file
Possible License(s): Apache-2.0
  1. '''
  2. function result = johansen(x,p,k)
  3. % PURPOSE: perform Johansen cointegration tests
  4. % -------------------------------------------------------
  5. % USAGE: result = johansen(x,p,k)
  6. % where: x = input matrix of time-series in levels, (nobs x m)
  7. % p = order of time polynomial in the null-hypothesis
  8. % p = -1, no deterministic part
  9. % p = 0, for constant term
  10. % p = 1, for constant plus time-trend
  11. % p > 1, for higher order polynomial
  12. % k = number of lagged difference terms used when
  13. % computing the estimator
  14. % -------------------------------------------------------
  15. % RETURNS: a results structure:
  16. % result.eig = eigenvalues (m x 1)
  17. % result.evec = eigenvectors (m x m), where first
  18. % r columns are normalized coint vectors
  19. % result.lr1 = likelihood ratio trace statistic for r=0 to m-1
  20. % (m x 1) vector
  21. % result.lr2 = maximum eigenvalue statistic for r=0 to m-1
  22. % (m x 1) vector
  23. % result.cvt = critical values for trace statistic
  24. % (m x 3) vector [90% 95% 99%]
  25. % result.cvm = critical values for max eigen value statistic
  26. % (m x 3) vector [90% 95% 99%]
  27. % result.ind = index of co-integrating variables ordered by
  28. % size of the eigenvalues from large to small
  29. % -------------------------------------------------------
  30. % NOTE: c_sja(), c_sjt() provide critical values generated using
  31. % a method of MacKinnon (1994, 1996).
  32. % critical values are available for n<=12 and -1 <= p <= 1,
  33. % zeros are returned for other cases.
  34. % -------------------------------------------------------
  35. % SEE ALSO: prt_coint, a function that prints results
  36. % -------------------------------------------------------
  37. % References: Johansen (1988), 'Statistical Analysis of Co-integration
  38. % vectors', Journal of Economic Dynamics and Control, 12, pp. 231-254.
  39. % MacKinnon, Haug, Michelis (1996) 'Numerical distribution
  40. % functions of likelihood ratio tests for cointegration',
  41. % Queen's University Institute for Economic Research Discussion paper.
  42. % (see also: MacKinnon's JBES 1994 article
  43. % -------------------------------------------------------
  44. % written by:
  45. % James P. LeSage, Dept of Economics
  46. % University of Toledo
  47. % 2801 W. Bancroft St,
  48. % Toledo, OH 43606
  49. % jlesage@spatial-econometrics.com
  50. % ****************************************************************
  51. % NOTE: Adina Enache provided some bug fixes and corrections that
  52. % she notes below in comments. 4/10/2000
  53. % ****************************************************************
  54. '''
  55. import numpy as np
  56. from numpy import zeros, ones, flipud, log
  57. from numpy.linalg import inv, eig, cholesky as chol
  58. from statsmodels.regression.linear_model import OLS
# Shorthand for first differencing, used on the level series in coint_johansen.
tdiff = np.diff


class Holder(object):
    """Empty attribute container for named results (MATLAB-struct style)."""
    pass
  62. def rows(x):
  63. return x.shape[0]
  64. def trimr(x, front, end):
  65. if end > 0:
  66. return x[front:-end]
  67. else:
  68. return x[front:]
import statsmodels.tsa.tsatools as tsat
# lagmat(dx, k) builds the (nobs, nvar*k) matrix of lagged differences
# used by coint_johansen below.
mlag = tsat.lagmat
  71. def mlag_(x, maxlag):
  72. '''return all lags up to maxlag
  73. '''
  74. return x[:-lag]
  75. def lag(x, lag):
  76. return x[:-lag]
  77. def detrend(y, order):
  78. if order == -1:
  79. return y
  80. return OLS(y, np.vander(np.linspace(-1, 1, len(y)), order + 1)).fit().resid
  81. def resid(y, x):
  82. r = y - np.dot(x, np.dot(np.linalg.pinv(x), y))
  83. return r
def coint_johansen(x, p, k, print_on_console=True):
    """Perform the Johansen cointegration test on the series in *x*.

    Parameters
    ----------
    x : ndarray, shape (nobs, m)
        Input matrix of time series in levels.
    p : int
        Order of the time polynomial in the null hypothesis:
        -1 = no deterministic part, 0 = constant, 1 = constant plus
        time trend, > 1 = higher-order polynomial.
    k : int
        Number of lagged difference terms used when computing the
        estimator.
    print_on_console : bool
        If True, print the trace and max-eigenvalue statistics together
        with their 90/95/99% critical values.

    Returns
    -------
    Holder
        With attributes eig (sorted eigenvalues), evec (eigenvectors),
        lr1 (trace statistics), lr2 (max-eigenvalue statistics),
        cvt/cvm (critical values for trace / max-eig), ind, rkt, r0t
        (residual matrices) and meth = 'johansen'.
    """
    nobs, m = x.shape

    # f is the detrend order applied to the differenced/transformed
    # series; p is the detrend order applied to the levels themselves.
    if (p > -1):
        f = 0
    else:
        f = p

    x = detrend(x, p)
    dx = tdiff(x, 1, axis=0)  # first differences of the (detrended) levels

    # matrix of lagged differences, trimmed to a common sample and detrended
    z = mlag(dx, k)  # [k-1:]
    z = trimr(z, k, 0)
    z = detrend(z, f)

    dx = trimr(dx, k, 0)
    dx = detrend(dx, f)

    # r0t = dx - z*(z\dx)
    r0t = resid(dx, z)  # diff on lagged diffs

    # lx = trimr(lag(x,k),k,0)
    lx = lag(x, k)
    lx = trimr(lx, 1, 0)
    # NOTE(review): dx is deliberately reused here for the lagged LEVELS,
    # mirroring the original MATLAB port — do not "fix" the name.
    dx = detrend(lx, f)

    # rkt = dx - z*(z\dx)
    rkt = resid(dx, z)  # level on lagged diffs

    # product-moment matrices of the two residual sets
    skk = np.dot(rkt.T, rkt) / rows(rkt)
    sk0 = np.dot(rkt.T, r0t) / rows(rkt)
    s00 = np.dot(r0t.T, r0t) / rows(r0t)
    sig = np.dot(sk0, np.dot(inv(s00), (sk0.T)))
    tmp = inv(skk)

    # generalized eigenvalue problem skk^{-1} * sig
    au, du = eig(np.dot(tmp, sig))  # au is eval, du is evec

    # Normalize the eigenvectors such that (du' skk du) = I
    temp = inv(chol(np.dot(du.T, np.dot(skk, du))))
    dt = np.dot(du, temp)

    # sort eigenvalues (and corresponding vectors) from largest to smallest
    auind = np.argsort(au)
    aind = flipud(auind)
    a = au[aind]
    d = dt[:, aind]

    # Compute the trace and max-eigenvalue statistics for r = 0 .. m-1
    lr1 = zeros(m)
    lr2 = zeros(m)
    cvm = zeros((m, 3))
    cvt = zeros((m, 3))
    iota = ones(m)
    t, junk = rkt.shape  # t = effective number of observations
    for i in range(0, m):
        tmp = trimr(log(iota - a), i , 0)
        lr1[i] = -t * np.sum(tmp, 0)  # trace statistic for rank r = i
        lr2[i] = -t * log(1 - a[i])   # max-eigenvalue statistic for rank r = i
        cvm[i, :] = c_sja(m - i, p)   # critical values, max-eig
        cvt[i, :] = c_sjt(m - i, p)   # critical values, trace
        # vectors were already physically sorted above, so the index is
        # reset to 0, 1, 2, ... (see the MATLAB port's explanation)
        aind[i] = i

    # set up results structure
    result = Holder()
    # estimation results, residuals
    result.rkt = rkt
    result.r0t = r0t
    result.eig = a
    result.evec = d  # transposed compared to matlab ?
    result.lr1 = lr1
    result.lr2 = lr2
    result.cvt = cvt
    result.cvm = cvm
    result.ind = aind
    result.meth = 'johansen'

    if print_on_console == True:
        print ('--------------------------------------------------')
        print ('--> Trace Statistics')
        print ('variable statistic Crit-90% Crit-95% Crit-99%')
        for i in range(len(result.lr1)):
            print ('r =', i, '\t', round(result.lr1[i], 4), result.cvt[i, 0], result.cvt[i, 1], result.cvt[i, 2])
        print ('--------------------------------------------------')
        print ('--> Eigen Statistics')
        print ('variable statistic Crit-90% Crit-95% Crit-99%')
        for i in range(len(result.lr2)):
            print ('r =', i, '\t', round(result.lr2[i], 4), result.cvm[i, 0], result.cvm[i, 1], result.cvm[i, 2])
        print ('--------------------------------------------------')
        print ('eigenvectors:\n', result.evec)
        print ('--------------------------------------------------')
        print ('eigenvalues:\n', result.eig)
        print ('--------------------------------------------------')

    return result
  206. def c_sjt(n, p):
  207. # PURPOSE: find critical values for Johansen trace statistic
  208. # ------------------------------------------------------------
  209. # USAGE: jc = c_sjt(n,p)
  210. # where: n = dimension of the VAR system
  211. # NOTE: routine doesn't work for n > 12
  212. # p = order of time polynomial in the null-hypothesis
  213. # p = -1, no deterministic part
  214. # p = 0, for constant term
  215. # p = 1, for constant plus time-trend
  216. # p > 1 returns no critical values
  217. # ------------------------------------------------------------
  218. # RETURNS: a (3x1) vector of percentiles for the trace
  219. # statistic for [90# 95# 99#]
  220. # ------------------------------------------------------------
  221. # NOTES: for n > 12, the function returns a (3x1) vector of zeros.
  222. # The values returned by the function were generated using
  223. # a method described in MacKinnon (1996), using his FORTRAN
  224. # program johdist.f
  225. # ------------------------------------------------------------
  226. # SEE ALSO: johansen()
  227. # ------------------------------------------------------------
  228. # # References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution
  229. # functions of likelihood ratio tests for cointegration',
  230. # Queen's University Institute for Economic Research Discussion paper.
  231. # -------------------------------------------------------
  232. # written by:
  233. # James P. LeSage, Dept of Economics
  234. # University of Toledo
  235. # 2801 W. Bancroft St,
  236. # Toledo, OH 43606
  237. # jlesage@spatial-econometrics.com
  238. #
  239. # Ported to Python by Javier Garcia
  240. # javier.macro.trader@gmail.com
  241. # these are the values from Johansen's 1995 book
  242. # for comparison to the MacKinnon values
  243. # jcp0 = [ 2.98 4.14 7.02
  244. # 10.35 12.21 16.16
  245. # 21.58 24.08 29.19
  246. # 36.58 39.71 46.00
  247. # 55.54 59.24 66.71
  248. # 78.30 86.36 91.12
  249. # 104.93 109.93 119.58
  250. # 135.16 140.74 151.70
  251. # 169.30 175.47 187.82
  252. # 207.21 214.07 226.95
  253. # 248.77 256.23 270.47
  254. # 293.83 301.95 318.14];
  255. jcp0 = ((2.9762, 4.1296, 6.9406),
  256. (10.4741, 12.3212, 16.3640),
  257. (21.7781, 24.2761, 29.5147),
  258. (37.0339, 40.1749, 46.5716),
  259. (56.2839, 60.0627, 67.6367),
  260. (79.5329, 83.9383, 92.7136),
  261. (106.7351, 111.7797, 121.7375),
  262. (137.9954, 143.6691, 154.7977),
  263. (173.2292, 179.5199, 191.8122),
  264. (212.4721, 219.4051, 232.8291),
  265. (255.6732, 263.2603, 277.9962),
  266. (302.9054, 311.1288, 326.9716))
  267. jcp1 = ((2.7055, 3.8415, 6.6349),
  268. (13.4294, 15.4943, 19.9349),
  269. (27.0669, 29.7961, 35.4628),
  270. (44.4929, 47.8545, 54.6815),
  271. (65.8202, 69.8189, 77.8202),
  272. (91.1090, 95.7542, 104.9637),
  273. (120.3673, 125.6185, 135.9825),
  274. (153.6341, 159.5290, 171.0905),
  275. (190.8714, 197.3772, 210.0366),
  276. (232.1030, 239.2468, 253.2526),
  277. (277.3740, 285.1402, 300.2821),
  278. (326.5354, 334.9795, 351.2150))
  279. jcp2 = ((2.7055, 3.8415, 6.6349),
  280. (16.1619, 18.3985, 23.1485),
  281. (32.0645, 35.0116, 41.0815),
  282. (51.6492, 55.2459, 62.5202),
  283. (75.1027, 79.3422, 87.7748),
  284. (102.4674, 107.3429, 116.9829),
  285. (133.7852, 139.2780, 150.0778),
  286. (169.0618, 175.1584, 187.1891),
  287. (208.3582, 215.1268, 228.2226),
  288. (251.6293, 259.0267, 273.3838),
  289. (298.8836, 306.8988, 322.4264),
  290. (350.1125, 358.7190, 375.3203))
  291. if (p > 1) or (p < -1):
  292. jc = (0, 0, 0)
  293. elif (n > 12) or (n < 1):
  294. jc = (0, 0, 0)
  295. elif p == -1:
  296. jc = jcp0[n - 1]
  297. elif p == 0:
  298. jc = jcp1[n - 1]
  299. elif p == 1:
  300. jc = jcp2[n - 1]
  301. return jc
  302. def c_sja(n, p):
  303. # PURPOSE: find critical values for Johansen maximum eigenvalue statistic
  304. # ------------------------------------------------------------
  305. # USAGE: jc = c_sja(n,p)
  306. # where: n = dimension of the VAR system
  307. # p = order of time polynomial in the null-hypothesis
  308. # p = -1, no deterministic part
  309. # p = 0, for constant term
  310. # p = 1, for constant plus time-trend
  311. # p > 1 returns no critical values
  312. # ------------------------------------------------------------
  313. # RETURNS: a (3x1) vector of percentiles for the maximum eigenvalue
  314. # statistic for: [90# 95# 99#]
  315. # ------------------------------------------------------------
  316. # NOTES: for n > 12, the function returns a (3x1) vector of zeros.
  317. # The values returned by the function were generated using
  318. # a method described in MacKinnon (1996), using his FORTRAN
  319. # program johdist.f
  320. # ------------------------------------------------------------
  321. # SEE ALSO: johansen()
  322. # ------------------------------------------------------------
  323. # References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution
  324. # functions of likelihood ratio tests for cointegration',
  325. # Queen's University Institute for Economic Research Discussion paper.
  326. # -------------------------------------------------------
  327. # written by:
  328. # James P. LeSage, Dept of Economics
  329. # University of Toledo
  330. # 2801 W. Bancroft St,
  331. # Toledo, OH 43606
  332. # jlesage@spatial-econometrics.com
  333. # Ported to Python by Javier Garcia
  334. # javier.macro.trader@gmail.com
  335. jcp0 = ((2.9762, 4.1296, 6.9406),
  336. (9.4748, 11.2246, 15.0923),
  337. (15.7175, 17.7961, 22.2519),
  338. (21.8370, 24.1592, 29.0609),
  339. (27.9160, 30.4428, 35.7359),
  340. (33.9271, 36.6301, 42.2333),
  341. (39.9085, 42.7679, 48.6606),
  342. (45.8930, 48.8795, 55.0335),
  343. (51.8528, 54.9629, 61.3449),
  344. (57.7954, 61.0404, 67.6415),
  345. (63.7248, 67.0756, 73.8856),
  346. (69.6513, 73.0946, 80.0937))
  347. jcp1 = ((2.7055, 3.8415, 6.6349),
  348. (12.2971, 14.2639, 18.5200),
  349. (18.8928, 21.1314, 25.8650),
  350. (25.1236, 27.5858, 32.7172),
  351. (31.2379, 33.8777, 39.3693),
  352. (37.2786, 40.0763, 45.8662),
  353. (43.2947, 46.2299, 52.3069),
  354. (49.2855, 52.3622, 58.6634),
  355. (55.2412, 58.4332, 64.9960),
  356. (61.2041, 64.5040, 71.2525),
  357. (67.1307, 70.5392, 77.4877),
  358. (73.0563, 76.5734, 83.7105))
  359. jcp2 = ((2.7055, 3.8415, 6.6349),
  360. (15.0006, 17.1481, 21.7465),
  361. (21.8731, 24.2522, 29.2631),
  362. (28.2398, 30.8151, 36.1930),
  363. (34.4202, 37.1646, 42.8612),
  364. (40.5244, 43.4183, 49.4095),
  365. (46.5583, 49.5875, 55.8171),
  366. (52.5858, 55.7302, 62.1741),
  367. (58.5316, 61.8051, 68.5030),
  368. (64.5292, 67.9040, 74.7434),
  369. (70.4630, 73.9355, 81.0678),
  370. (76.4081, 79.9878, 87.2395))
  371. if (p > 1) or (p < -1):
  372. jc = (0, 0, 0)
  373. elif (n > 12) or (n < 1):
  374. jc = (0, 0, 0)
  375. elif p == -1:
  376. jc = jcp0[n - 1]
  377. elif p == 0:
  378. jc = jcp1[n - 1]
  379. elif p == 1:
  380. jc = jcp2[n - 1]
  381. return jc