# /EChanBook2/johansen_test.py
# (scraper metadata removed: "Python | 448 lines | 235 code | 49 blank | 164 comment")
1 2''' 3function result = johansen(x,p,k) 4% PURPOSE: perform Johansen cointegration tests 5% ------------------------------------------------------- 6% USAGE: result = johansen(x,p,k) 7% where: x = input matrix of time-series in levels, (nobs x m) 8% p = order of time polynomial in the null-hypothesis 9% p = -1, no deterministic part 10% p = 0, for constant term 11% p = 1, for constant plus time-trend 12% p > 1, for higher order polynomial 13% k = number of lagged difference terms used when 14% computing the estimator 15% ------------------------------------------------------- 16% RETURNS: a results structure: 17% result.eig = eigenvalues (m x 1) 18% result.evec = eigenvectors (m x m), where first 19% r columns are normalized coint vectors 20% result.lr1 = likelihood ratio trace statistic for r=0 to m-1 21% (m x 1) vector 22% result.lr2 = maximum eigenvalue statistic for r=0 to m-1 23% (m x 1) vector 24% result.cvt = critical values for trace statistic 25% (m x 3) vector [90% 95% 99%] 26% result.cvm = critical values for max eigen value statistic 27% (m x 3) vector [90% 95% 99%] 28% result.ind = index of co-integrating variables ordered by 29% size of the eigenvalues from large to small 30% ------------------------------------------------------- 31% NOTE: c_sja(), c_sjt() provide critical values generated using 32% a method of MacKinnon (1994, 1996). 33% critical values are available for n<=12 and -1 <= p <= 1, 34% zeros are returned for other cases. 35% ------------------------------------------------------- 36% SEE ALSO: prt_coint, a function that prints results 37% ------------------------------------------------------- 38% References: Johansen (1988), 'Statistical Analysis of Co-integration 39% vectors', Journal of Economic Dynamics and Control, 12, pp. 231-254. 40% MacKinnon, Haug, Michelis (1996) 'Numerical distribution 41% functions of likelihood ratio tests for cointegration', 42% Queen's University Institute for Economic Research Discussion paper. 
43% (see also: MacKinnon's JBES 1994 article 44% ------------------------------------------------------- 45 46% written by: 47% James P. LeSage, Dept of Economics 48% University of Toledo 49% 2801 W. Bancroft St, 50% Toledo, OH 43606 51% jlesage@spatial-econometrics.com 52 53% **************************************************************** 54% NOTE: Adina Enache provided some bug fixes and corrections that 55% she notes below in comments. 4/10/2000 56% **************************************************************** 57''' 58 59import numpy as np 60from numpy import zeros, ones, flipud, log 61from numpy.linalg import inv, eig, cholesky as chol 62from statsmodels.regression.linear_model import OLS 63 64 65tdiff = np.diff 66 67class Holder(object): 68 pass 69 70def rows(x): 71 return x.shape[0] 72 73def trimr(x, front, end): 74 if end > 0: 75 return x[front:-end] 76 else: 77 return x[front:] 78 79import statsmodels.tsa.tsatools as tsat 80mlag = tsat.lagmat 81 82def mlag_(x, maxlag): 83 '''return all lags up to maxlag 84 ''' 85 return x[:-lag] 86 87def lag(x, lag): 88 return x[:-lag] 89 90def detrend(y, order): 91 if order == -1: 92 return y 93 return OLS(y, np.vander(np.linspace(-1, 1, len(y)), order + 1)).fit().resid 94 95def resid(y, x): 96 r = y - np.dot(x, np.dot(np.linalg.pinv(x), y)) 97 return r 98 99 100 101 102def coint_johansen(x, p, k, print_on_console=True): 103 104 # % error checking on inputs 105 # if (nargin ~= 3) 106 # error('Wrong # of inputs to johansen') 107 # end 108 nobs, m = x.shape 109 110 # why this? 
f is detrend transformed series, p is detrend data 111 if (p > -1): 112 f = 0 113 else: 114 f = p 115 116 x = detrend(x, p) 117 dx = tdiff(x, 1, axis=0) 118 # dx = trimr(dx,1,0) 119 z = mlag(dx, k) # [k-1:] 120# print z.shape 121 z = trimr(z, k, 0) 122 z = detrend(z, f) 123# print dx.shape 124 dx = trimr(dx, k, 0) 125 126 dx = detrend(dx, f) 127 # r0t = dx - z*(z\dx) 128 r0t = resid(dx, z) # diff on lagged diffs 129 # lx = trimr(lag(x,k),k,0) 130 lx = lag(x, k) 131 lx = trimr(lx, 1, 0) 132 dx = detrend(lx, f) 133# print 'rkt', dx.shape, z.shape 134 # rkt = dx - z*(z\dx) 135 rkt = resid(dx, z) # level on lagged diffs 136 skk = np.dot(rkt.T, rkt) / rows(rkt) 137 sk0 = np.dot(rkt.T, r0t) / rows(rkt) 138 s00 = np.dot(r0t.T, r0t) / rows(r0t) 139 sig = np.dot(sk0, np.dot(inv(s00), (sk0.T))) 140 tmp = inv(skk) 141 # du, au = eig(np.dot(tmp, sig)) 142 au, du = eig(np.dot(tmp, sig)) # au is eval, du is evec 143 # orig = np.dot(tmp, sig) 144 145 # % Normalize the eigen vectors such that (du'skk*du) = I 146 temp = inv(chol(np.dot(du.T, np.dot(skk, du)))) 147 dt = np.dot(du, temp) 148 149 150 # JP: the next part can be done much easier 151 152 # % NOTE: At this point, the eigenvectors are aligned by column. To 153 # % physically move the column elements using the MATLAB sort, 154 # % take the transpose to put the eigenvectors across the row 155 156 # dt = transpose(dt) 157 158 # % sort eigenvalues and vectors 159 160 # au, auind = np.sort(diag(au)) 161 auind = np.argsort(au) 162 # a = flipud(au) 163 aind = flipud(auind) 164 a = au[aind] 165 # d = dt[aind,:] 166 d = dt[:, aind] 167 168 # %NOTE: The eigenvectors have been sorted by row based on auind and moved to array "d". 169 # % Put the eigenvectors back in column format after the sort by taking the 170 # % transpose of "d". Since the eigenvectors have been physically moved, there is 171 # % no need for aind at all. To preserve existing programming, aind is reset back to 172 # % 1, 2, 3, .... 
173 174 # d = transpose(d) 175 # test = np.dot(transpose(d), np.dot(skk, d)) 176 177 # %EXPLANATION: The MATLAB sort function sorts from low to high. The flip realigns 178 # %auind to go from the largest to the smallest eigenvalue (now aind). The original procedure 179 # %physically moved the rows of dt (to d) based on the alignment in aind and then used 180 # %aind as a column index to address the eigenvectors from high to low. This is a double 181 # %sort. If you wanted to extract the eigenvector corresponding to the largest eigenvalue by, 182 # %using aind as a reference, you would get the correct eigenvector, but with sorted 183 # %coefficients and, therefore, any follow-on calculation would seem to be in error. 184 # %If alternative programming methods are used to evaluate the eigenvalues, e.g. Frame method 185 # %followed by a root extraction on the characteristic equation, then the roots can be 186 # %quickly sorted. One by one, the corresponding eigenvectors can be generated. The resultant 187 # %array can be operated on using the Cholesky transformation, which enables a unit 188 # %diagonalization of skk. But nowhere along the way are the coefficients within the 189 # %eigenvector array ever changed. The final value of the "beta" array using either method 190 # %should be the same. 191 192 193 # % Compute the trace and max eigenvalue statistics */ 194 lr1 = zeros(m) 195 lr2 = zeros(m) 196 cvm = zeros((m, 3)) 197 cvt = zeros((m, 3)) 198 iota = ones(m) 199 t, junk = rkt.shape 200 for i in range(0, m): 201 tmp = trimr(log(iota - a), i , 0) 202 lr1[i] = -t * np.sum(tmp, 0) # columnsum ? 
203 # tmp = np.log(1-a) 204 # lr1[i] = -t * np.sum(tmp[i:]) 205 lr2[i] = -t * log(1 - a[i]) 206 cvm[i, :] = c_sja(m - i, p) 207 cvt[i, :] = c_sjt(m - i, p) 208 aind[i] = i 209 # end 210 211 result = Holder() 212 # % set up results structure 213 # estimation results, residuals 214 result.rkt = rkt 215 result.r0t = r0t 216 result.eig = a 217 result.evec = d # transposed compared to matlab ? 218 result.lr1 = lr1 219 result.lr2 = lr2 220 result.cvt = cvt 221 result.cvm = cvm 222 result.ind = aind 223 result.meth = 'johansen' 224 225 if print_on_console == True: 226 print ('--------------------------------------------------') 227 print ('--> Trace Statistics') 228 print ('variable statistic Crit-90% Crit-95% Crit-99%') 229 for i in range(len(result.lr1)): 230 print ('r =', i, '\t', round(result.lr1[i], 4), result.cvt[i, 0], result.cvt[i, 1], result.cvt[i, 2]) 231 print ('--------------------------------------------------') 232 print ('--> Eigen Statistics') 233 print ('variable statistic Crit-90% Crit-95% Crit-99%') 234 for i in range(len(result.lr2)): 235 print ('r =', i, '\t', round(result.lr2[i], 4), result.cvm[i, 0], result.cvm[i, 1], result.cvm[i, 2]) 236 print ('--------------------------------------------------') 237 print ('eigenvectors:\n', result.evec) 238 print ('--------------------------------------------------') 239 print ('eigenvalues:\n', result.eig) 240 print ('--------------------------------------------------') 241 242 243 return result 244 245def c_sjt(n, p): 246 247# PURPOSE: find critical values for Johansen trace statistic 248# ------------------------------------------------------------ 249# USAGE: jc = c_sjt(n,p) 250# where: n = dimension of the VAR system 251# NOTE: routine doesn't work for n > 12 252# p = order of time polynomial in the null-hypothesis 253# p = -1, no deterministic part 254# p = 0, for constant term 255# p = 1, for constant plus time-trend 256# p > 1 returns no critical values 257# 
------------------------------------------------------------ 258# RETURNS: a (3x1) vector of percentiles for the trace 259# statistic for [90# 95# 99#] 260# ------------------------------------------------------------ 261# NOTES: for n > 12, the function returns a (3x1) vector of zeros. 262# The values returned by the function were generated using 263# a method described in MacKinnon (1996), using his FORTRAN 264# program johdist.f 265# ------------------------------------------------------------ 266# SEE ALSO: johansen() 267# ------------------------------------------------------------ 268# # References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution 269# functions of likelihood ratio tests for cointegration', 270# Queen's University Institute for Economic Research Discussion paper. 271# ------------------------------------------------------- 272 273# written by: 274# James P. LeSage, Dept of Economics 275# University of Toledo 276# 2801 W. Bancroft St, 277# Toledo, OH 43606 278# jlesage@spatial-econometrics.com 279# 280# Ported to Python by Javier Garcia 281# javier.macro.trader@gmail.com 282 283# these are the values from Johansen's 1995 book 284# for comparison to the MacKinnon values 285# jcp0 = [ 2.98 4.14 7.02 286# 10.35 12.21 16.16 287# 21.58 24.08 29.19 288# 36.58 39.71 46.00 289# 55.54 59.24 66.71 290# 78.30 86.36 91.12 291# 104.93 109.93 119.58 292# 135.16 140.74 151.70 293# 169.30 175.47 187.82 294# 207.21 214.07 226.95 295# 248.77 256.23 270.47 296# 293.83 301.95 318.14]; 297 298 299 300 301 jcp0 = ((2.9762, 4.1296, 6.9406), 302 (10.4741, 12.3212, 16.3640), 303 (21.7781, 24.2761, 29.5147), 304 (37.0339, 40.1749, 46.5716), 305 (56.2839, 60.0627, 67.6367), 306 (79.5329, 83.9383, 92.7136), 307 (106.7351, 111.7797, 121.7375), 308 (137.9954, 143.6691, 154.7977), 309 (173.2292, 179.5199, 191.8122), 310 (212.4721, 219.4051, 232.8291), 311 (255.6732, 263.2603, 277.9962), 312 (302.9054, 311.1288, 326.9716)) 313 314 315 jcp1 = ((2.7055, 3.8415, 
6.6349), 316 (13.4294, 15.4943, 19.9349), 317 (27.0669, 29.7961, 35.4628), 318 (44.4929, 47.8545, 54.6815), 319 (65.8202, 69.8189, 77.8202), 320 (91.1090, 95.7542, 104.9637), 321 (120.3673, 125.6185, 135.9825), 322 (153.6341, 159.5290, 171.0905), 323 (190.8714, 197.3772, 210.0366), 324 (232.1030, 239.2468, 253.2526), 325 (277.3740, 285.1402, 300.2821), 326 (326.5354, 334.9795, 351.2150)) 327 328 jcp2 = ((2.7055, 3.8415, 6.6349), 329 (16.1619, 18.3985, 23.1485), 330 (32.0645, 35.0116, 41.0815), 331 (51.6492, 55.2459, 62.5202), 332 (75.1027, 79.3422, 87.7748), 333 (102.4674, 107.3429, 116.9829), 334 (133.7852, 139.2780, 150.0778), 335 (169.0618, 175.1584, 187.1891), 336 (208.3582, 215.1268, 228.2226), 337 (251.6293, 259.0267, 273.3838), 338 (298.8836, 306.8988, 322.4264), 339 (350.1125, 358.7190, 375.3203)) 340 341 342 343 if (p > 1) or (p < -1): 344 jc = (0, 0, 0) 345 elif (n > 12) or (n < 1): 346 jc = (0, 0, 0) 347 elif p == -1: 348 jc = jcp0[n - 1] 349 elif p == 0: 350 jc = jcp1[n - 1] 351 elif p == 1: 352 jc = jcp2[n - 1] 353 354 355 356 return jc 357 358def c_sja(n, p): 359 360# PURPOSE: find critical values for Johansen maximum eigenvalue statistic 361# ------------------------------------------------------------ 362# USAGE: jc = c_sja(n,p) 363# where: n = dimension of the VAR system 364# p = order of time polynomial in the null-hypothesis 365# p = -1, no deterministic part 366# p = 0, for constant term 367# p = 1, for constant plus time-trend 368# p > 1 returns no critical values 369# ------------------------------------------------------------ 370# RETURNS: a (3x1) vector of percentiles for the maximum eigenvalue 371# statistic for: [90# 95# 99#] 372# ------------------------------------------------------------ 373# NOTES: for n > 12, the function returns a (3x1) vector of zeros. 
374# The values returned by the function were generated using 375# a method described in MacKinnon (1996), using his FORTRAN 376# program johdist.f 377# ------------------------------------------------------------ 378# SEE ALSO: johansen() 379# ------------------------------------------------------------ 380# References: MacKinnon, Haug, Michelis (1996) 'Numerical distribution 381# functions of likelihood ratio tests for cointegration', 382# Queen's University Institute for Economic Research Discussion paper. 383# ------------------------------------------------------- 384 385# written by: 386# James P. LeSage, Dept of Economics 387# University of Toledo 388# 2801 W. Bancroft St, 389# Toledo, OH 43606 390# jlesage@spatial-econometrics.com 391# Ported to Python by Javier Garcia 392# javier.macro.trader@gmail.com 393 394 395 jcp0 = ((2.9762, 4.1296, 6.9406), 396 (9.4748, 11.2246, 15.0923), 397 (15.7175, 17.7961, 22.2519), 398 (21.8370, 24.1592, 29.0609), 399 (27.9160, 30.4428, 35.7359), 400 (33.9271, 36.6301, 42.2333), 401 (39.9085, 42.7679, 48.6606), 402 (45.8930, 48.8795, 55.0335), 403 (51.8528, 54.9629, 61.3449), 404 (57.7954, 61.0404, 67.6415), 405 (63.7248, 67.0756, 73.8856), 406 (69.6513, 73.0946, 80.0937)) 407 408 jcp1 = ((2.7055, 3.8415, 6.6349), 409 (12.2971, 14.2639, 18.5200), 410 (18.8928, 21.1314, 25.8650), 411 (25.1236, 27.5858, 32.7172), 412 (31.2379, 33.8777, 39.3693), 413 (37.2786, 40.0763, 45.8662), 414 (43.2947, 46.2299, 52.3069), 415 (49.2855, 52.3622, 58.6634), 416 (55.2412, 58.4332, 64.9960), 417 (61.2041, 64.5040, 71.2525), 418 (67.1307, 70.5392, 77.4877), 419 (73.0563, 76.5734, 83.7105)) 420 421 jcp2 = ((2.7055, 3.8415, 6.6349), 422 (15.0006, 17.1481, 21.7465), 423 (21.8731, 24.2522, 29.2631), 424 (28.2398, 30.8151, 36.1930), 425 (34.4202, 37.1646, 42.8612), 426 (40.5244, 43.4183, 49.4095), 427 (46.5583, 49.5875, 55.8171), 428 (52.5858, 55.7302, 62.1741), 429 (58.5316, 61.8051, 68.5030), 430 (64.5292, 67.9040, 74.7434), 431 (70.4630, 73.9355, 
81.0678), 432 (76.4081, 79.9878, 87.2395)) 433 434 435 if (p > 1) or (p < -1): 436 jc = (0, 0, 0) 437 elif (n > 12) or (n < 1): 438 jc = (0, 0, 0) 439 elif p == -1: 440 jc = jcp0[n - 1] 441 elif p == 0: 442 jc = jcp1[n - 1] 443 elif p == 1: 444 jc = jcp2[n - 1] 445 446 447 return jc 448