PageRenderTime 12ms CodeModel.GetById 1ms app.highlight 8ms RepoModel.GetById 2ms app.codeStats 0ms

/statsmodels/sandbox/examples/thirdparty/ex_ratereturn.py

http://github.com/statsmodels/statsmodels
Python | 140 lines | 98 code | 26 blank | 16 comment | 21 complexity | 476b2c66cea87a42d3ce764e0bba5741 MD5 | raw file
  1# -*- coding: utf-8 -*-
  2"""Playing with correlation of DJ-30 stock returns
  3
  4this uses pickled data that needs to be created with findow.py
  5to see graphs, uncomment plt.show()
  6
  7
  8Created on Sat Jan 30 16:30:18 2010
  9Author: josef-pktd
 10"""
 11import pickle
 12
 13import numpy as np
 14import matplotlib.pyplot as plt
 15import matplotlib as mpl
 16
 17import statsmodels.sandbox.tools as sbtools
 18
 19from statsmodels.graphics.correlation import plot_corr, plot_corr_grid
 20
 21try:
 22    with open('dj30rr', 'rb') as fd:
 23        rrdm = pickle.load(fd)
 24except Exception: #blanket for any unpickling error
 25    print("Error with unpickling, a new pickle file can be created with findow_1")
 26    raise
 27
 28ticksym = rrdm.columns.tolist()
 29rr = rrdm.values[1:400]
 30
 31rrcorr = np.corrcoef(rr, rowvar=0)
 32
 33
 34plot_corr(rrcorr, xnames=ticksym)
 35nvars = rrcorr.shape[0]
 36plt.figure()
 37plt.hist(rrcorr[np.triu_indices(nvars,1)])
 38plt.title('Correlation Coefficients')
 39
 40xreda, facta, evaa, evea  = sbtools.pcasvd(rr)
 41evallcs = (evaa).cumsum()
 42print(evallcs/evallcs[-1])
 43xred, fact, eva, eve  = sbtools.pcasvd(rr, keepdim=4)
 44pcacorr = np.corrcoef(xred, rowvar=0)
 45
 46plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA')
 47
 48resid = rr-xred
 49residcorr = np.corrcoef(resid, rowvar=0)
 50plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals')
 51
 52plt.matshow(residcorr)
 53plt.imshow(residcorr, cmap=plt.cm.jet, interpolation='nearest',
 54           extent=(0,30,0,30), vmin=-1.0, vmax=1.0)
 55plt.colorbar()
 56
 57normcolor = (0,1) #False #True
 58fig = plt.figure()
 59ax = fig.add_subplot(2,2,1)
 60plot_corr(rrcorr, xnames=ticksym, normcolor=normcolor, ax=ax)
 61ax2 = fig.add_subplot(2,2,3)
 62#pcacorr = np.corrcoef(xred, rowvar=0)
 63plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA',
 64          normcolor=normcolor, ax=ax2)
 65ax3 = fig.add_subplot(2,2,4)
 66plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals',
 67          normcolor=normcolor, ax=ax3)
 68
 69images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
 70print(images)
 71print(ax.get_children())
 72#cax = fig.add_subplot(2,2,2)
 73#[0.85, 0.1, 0.075, 0.8]
 74fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
 75cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
 76fig.colorbar(images[0], cax=cax)
 77fig.savefig('corrmatrixgrid.png', dpi=120)
 78
 79has_sklearn = True
 80try:
 81    import sklearn  # noqa:F401
 82except ImportError:
 83    has_sklearn = False
 84    print('sklearn not available')
 85
 86
 87def cov2corr(cov):
 88    std_ = np.sqrt(np.diag(cov))
 89    corr = cov / np.outer(std_, std_)
 90    return corr
 91
 92if has_sklearn:
 93    from sklearn.covariance import LedoitWolf, OAS, MCD
 94
 95    lw = LedoitWolf(store_precision=False)
 96    lw.fit(rr, assume_centered=False)
 97    cov_lw = lw.covariance_
 98    corr_lw = cov2corr(cov_lw)
 99
100    oas = OAS(store_precision=False)
101    oas.fit(rr, assume_centered=False)
102    cov_oas = oas.covariance_
103    corr_oas = cov2corr(cov_oas)
104
105    mcd = MCD()#.fit(rr, reweight=None)
106    mcd.fit(rr, assume_centered=False)
107    cov_mcd = mcd.covariance_
108    corr_mcd = cov2corr(cov_mcd)
109
110    titles = ['raw correlation', 'lw', 'oas', 'mcd']
111    normcolor = None
112    fig = plt.figure()
113    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
114    #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
115        ax = fig.add_subplot(2,2,i+1)
116        plot_corr(c, xnames=None, title=titles[i],
117              normcolor=normcolor, ax=ax)
118
119    images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
120    fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
121    cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
122    fig.colorbar(images[0], cax=cax)
123
124    corrli = [rrcorr, corr_lw, corr_oas, corr_mcd, pcacorr]
125    diffssq = np.array([[((ci-cj)**2).sum() for ci in corrli]
126                            for cj in corrli])
127    diffsabs = np.array([[np.max(np.abs(ci-cj)) for ci in corrli]
128                            for cj in corrli])
129    print(diffssq)
130    print('\nmaxabs')
131    print(diffsabs)
132    fig.savefig('corrmatrix_sklearn.png', dpi=120)
133
134    fig2 = plot_corr_grid(corrli+[residcorr], ncols=3,
135                          titles=titles+['pca', 'pca-residual'],
136                          xnames=[], ynames=[])
137    fig2.savefig('corrmatrix_sklearn_2.png', dpi=120)
138
139#plt.show()
140#plt.close('all')