/statsmodels/sandbox/examples/thirdparty/ex_ratereturn.py
Python | 140 lines | 98 code | 26 blank | 16 comment | 17 complexity | 476b2c66cea87a42d3ce764e0bba5741 MD5 | raw file
Possible License(s): BSD-3-Clause
- # -*- coding: utf-8 -*-
- """Playing with correlation of DJ-30 stock returns
- this uses pickled data that needs to be created with findow.py
- to see graphs, uncomment plt.show()
- Created on Sat Jan 30 16:30:18 2010
- Author: josef-pktd
- """
- import pickle
- import numpy as np
- import matplotlib.pyplot as plt
- import matplotlib as mpl
- import statsmodels.sandbox.tools as sbtools
- from statsmodels.graphics.correlation import plot_corr, plot_corr_grid
- try:
- with open('dj30rr', 'rb') as fd:
- rrdm = pickle.load(fd)
- except Exception: #blanket for any unpickling error
- print("Error with unpickling, a new pickle file can be created with findow_1")
- raise
- ticksym = rrdm.columns.tolist()
- rr = rrdm.values[1:400]
- rrcorr = np.corrcoef(rr, rowvar=0)
- plot_corr(rrcorr, xnames=ticksym)
- nvars = rrcorr.shape[0]
- plt.figure()
- plt.hist(rrcorr[np.triu_indices(nvars,1)])
- plt.title('Correlation Coefficients')
- xreda, facta, evaa, evea = sbtools.pcasvd(rr)
- evallcs = (evaa).cumsum()
- print(evallcs/evallcs[-1])
- xred, fact, eva, eve = sbtools.pcasvd(rr, keepdim=4)
- pcacorr = np.corrcoef(xred, rowvar=0)
- plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA')
- resid = rr-xred
- residcorr = np.corrcoef(resid, rowvar=0)
- plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals')
- plt.matshow(residcorr)
- plt.imshow(residcorr, cmap=plt.cm.jet, interpolation='nearest',
- extent=(0,30,0,30), vmin=-1.0, vmax=1.0)
- plt.colorbar()
- normcolor = (0,1) #False #True
- fig = plt.figure()
- ax = fig.add_subplot(2,2,1)
- plot_corr(rrcorr, xnames=ticksym, normcolor=normcolor, ax=ax)
- ax2 = fig.add_subplot(2,2,3)
- #pcacorr = np.corrcoef(xred, rowvar=0)
- plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA',
- normcolor=normcolor, ax=ax2)
- ax3 = fig.add_subplot(2,2,4)
- plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals',
- normcolor=normcolor, ax=ax3)
- images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
- print(images)
- print(ax.get_children())
- #cax = fig.add_subplot(2,2,2)
- #[0.85, 0.1, 0.075, 0.8]
- fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
- cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
- fig.colorbar(images[0], cax=cax)
- fig.savefig('corrmatrixgrid.png', dpi=120)
- has_sklearn = True
- try:
- import sklearn # noqa:F401
- except ImportError:
- has_sklearn = False
- print('sklearn not available')
- def cov2corr(cov):
- std_ = np.sqrt(np.diag(cov))
- corr = cov / np.outer(std_, std_)
- return corr
- if has_sklearn:
- from sklearn.covariance import LedoitWolf, OAS, MCD
- lw = LedoitWolf(store_precision=False)
- lw.fit(rr, assume_centered=False)
- cov_lw = lw.covariance_
- corr_lw = cov2corr(cov_lw)
- oas = OAS(store_precision=False)
- oas.fit(rr, assume_centered=False)
- cov_oas = oas.covariance_
- corr_oas = cov2corr(cov_oas)
- mcd = MCD()#.fit(rr, reweight=None)
- mcd.fit(rr, assume_centered=False)
- cov_mcd = mcd.covariance_
- corr_mcd = cov2corr(cov_mcd)
- titles = ['raw correlation', 'lw', 'oas', 'mcd']
- normcolor = None
- fig = plt.figure()
- for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
- #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
- ax = fig.add_subplot(2,2,i+1)
- plot_corr(c, xnames=None, title=titles[i],
- normcolor=normcolor, ax=ax)
- images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
- fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
- cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
- fig.colorbar(images[0], cax=cax)
- corrli = [rrcorr, corr_lw, corr_oas, corr_mcd, pcacorr]
- diffssq = np.array([[((ci-cj)**2).sum() for ci in corrli]
- for cj in corrli])
- diffsabs = np.array([[np.max(np.abs(ci-cj)) for ci in corrli]
- for cj in corrli])
- print(diffssq)
- print('\nmaxabs')
- print(diffsabs)
- fig.savefig('corrmatrix_sklearn.png', dpi=120)
- fig2 = plot_corr_grid(corrli+[residcorr], ncols=3,
- titles=titles+['pca', 'pca-residual'],
- xnames=[], ynames=[])
- fig2.savefig('corrmatrix_sklearn_2.png', dpi=120)
- #plt.show()
- #plt.close('all')