PageRenderTime 53ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/statsmodels/sandbox/examples/thirdparty/ex_ratereturn.py

http://github.com/statsmodels/statsmodels
Python | 140 lines | 98 code | 26 blank | 16 comment | 17 complexity | 476b2c66cea87a42d3ce764e0bba5741 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. # -*- coding: utf-8 -*-
  2. """Playing with correlation of DJ-30 stock returns
  3. this uses pickled data that needs to be created with findow.py
  4. to see graphs, uncomment plt.show()
  5. Created on Sat Jan 30 16:30:18 2010
  6. Author: josef-pktd
  7. """
  8. import pickle
  9. import numpy as np
  10. import matplotlib.pyplot as plt
  11. import matplotlib as mpl
  12. import statsmodels.sandbox.tools as sbtools
  13. from statsmodels.graphics.correlation import plot_corr, plot_corr_grid
  14. try:
  15. with open('dj30rr', 'rb') as fd:
  16. rrdm = pickle.load(fd)
  17. except Exception: #blanket for any unpickling error
  18. print("Error with unpickling, a new pickle file can be created with findow_1")
  19. raise
  20. ticksym = rrdm.columns.tolist()
  21. rr = rrdm.values[1:400]
  22. rrcorr = np.corrcoef(rr, rowvar=0)
  23. plot_corr(rrcorr, xnames=ticksym)
  24. nvars = rrcorr.shape[0]
  25. plt.figure()
  26. plt.hist(rrcorr[np.triu_indices(nvars,1)])
  27. plt.title('Correlation Coefficients')
  28. xreda, facta, evaa, evea = sbtools.pcasvd(rr)
  29. evallcs = (evaa).cumsum()
  30. print(evallcs/evallcs[-1])
  31. xred, fact, eva, eve = sbtools.pcasvd(rr, keepdim=4)
  32. pcacorr = np.corrcoef(xred, rowvar=0)
  33. plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA')
  34. resid = rr-xred
  35. residcorr = np.corrcoef(resid, rowvar=0)
  36. plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals')
  37. plt.matshow(residcorr)
  38. plt.imshow(residcorr, cmap=plt.cm.jet, interpolation='nearest',
  39. extent=(0,30,0,30), vmin=-1.0, vmax=1.0)
  40. plt.colorbar()
  41. normcolor = (0,1) #False #True
  42. fig = plt.figure()
  43. ax = fig.add_subplot(2,2,1)
  44. plot_corr(rrcorr, xnames=ticksym, normcolor=normcolor, ax=ax)
  45. ax2 = fig.add_subplot(2,2,3)
  46. #pcacorr = np.corrcoef(xred, rowvar=0)
  47. plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA',
  48. normcolor=normcolor, ax=ax2)
  49. ax3 = fig.add_subplot(2,2,4)
  50. plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals',
  51. normcolor=normcolor, ax=ax3)
  52. images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
  53. print(images)
  54. print(ax.get_children())
  55. #cax = fig.add_subplot(2,2,2)
  56. #[0.85, 0.1, 0.075, 0.8]
  57. fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
  58. cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
  59. fig.colorbar(images[0], cax=cax)
  60. fig.savefig('corrmatrixgrid.png', dpi=120)
  61. has_sklearn = True
  62. try:
  63. import sklearn # noqa:F401
  64. except ImportError:
  65. has_sklearn = False
  66. print('sklearn not available')
  67. def cov2corr(cov):
  68. std_ = np.sqrt(np.diag(cov))
  69. corr = cov / np.outer(std_, std_)
  70. return corr
  71. if has_sklearn:
  72. from sklearn.covariance import LedoitWolf, OAS, MCD
  73. lw = LedoitWolf(store_precision=False)
  74. lw.fit(rr, assume_centered=False)
  75. cov_lw = lw.covariance_
  76. corr_lw = cov2corr(cov_lw)
  77. oas = OAS(store_precision=False)
  78. oas.fit(rr, assume_centered=False)
  79. cov_oas = oas.covariance_
  80. corr_oas = cov2corr(cov_oas)
  81. mcd = MCD()#.fit(rr, reweight=None)
  82. mcd.fit(rr, assume_centered=False)
  83. cov_mcd = mcd.covariance_
  84. corr_mcd = cov2corr(cov_mcd)
  85. titles = ['raw correlation', 'lw', 'oas', 'mcd']
  86. normcolor = None
  87. fig = plt.figure()
  88. for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
  89. #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
  90. ax = fig.add_subplot(2,2,i+1)
  91. plot_corr(c, xnames=None, title=titles[i],
  92. normcolor=normcolor, ax=ax)
  93. images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
  94. fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
  95. cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
  96. fig.colorbar(images[0], cax=cax)
  97. corrli = [rrcorr, corr_lw, corr_oas, corr_mcd, pcacorr]
  98. diffssq = np.array([[((ci-cj)**2).sum() for ci in corrli]
  99. for cj in corrli])
  100. diffsabs = np.array([[np.max(np.abs(ci-cj)) for ci in corrli]
  101. for cj in corrli])
  102. print(diffssq)
  103. print('\nmaxabs')
  104. print(diffsabs)
  105. fig.savefig('corrmatrix_sklearn.png', dpi=120)
  106. fig2 = plot_corr_grid(corrli+[residcorr], ncols=3,
  107. titles=titles+['pca', 'pca-residual'],
  108. xnames=[], ynames=[])
  109. fig2.savefig('corrmatrix_sklearn_2.png', dpi=120)
  110. #plt.show()
  111. #plt.close('all')