PageRenderTime 56ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/BDDS_dnaCompleteExome_optimized/pymodules/python2.7/lib/python/statsmodels-0.5.0-py2.7-linux-x86_64.egg/statsmodels/sandbox/panel/panel_short.py

https://gitlab.com/pooja043/Globus_Docker
Python | 247 lines | 92 code | 43 blank | 112 comment | 8 complexity | 4c381e8a3f9b779d2c33db6153693bc7 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """Panel data analysis for short T and large N
  3. Created on Sat Dec 17 19:32:00 2011
  4. Author: Josef Perktold
  5. License: BSD-3
  6. starting from scratch before looking at references again
  7. just a stub to get the basic structure for group handling
  8. target outsource as much as possible for reuse
  9. Notes
  10. -----
  11. this is the basic version using a loop over individuals which will be more
  12. widely applicable. Depending on the special cases, there will be faster
  13. implementations possible (sparse, Kronecker, ...)
  14. the only two group specific methods are get_within_cov and whiten
  15. """
  16. import numpy as np
  17. from statsmodels.regression.linear_model import OLS, GLS
  18. from statsmodels.tools.grouputils import Group, GroupSorted
  19. #not used
  20. class Unit(object):
  21. def __init__(endog, exog):
  22. self.endog = endog
  23. self.exog = exog
  24. def sum_outer_product_loop(x, group_iter):
  25. '''sum outerproduct dot(x_i, x_i.T) over individuals
  26. loop version
  27. '''
  28. mom = 0
  29. for g in group_iter():
  30. x_g = x[g]
  31. #print 'x_g.shape', x_g.shape
  32. mom += np.outer(x_g, x_g)
  33. return mom
  34. def sum_outer_product_balanced(x, n_groups):
  35. '''sum outerproduct dot(x_i, x_i.T) over individuals
  36. where x_i is (nobs_i, 1), and result is (nobs_i, nobs_i)
  37. reshape-dot version, for x.ndim=1 only
  38. '''
  39. xrs = x.reshape(-1, n_groups, order='F')
  40. return np.dot(xrs, xrs.T) #should be (nobs_i, nobs_i)
  41. #x.reshape(n_groups, nobs_i, k_vars) #, order='F')
  42. #... ? this is getting 3-dimensional dot, tensordot?
  43. #needs (n_groups, k_vars, k_vars) array with sum over groups
  44. #NOT
  45. #I only need this for x is 1d, i.e. residual
  46. def whiten_individuals_loop(x, transform, group_iter):
  47. '''apply linear transform for each individual
  48. loop version
  49. '''
  50. #Note: figure out dimension of transformed variable
  51. #so we can pre-allocate
  52. x_new = []
  53. for g in group_iter():
  54. x_g = x[g]
  55. x_new.append(np.dot(transform, x_g))
  56. return np.concatenate(x_new) #np.vstack(x_new) #or np.array(x_new) #check shape
  57. class ShortPanelGLS2(object):
  58. '''Short Panel with general intertemporal within correlation
  59. assumes data is stacked by individuals, panel is balanced and
  60. within correlation structure is identical across individuals.
  61. It looks like this can just inherit GLS and overwrite whiten
  62. '''
  63. def __init__(self, endog, exog, group):
  64. self.endog = endog
  65. self.exog = exog
  66. self.group = GroupSorted(group)
  67. self.n_groups = self.group.n_groups
  68. #self.nobs_group = #list for unbalanced?
  69. def fit_ols(self):
  70. self.res_pooled = OLS(self.endog, self.exog).fit()
  71. return self.res_pooled #return or not
  72. def get_within_cov(self, resid):
  73. #central moment or not?
  74. mom = sum_outer_product_loop(resid, self.group.group_iter)
  75. return mom / self.n_groups #df correction ?
  76. def whiten_groups(self, x, cholsigmainv_i):
  77. #from scipy import sparse #use sparse
  78. wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
  79. return wx
  80. def fit(self):
  81. res_pooled = self.fit_ols() #get starting estimate
  82. sigma_i = self.get_within_cov(res_pooled.resid)
  83. self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
  84. wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
  85. wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
  86. #print wendog.shape, wexog.shape
  87. self.res1 = OLS(wendog, wexog).fit()
  88. return self.res1
  89. class ShortPanelGLS(GLS):
  90. '''Short Panel with general intertemporal within correlation
  91. assumes data is stacked by individuals, panel is balanced and
  92. within correlation structure is identical across individuals.
  93. It looks like this can just inherit GLS and overwrite whiten
  94. '''
  95. def __init__(self, endog, exog, group, sigma_i=None):
  96. self.group = GroupSorted(group)
  97. self.n_groups = self.group.n_groups
  98. #self.nobs_group = #list for unbalanced?
  99. nobs_i = len(endog) / self.n_groups #endog might later not be an ndarray
  100. #balanced only for now,
  101. #which is a requirement anyway in this case (full cov)
  102. #needs to change for parameterized sigma_i
  103. #
  104. if sigma_i is None:
  105. sigma_i = np.eye(nobs_i)
  106. self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
  107. #super is taking care of endog, exog and sigma
  108. super(self.__class__, self).__init__(endog, exog, sigma=None)
  109. def get_within_cov(self, resid):
  110. #central moment or not?
  111. mom = sum_outer_product_loop(resid, self.group.group_iter)
  112. return mom / self.n_groups #df correction ?
  113. def whiten_groups(self, x, cholsigmainv_i):
  114. #from scipy import sparse #use sparse
  115. wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
  116. return wx
  117. def _fit_ols(self):
  118. #used as starting estimate in old explicity version
  119. self.res_pooled = OLS(self.endog, self.exog).fit()
  120. return self.res_pooled #return or not
  121. def _fit_old(self):
  122. #old explicit version
  123. res_pooled = self._fit_ols() #get starting estimate
  124. sigma_i = self.get_within_cov(res_pooled.resid)
  125. self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
  126. wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
  127. wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
  128. self.res1 = OLS(wendog, wexog).fit()
  129. return self.res1
  130. def whiten(self, x):
  131. #whiten x by groups, will be applied to endog and exog
  132. wx = whiten_individuals_loop(x, self.cholsigmainv_i, self.group.group_iter)
  133. return wx
  134. #copied from GLSHet and adjusted (boiler plate?)
  135. def fit_iterative(self, maxiter=3):
  136. """
  137. Perform an iterative two-step procedure to estimate the GLS model.
  138. Parameters
  139. ----------
  140. maxiter : integer, optional
  141. the number of iterations
  142. Notes
  143. -----
  144. maxiter=1: returns the estimated based on given weights
  145. maxiter=2: performs a second estimation with the updated weights,
  146. this is 2-step estimation
  147. maxiter>2: iteratively estimate and update the weights
  148. TODO: possible extension stop iteration if change in parameter
  149. estimates is smaller than x_tol
  150. Repeated calls to fit_iterative, will do one redundant pinv_wexog
  151. calculation. Calling fit_iterative(maxiter) once does not do any
  152. redundant recalculations (whitening or calculating pinv_wexog).
  153. """
  154. #Note: in contrast to GLSHet, we don't have an auxilliary regression here
  155. # might be needed if there is more structure in cov_i
  156. #because we only have the loop we are not attaching the ols_pooled
  157. #initial estimate anymore compared to original version
  158. if maxiter < 1:
  159. raise ValueError('maxiter needs to be at least 1')
  160. import collections
  161. self.history = collections.defaultdict(list) #not really necessary
  162. for i in range(maxiter):
  163. #pinv_wexog is cached, delete it to force recalculation
  164. if hasattr(self, 'pinv_wexog'):
  165. del self.pinv_wexog
  166. #fit with current cov, GLS, i.e. OLS on whitened endog, exog
  167. results = self.fit()
  168. self.history['self_params'].append(results.params)
  169. if not i == maxiter-1: #skip for last iteration, could break instead
  170. #print 'ols',
  171. self.results_old = results #store previous results for debugging
  172. #get cov from residuals of previous regression
  173. sigma_i = self.get_within_cov(results.resid)
  174. self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
  175. #calculate new whitened endog and exog
  176. self.initialize()
  177. #note results is the wrapper, results._results is the results instance
  178. #results._results.results_residual_regression = res_resid
  179. return results
  180. if __name__ == '__main__':
  181. pass