# -*- coding: utf-8 -*-
"""Panel data analysis for short T and large N

Created on Sat Dec 17 19:32:00 2011

Author: Josef Perktold
License: BSD-3

starting from scratch before looking at references again
just a stub to get the basic structure for group handling
target: outsource as much as possible for reuse

Notes
-----
This is the basic version using a loop over individuals, which will be more
widely applicable. Depending on the special cases, there will be faster
implementations possible (sparse, Kronecker, ...).

The only two group-specific methods are get_within_cov and whiten.

"""
import numpy as np

from statsmodels.regression.linear_model import OLS, GLS
from statsmodels.tools.grouputils import Group, GroupSorted
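
# Overview sketch (informal notation, for orientation only): for individual i
# the model is y_i = X_i * beta + u_i with a common within-individual
# covariance Cov(u_i) = Sigma_i of shape (nobs_i, nobs_i), identical across
# the n_groups individuals.  From residuals, get_within_cov estimates
#
#     Sigma_hat = (1 / n_groups) * sum_i u_i u_i'
#
# and the data are whitened individual by individual with
#
#     P = np.linalg.cholesky(np.linalg.pinv(Sigma_hat)).T
#
# so that P Sigma_hat P' = I; pooled OLS on (P y_i, P X_i) then gives the
# feasible GLS estimate.  This mirrors what the helpers and classes below do.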

#not used
class Unit(object):
    def __init__(self, endog, exog):   # `self` was missing in the signature
        self.endog = endog
        self.exog = exog

def sum_outer_product_loop(x, group_iter):
    '''sum outer product dot(x_i, x_i.T) over individuals

    loop version

    '''
    mom = 0
    for g in group_iter():
        x_g = x[g]
        #print 'x_g.shape', x_g.shape
        mom += np.outer(x_g, x_g)

    return mom
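
# Usage sketch (illustration only; `demo_group_iter`, `u`, `nobs_i` and
# `n_groups` are hypothetical names, not part of this module): group_iter is
# a callable returning an iterator of per-individual indices (e.g. slices,
# as GroupSorted.group_iter provides), so a minimal stand-in would be
#
#     nobs_i, n_groups = 3, 4
#     u = np.arange(nobs_i * n_groups, dtype=float)
#     def demo_group_iter():
#         for j in range(n_groups):
#             yield slice(j * nobs_i, (j + 1) * nobs_i)
#     mom = sum_outer_product_loop(u, demo_group_iter)   # (nobs_i, nobs_i)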

def sum_outer_product_balanced(x, n_groups):
    '''sum outer product dot(x_i, x_i.T) over individuals

    where x_i is (nobs_i, 1), and result is (nobs_i, nobs_i)

    reshape-dot version, for x.ndim=1 only

    '''
    xrs = x.reshape(-1, n_groups, order='F')
    return np.dot(xrs, xrs.T)  #should be (nobs_i, nobs_i)

    #x.reshape(n_groups, nobs_i, k_vars) #, order='F')
    #... ? this is getting 3-dimensional dot, tensordot?
    #needs (n_groups, k_vars, k_vars) array with sum over groups
    #NOT
    #I only need this for x is 1d, i.e. residual
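
# Consistency sketch (illustration only, reusing the hypothetical names from
# the sketch above): on a balanced panel with 1-d x stacked by individual,
# the reshape version should agree with the loop version:
#
#     m_loop = sum_outer_product_loop(u, demo_group_iter)
#     m_bal = sum_outer_product_balanced(u, n_groups)
#     np.testing.assert_allclose(m_loop, m_bal)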

def whiten_individuals_loop(x, transform, group_iter):
    '''apply linear transform for each individual

    loop version
    '''
    #Note: figure out dimension of transformed variable
    #so we can pre-allocate
    x_new = []
    for g in group_iter():
        x_g = x[g]
        x_new.append(np.dot(transform, x_g))

    return np.concatenate(x_new)   #np.vstack(x_new) #or np.array(x_new) #check shape
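
# Usage sketch (illustration only, hypothetical names): `transform` is the
# (nobs_i, nobs_i) whitening matrix applied to each individual's block, e.g.
#
#     sigma_i = np.eye(nobs_i)                        # assumed within cov
#     P = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
#     u_white = whiten_individuals_loop(u, P, demo_group_iter)
#
# For 2-d x (exog), np.dot(transform, x_g) maps each (nobs_i, k_vars) block
# to a whitened (nobs_i, k_vars) block, so the same helper works for endog
# and exog.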


class ShortPanelGLS2(object):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals.

    It looks like this can just inherit GLS and overwrite whiten

    '''

    def __init__(self, endog, exog, group):
        self.endog = endog
        self.exog = exog
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?

    def fit_ols(self):
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def fit(self):
        res_pooled = self.fit_ols()  #get starting estimate

        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
        #print wendog.shape, wexog.shape

        self.res1 = OLS(wendog, wexog).fit()
        return self.res1
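
# Usage sketch (illustration only; `y`, `x`, `group` are hypothetical names):
# ShortPanelGLS2.fit is a two-step FGLS -- pooled OLS for starting residuals,
# the within covariance from get_within_cov, then OLS on the whitened data:
#
#     mod = ShortPanelGLS2(y, x, group)    # data stacked by individual
#     res = mod.fit()                      # OLS results on whitened data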


class ShortPanelGLS(GLS):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals.

    It looks like this can just inherit GLS and overwrite whiten

    '''

    def __init__(self, endog, exog, group, sigma_i=None):
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?
        nobs_i = len(endog) // self.n_groups  #endog might later not be an ndarray
        #balanced only for now,
        #which is a requirement anyway in this case (full cov)
        #needs to change for parameterized sigma_i
        #
        if sigma_i is None:
            sigma_i = np.eye(nobs_i)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

        #super is taking care of endog, exog and sigma
        #use the explicit class name, not self.__class__, so subclasses don't recurse
        super(ShortPanelGLS, self).__init__(endog, exog, sigma=None)

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def _fit_ols(self):
        #used as starting estimate in the old explicit version
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def _fit_old(self):
        #old explicit version
        res_pooled = self._fit_ols()  #get starting estimate

        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)

        self.res1 = OLS(wendog, wexog).fit()
        return self.res1

    def whiten(self, x):
        #whiten x by groups, will be applied to endog and exog
        wx = whiten_individuals_loop(x, self.cholsigmainv_i, self.group.group_iter)
        return wx
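
    # Note on the GLS machinery used by fit_iterative below: self.initialize()
    # recomputes the whitened endog and exog through self.whiten, so updating
    # self.cholsigmainv_i and then calling self.initialize() is enough to
    # re-whiten the data with the new within covariance estimate.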

    #copied from GLSHet and adjusted (boiler plate?)
    def fit_iterative(self, maxiter=3):
        """
        Perform an iterative two-step procedure to estimate the GLS model.

        Parameters
        ----------
        maxiter : integer, optional
            the number of iterations

        Notes
        -----
        maxiter=1: returns the estimate based on the given weights
        maxiter=2: performs a second estimation with the updated weights,
                   this is 2-step estimation
        maxiter>2: iteratively estimate and update the weights

        TODO: possible extension: stop iteration if change in parameter
            estimates is smaller than x_tol

        Repeated calls to fit_iterative will do one redundant pinv_wexog
        calculation. Calling fit_iterative(maxiter) once does not do any
        redundant recalculations (whitening or calculating pinv_wexog).

        """
        #Note: in contrast to GLSHet, we don't have an auxiliary regression here
        #      might be needed if there is more structure in cov_i

        #because we only have the loop, we are not attaching the ols_pooled
        #initial estimate anymore compared to the original version
        if maxiter < 1:
            raise ValueError('maxiter needs to be at least 1')

        import collections
        self.history = collections.defaultdict(list)   #not really necessary

        for i in range(maxiter):
            #pinv_wexog is cached, delete it to force recalculation
            if hasattr(self, 'pinv_wexog'):
                del self.pinv_wexog

            #fit with current cov, GLS, i.e. OLS on whitened endog, exog
            results = self.fit()
            self.history['self_params'].append(results.params)

            if not i == maxiter - 1:   #skip for last iteration, could break instead
                #print 'ols',
                self.results_old = results   #store previous results for debugging

                #get cov from residuals of previous regression
                sigma_i = self.get_within_cov(results.resid)
                self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

                #calculate new whitened endog and exog
                self.initialize()

        #note: results is the wrapper, results._results is the results instance
        #results._results.results_residual_regression = res_resid
        return results


if __name__ == '__main__':
    pass
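    # Minimal simulated-data sketch (for illustration; the data-generating
    # process and all names below are assumptions, not part of the original
    # module).
    np.random.seed(12345)
    n_groups, nobs_i, k_vars = 50, 5, 3

    group = np.repeat(np.arange(n_groups), nobs_i)     # individual labels
    nobs = n_groups * nobs_i
    exog = np.column_stack((np.ones(nobs), np.random.randn(nobs, k_vars - 1)))
    beta = np.ones(k_vars)

    # AR(1)-like within-individual correlation, identical across individuals
    rho = 0.5
    sig = rho ** np.abs(np.subtract.outer(np.arange(nobs_i), np.arange(nobs_i)))
    chol_sig = np.linalg.cholesky(sig)
    u = np.dot(np.random.randn(n_groups, nobs_i), chol_sig.T).ravel()
    endog = np.dot(exog, beta) + u

    mod = ShortPanelGLS(endog, exog, group)
    res = mod.fit_iterative(maxiter=2)       # 2-step feasible GLS
    print(res.params)

    # the explicit two-step version should give a comparable estimate
    mod2 = ShortPanelGLS2(endog, exog, group)
    res2 = mod2.fit()
    print(res2.params)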