/pandas/stats/tests/test_ols.py
Python | 980 lines | 700 code | 234 blank | 46 comment | 80 complexity | d0089b3a7061fbb1ada9b9149da44f22 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
- """
- Unit test suite for OLS and PanelOLS classes
- """
- # pylint: disable-msg=W0212
- # flake8: noqa
- from __future__ import division
- from datetime import datetime
- from pandas import compat
- from distutils.version import LooseVersion
- import nose
- import numpy as np
- from pandas import date_range, bdate_range
- from pandas.core.panel import Panel
- from pandas import DataFrame, Index, Series, notnull, datetools
- from pandas.stats.api import ols
- from pandas.stats.ols import _filter_data
- from pandas.stats.plm import NonPooledPanelOLS, PanelOLS
- from pandas.util.testing import (assert_almost_equal, assert_series_equal,
- assert_frame_equal, assertRaisesRegexp, slow)
- import pandas.util.testing as tm
- import pandas.compat as compat
- from .common import BaseTest
- _have_statsmodels = True
- try:
- import statsmodels.api as sm
- except ImportError:
- try:
- import scikits.statsmodels.api as sm
- except ImportError:
- _have_statsmodels = False
- def _check_repr(obj):
- repr(obj)
- str(obj)
def _compare_ols_results(model1, model2):
    """Assert that two fitted models have the same type and equal betas.

    A model exposing a ``_window_type`` attribute is compared with
    ``_compare_moving_ols``; any other model is compared with
    ``_compare_fullsample_ols``.
    """
    tm.assertIsInstance(model1, type(model2))

    is_moving = hasattr(model1, '_window_type')
    if is_moving:
        compare_betas = _compare_moving_ols
    else:
        compare_betas = _compare_fullsample_ols
    compare_betas(model1, model2)
- def _compare_fullsample_ols(model1, model2):
- assert_series_equal(model1.beta, model2.beta)
- def _compare_moving_ols(model1, model2):
- assert_frame_equal(model1.beta, model2.beta)
class TestOLS(BaseTest):
    """Cross-check ``ols`` against statsmodels and against itself.

    Full-sample fits are compared with ``sm.OLS`` / ``sm.WLS``.  Moving
    (rolling / expanding) fits are compared, window by window, with a static
    fit over the same observations.
    """

    _multiprocess_can_split_ = True

    # TODO: Add tests for OLS y predict
    # TODO: Right now we just check for consistency between full-sample and
    # rolling/expanding results of the panel OLS. We should also cross-check
    # with trusted implementations of panel OLS (e.g. R).
    # TODO: Add tests for non pooled OLS.

    @classmethod
    def setUpClass(cls):
        super(TestOLS, cls).setUpClass()
        try:
            import matplotlib as mpl
            mpl.use('Agg', warn=False)
        except ImportError:
            # matplotlib is optional here; without it there is no backend
            # to configure
            pass

        if not _have_statsmodels:
            raise nose.SkipTest("no statsmodels")

    def testOLSWithDatasets_ccard(self):
        # Despite the name, this covers four statsmodels datasets; only the
        # full-sample fit is checked for each (skip_moving=True)
        self.checkDataSet(sm.datasets.ccard.load(), skip_moving=True)
        self.checkDataSet(sm.datasets.cpunish.load(), skip_moving=True)
        self.checkDataSet(sm.datasets.longley.load(), skip_moving=True)
        self.checkDataSet(sm.datasets.stackloss.load(), skip_moving=True)

    @slow
    def testOLSWithDatasets_copper(self):
        self.checkDataSet(sm.datasets.copper.load())

    @slow
    def testOLSWithDatasets_scotland(self):
        self.checkDataSet(sm.datasets.scotland.load())

        # degenerate case fails on some platforms
        # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all
        # 0s

    def testWLS(self):
        # WLS centered SS changed (fixed) in 0.5.0
        sm_version = sm.version.version
        # Parse both sides so the comparison does not depend on
        # LooseVersion's reflected comparison with a raw string
        if LooseVersion(sm_version) < LooseVersion('0.5.0'):
            raise nose.SkipTest("WLS centered SS not fixed in statsmodels"
                                " version {0}".format(sm_version))

        X = DataFrame(np.random.randn(30, 4), columns=['A', 'B', 'C', 'D'])
        Y = Series(np.random.randn(30))
        weights = X.std(1)

        self._check_wls(X, Y, weights)

        # repeat with missing values in both the weights and the response
        weights.ix[[5, 15]] = np.nan
        Y[[2, 21]] = np.nan
        self._check_wls(X, Y, weights)

    def _check_wls(self, x, y, weights):
        """Compare a weighted ``ols`` fit with ``sm.WLS`` on the same data.

        Rows with a missing value in ``x``, ``y`` or ``weights`` are dropped
        before the statsmodels fit.  Rolling and expanding fits are then
        checked via ``checkMovingOLS``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, weights=1 / weights)

        # assemble one frame so dropna() removes incomplete rows jointly
        combined = x.copy()
        combined['__y__'] = y
        combined['__weights__'] = weights
        combined = combined.dropna()

        endog = combined.pop('__y__').values
        aweights = combined.pop('__weights__').values
        exog = sm.add_constant(combined.values, prepend=False)

        sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit()

        assert_almost_equal(sm_result.params, result._beta_raw)
        assert_almost_equal(sm_result.resid, result._resid_raw)

        self.checkMovingOLS('rolling', x, y, weights=weights)
        self.checkMovingOLS('expanding', x, y, weights=weights)

    def checkDataSet(self, dataset, start=None, end=None, skip_moving=False):
        """Run the OLS checks on rows ``start:end`` of a statsmodels dataset.

        The full-sample check always runs.  Unless ``skip_moving`` is true,
        rolling and expanding fits are also checked for several ``nw_lags``
        / ``nw_overlap`` settings.
        """
        exog = dataset.exog[start: end]
        endog = dataset.endog[start: end]
        x = DataFrame(exog, index=np.arange(exog.shape[0]),
                      columns=np.arange(exog.shape[1]))
        y = Series(endog, index=np.arange(len(endog)))

        self.checkOLS(exog, endog, x, y)

        if not skip_moving:
            self.checkMovingOLS('rolling', x, y)
            self.checkMovingOLS('rolling', x, y, nw_lags=0)
            self.checkMovingOLS('expanding', x, y, nw_lags=0)
            self.checkMovingOLS('rolling', x, y, nw_lags=1)
            self.checkMovingOLS('expanding', x, y, nw_lags=1)
            self.checkMovingOLS('expanding', x, y, nw_lags=1, nw_overlap=True)

    def checkOLS(self, exog, endog, x, y):
        """Compare a full-sample ``ols`` fit with ``sm.OLS``.

        ``exog`` / ``endog`` feed the statsmodels reference fit (a constant
        column is appended); ``x`` / ``y`` feed ``ols``.  The sparse variant
        of the same fit must agree with the dense one.
        """
        reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x)

        # check that sparse version is the same
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
        _compare_ols_results(result, sparse_result)

        assert_almost_equal(reference.params, result._beta_raw)
        assert_almost_equal(reference.df_model, result._df_model_raw)
        assert_almost_equal(reference.df_resid, result._df_resid_raw)
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
        assert_almost_equal(reference.pvalues, result._p_value_raw)
        assert_almost_equal(reference.rsquared, result._r2_raw)
        assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
        assert_almost_equal(reference.resid, result._resid_raw)
        assert_almost_equal(reference.bse, result._std_err_raw)
        assert_almost_equal(reference.tvalues, result._t_stat_raw)
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

        _check_non_raw_results(result)

    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        """Check a moving ``ols`` fit against per-window static fits.

        The window length is twice the matrix rank of ``x``.  For every
        valid index of the moving model, a static model is fitted on the
        truncated data and compared through ``compare``.
        """
        window = np.linalg.matrix_rank(x.values) * 2

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            moving = ols(y=y, x=x, weights=weights, window_type=window_type,
                         window=window, **kwds)

        # check that sparse version is the same
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(),
                                weights=weights,
                                window_type=window_type,
                                window=window, **kwds)
        _compare_ols_results(moving, sparse_moving)

        index = moving._index

        for n, i in enumerate(moving._valid_indices):
            # a rolling window starts `window - 1` labels back once enough
            # data exists; otherwise start from the first label
            if window_type == 'rolling' and i >= window:
                prior_date = index[i - window + 1]
            else:
                prior_date = index[0]

            date = index[i]

            x_iter = {}
            for k, v in compat.iteritems(x):
                x_iter[k] = v.truncate(before=prior_date, after=date)
            y_iter = y.truncate(before=prior_date, after=date)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)

            self.compare(static, moving, event_index=i,
                         result_index=n)

        _check_non_raw_results(moving)

    # names of the ``_<field>_raw`` attributes compared by ``compare``
    FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value',
              'r2', 'r2_adj', 'rmse', 'std_err', 't_stat',
              'var_beta']

    def compare(self, static, moving, event_index=None,
                result_index=None):
        """Assert that ``static`` matches one window of ``moving``.

        ``event_index`` is the position in ``moving._index`` whose residual
        and fitted value are compared with the last static ones.
        ``result_index`` selects the row of each raw moving statistic that is
        compared with the static statistic.
        """
        index = moving._index

        # Check resid and y_fitted if we have a time index specified
        if event_index is not None:
            label = index[event_index]

            ref = static._resid_raw[-1]
            res = moving.resid[label]
            assert_almost_equal(ref, res)

            ref = static._y_fitted_raw[-1]
            res = moving.y_fitted[label]
            assert_almost_equal(ref, res)

        # Check the raw summary statistics
        for field in self.FIELDS:
            attr = '_%s_raw' % field

            ref = getattr(static, attr)
            res = getattr(moving, attr)

            if result_index is not None:
                res = res[result_index]

            assert_almost_equal(ref, res)

    def test_ols_object_dtype(self):
        df = DataFrame(np.random.randn(20, 2), dtype=object)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=df[0], x=df[1])
        # smoke test: rendering a model fitted on object dtype must not raise
        repr(model)
class TestOLSMisc(tm.TestCase):
    """
    For test coverage with faux data
    """

    _multiprocess_can_split_ = True

    @classmethod
    def setUpClass(cls):
        super(TestOLSMisc, cls).setUpClass()
        if not _have_statsmodels:
            raise nose.SkipTest("no statsmodels")

    def test_f_test(self):
        x = tm.makeTimeDataFrame()
        y = x.pop('A')

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)

        # a single string hypothesis only has to run
        hyp = '1*B+1*C+1*D=0'
        model.f_test(hyp)

        # a list of hypotheses must reproduce the model's overall F-stat
        hyp = ['1*B=0',
               '1*C=0',
               '1*D=0']
        result = model.f_test(hyp)

        assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

        # 'A' was popped from x, so it is not a regressor
        self.assertRaises(Exception, model.f_test, '1*A=0')

    def test_r2_no_intercept(self):
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()

        x_with = x.copy()
        x_with['intercept'] = 1.

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model1 = ols(y=y, x=x)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model2 = ols(y=y, x=x_with, intercept=False)
        assert_series_equal(model1.beta, model2.beta)

        # TODO: can we infer whether the intercept is there...
        self.assertNotEqual(model1.r2, model2.r2)

        # rolling

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model1 = ols(y=y, x=x, window=20)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model2 = ols(y=y, x=x_with, window=20, intercept=False)
        assert_frame_equal(model1.beta, model2.beta)
        self.assertTrue((model1.r2 != model2.r2).all())

    def test_summary_many_terms(self):
        x = DataFrame(np.random.randn(100, 20))
        y = np.random.randn(100)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)
        # smoke test: the attribute access itself is what is being exercised
        model.summary

    def test_y_predict(self):
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model1 = ols(y=y, x=x)
        assert_series_equal(model1.y_predict, model1.y_fitted)
        assert_almost_equal(model1._y_predict_raw, model1._y_fitted_raw)

    def test_predict(self):
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model1 = ols(y=y, x=x)
        assert_series_equal(model1.predict(), model1.y_predict)
        assert_series_equal(model1.predict(x=x), model1.y_predict)
        assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict)

        exog = x.copy()
        exog['intercept'] = 1.
        rs = Series(np.dot(exog.values, model1.beta.values), x.index)
        assert_series_equal(model1.y_predict, rs)

        # reversing the column order must not change the prediction
        x2 = x.reindex(columns=x.columns[::-1])
        assert_series_equal(model1.predict(x=x2), model1.y_predict)

        x3 = x2 + 10
        pred3 = model1.predict(x=x3)
        x3['intercept'] = 1.
        x3 = x3.reindex(columns=model1.beta.index)
        expected = Series(np.dot(x3.values, model1.beta.values), x3.index)
        assert_series_equal(expected, pred3)

        # an all-zero beta must predict zero everywhere
        beta = Series(0., model1.beta.index)
        pred4 = model1.predict(beta=beta)
        assert_series_equal(Series(0., pred4.index), pred4)

    def test_predict_longer_exog(self):
        exogenous = {"1998": "4760", "1999": "5904", "2000": "4504",
                     "2001": "9808", "2002": "4241", "2003": "4086",
                     "2004": "4687", "2005": "7686", "2006": "3740",
                     "2007": "3075", "2008": "3753", "2009": "4679",
                     "2010": "5468", "2011": "7154", "2012": "4292",
                     "2013": "4283", "2014": "4595", "2015": "9194",
                     "2016": "4221", "2017": "4520"}
        endogenous = {"1998": "691", "1999": "1580", "2000": "80",
                      "2001": "1450", "2002": "555", "2003": "956",
                      "2004": "877", "2005": "614", "2006": "468",
                      "2007": "191"}

        endog = Series(endogenous)
        exog = Series(exogenous)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=endog, x=exog)

        # the prediction must span the (longer) exog index
        pred = model.y_predict
        self.assert_index_equal(pred.index, exog.index)

    def test_longpanel_series_combo(self):
        wp = tm.makePanel()
        lp = wp.to_frame()

        y = lp.pop('ItemA')
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=lp, entity_effects=True, window=20)
        self.assertTrue(notnull(model.beta.values).all())
        tm.assertIsInstance(model, PanelOLS)
        # smoke test
        model.summary

    def test_series_rhs(self):
        y = tm.makeTimeSeries()
        x = tm.makeTimeSeries()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            expected = ols(y=y, x={'x': x})
        assert_series_equal(model.beta, expected.beta)

        # GH 5233/5250
        assert_series_equal(model.y_predict, model.predict(x=x))

    def test_various_attributes(self):
        # just make sure everything "works". test correctness elsewhere

        x = DataFrame(np.random.randn(100, 5))
        y = np.random.randn(100)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x, window=20)

        series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol']

        for attr in series_attrs:
            value = getattr(model, attr)
            tm.assertIsInstance(value, Series)

        # works
        model._results

    def test_catch_regressor_overlap(self):
        # both frames carry a column 'B'
        df1 = tm.makeTimeDataFrame().ix[:, ['A', 'B']]
        df2 = tm.makeTimeDataFrame().ix[:, ['B', 'C', 'D']]
        y = tm.makeTimeSeries()

        data = {'foo': df1, 'bar': df2}

        def f():
            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                ols(y=y, x=data)
        self.assertRaises(Exception, f)

    def test_plm_ctor(self):
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x, intercept=False)
        # smoke test
        model.summary

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=Panel(x))
        # smoke test
        model.summary

    def test_plm_attrs(self):
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            rmodel = ols(y=y, x=x, window=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)
        # smoke test: residuals of both models must be computable
        model.resid
        rmodel.resid

    def test_plm_lagged_y_predict(self):
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x, window=10)
        # smoke test: only checks that the call does not raise
        model.lagged_y_predict(2)

    def test_plm_f_test(self):
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)

        # a single string hypothesis only has to run
        hyp = '1*a+1*b=0'
        model.f_test(hyp)

        # a list of hypotheses must reproduce the model's overall F-stat
        hyp = ['1*a=0',
               '1*b=0']
        result = model.f_test(hyp)

        assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

    def test_plm_exclude_dummy_corner(self):
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(
                y=y, x=x, entity_effects=True, dropped_dummies={'entity': 'D'})
        # smoke test
        model.summary

        def f():
            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                ols(y=y, x=x, entity_effects=True,
                    dropped_dummies={'entity': 'E'})
        self.assertRaises(Exception, f)

    def test_columns_tuples_summary(self):
        # #1837
        X = DataFrame(np.random.randn(10, 2), columns=[('a', 'b'), ('c', 'd')])
        Y = Series(np.random.randn(10))

        # it works!
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=Y, x=X)
        model.summary
class TestPanelOLS(BaseTest):
    """Tests for ``ols`` on panel data.

    Covers data filtering, time / entity / x effects, dummy dropping, and
    consistency between moving and static panel fits.  The ``panel_*`` and
    ``series_*`` fixtures are read from ``self`` and are not defined in this
    class.
    """

    _multiprocess_can_split_ = True

    # names of the ``_<field>_raw`` attributes compared by ``compare``
    FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat',
              'p_value', 'r2', 'r2_adj', 'rmse', 'std_err',
              't_stat', 'var_beta']

    _other_fields = ['resid', 'y_fitted']

    def testFiltering(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        self.assert_index_equal(exp_index, index)

        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(['A', 'B'])
        self.assert_index_equal(exp_index, index)

        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1),
                           datetime(2000, 1, 3),
                           datetime(2000, 1, 4)])
        self.assert_index_equal(exp_index, index)

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        exp_x = np.array([[6, 14, 1], [9, 17, 1],
                          [30, 48, 1]], dtype=np.float64)
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = np.array([[6, 14, 1], [9, 17, 1], [30, 48, 1],
                                   [11, 20, 1], [12, 21, 1]], dtype=np.float64)
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        self.assert_index_equal(result._x_filtered.index.levels[0],
                                result.y_fitted.index)

    def test_wls_panel(self):
        y = tm.makeTimeDataFrame()
        x = Panel({'x1': tm.makeTimeDataFrame(),
                   'x2': tm.makeTimeDataFrame()})

        y.ix[[1, 7], 'A'] = np.nan
        y.ix[[6, 15], 'B'] = np.nan
        y.ix[[3, 20], 'C'] = np.nan
        y.ix[[5, 11], 'D'] = np.nan

        stack_y = y.stack()
        stack_x = DataFrame(dict((k, v.stack())
                                 for k, v in x.iteritems()))

        weights = x.std('items')
        stack_weights = weights.stack()

        stack_y.index = stack_y.index._tuple_index
        stack_x.index = stack_x.index._tuple_index
        stack_weights.index = stack_weights.index._tuple_index

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, weights=1 / weights)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ['resid', 'y_fitted']:
            rvals = getattr(result, attr).stack().values
            evals = getattr(expected, attr).values
            assert_almost_equal(rvals, evals)

    def testWithTimeEffects(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True)

        # .flat is flatiter instance
        assert_almost_equal(result._y_trans.values.flat, [0, -0.5, 0.5],
                            check_dtype=False)

        exp_x = np.array([[0, 0], [-10.5, -15.5], [10.5, 15.5]])
        assert_almost_equal(result._x_trans.values, exp_x)

        # _check_non_raw_results(result)

    def testWithEntityEffects(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True)

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        exp_x = DataFrame([[0., 6., 14., 1.], [0, 9, 17, 1], [1, 30, 48, 1]],
                          index=result._x.index, columns=['FE_B', 'x1', 'x2',
                                                          'intercept'],
                          dtype=float)
        tm.assert_frame_equal(result._x, exp_x.ix[:, result._x.columns])
        # _check_non_raw_results(result)

    def testWithEntityEffectsAndDroppedDummies(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
                         dropped_dummies={'entity': 'B'})

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)
        exp_x = DataFrame([[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]],
                          index=result._x.index, columns=['FE_A', 'x1', 'x2',
                                                          'intercept'],
                          dtype=float)
        tm.assert_frame_equal(result._x, exp_x.ix[:, result._x.columns])
        # _check_non_raw_results(result)

    def testWithXEffects(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        res = result._x
        exp_x = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]],
                          columns=['x1_30', 'x1_9', 'x2', 'intercept'],
                          index=res.index, dtype=float)
        assert_frame_equal(res, exp_x.reindex(columns=res.columns))

    def testWithXEffectsAndDroppedDummies(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'],
                         dropped_dummies={'x1': 30})

        res = result._x
        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)
        exp_x = DataFrame([[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]],
                          columns=['x1_6', 'x1_9', 'x2', 'intercept'],
                          index=res.index, dtype=float)

        assert_frame_equal(res, exp_x.reindex(columns=res.columns))

    def testWithXEffectsAndConversion(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y3, x=self.panel_x3,
                         x_effects=['x1', 'x2'])

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
                            check_dtype=False)
        exp_x = np.array([[0, 0, 0, 1, 1], [1, 0, 0, 0, 1], [0, 1, 1, 0, 1],
                          [0, 0, 0, 1, 1]], dtype=np.float64)
        assert_almost_equal(result._x.values, exp_x)

        exp_index = Index(['x1_B', 'x1_C', 'x2_baz', 'x2_foo', 'intercept'])
        self.assert_index_equal(exp_index, result._x.columns)

        # _check_non_raw_results(result)

    def testWithXEffectsAndConversionAndDroppedDummies(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'],
                         dropped_dummies={'x2': 'foo'})

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
                            check_dtype=False)
        exp_x = np.array([[0, 0, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 1],
                          [0, 0, 0, 0, 1]], dtype=np.float64)
        assert_almost_equal(result._x.values, exp_x)

        exp_index = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
        self.assert_index_equal(exp_index, result._x.columns)

        # _check_non_raw_results(result)

    def testForSeries(self):
        self.checkForSeries(self.series_panel_x, self.series_panel_y,
                            self.series_x, self.series_y)

        self.checkForSeries(self.series_panel_x, self.series_panel_y,
                            self.series_x, self.series_y, nw_lags=0)

        self.checkForSeries(self.series_panel_x, self.series_panel_y,
                            self.series_x, self.series_y, nw_lags=1,
                            nw_overlap=True)

    def testRolling(self):
        self.checkMovingOLS(self.panel_x, self.panel_y)

    def testRollingWithFixedEffects(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            entity_effects=True)
        self.checkMovingOLS(self.panel_x, self.panel_y, intercept=False,
                            entity_effects=True)

    def testRollingWithTimeEffects(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            time_effects=True)

    def testRollingWithNeweyWest(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            nw_lags=1)

    def testRollingWithEntityCluster(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            cluster='entity')

    def testUnknownClusterRaisesValueError(self):
        assertRaisesRegexp(ValueError, "Unrecognized cluster.*ridiculous",
                           self.checkMovingOLS, self.panel_x, self.panel_y,
                           cluster='ridiculous')

    def testRollingWithTimeEffectsAndEntityCluster(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            time_effects=True, cluster='entity')

    def testRollingWithTimeCluster(self):
        self.checkMovingOLS(self.panel_x, self.panel_y,
                            cluster='time')

    def testRollingWithNeweyWestAndEntityCluster(self):
        self.assertRaises(ValueError, self.checkMovingOLS,
                          self.panel_x, self.panel_y,
                          nw_lags=1, cluster='entity')

    def testRollingWithNeweyWestAndTimeEffectsAndEntityCluster(self):
        self.assertRaises(ValueError,
                          self.checkMovingOLS, self.panel_x, self.panel_y,
                          nw_lags=1, cluster='entity',
                          time_effects=True)

    def testExpanding(self):
        self.checkMovingOLS(
            self.panel_x, self.panel_y, window_type='expanding')

    def testNonPooled(self):
        self.checkNonPooled(y=self.panel_y, x=self.panel_x)
        self.checkNonPooled(y=self.panel_y, x=self.panel_x,
                            window_type='rolling', window=25, min_periods=10)

    def testUnknownWindowType(self):
        assertRaisesRegexp(ValueError, "window.*ridiculous",
                           self.checkNonPooled, y=self.panel_y, x=self.panel_x,
                           window_type='ridiculous', window=25, min_periods=10)

    def checkNonPooled(self, x, y, **kwds):
        """Fit a non-pooled model and render it and each of its attributes."""
        # For now, just check that it doesn't crash
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, pool=False, **kwds)

        _check_repr(result)
        for attr in NonPooledPanelOLS.ATTRIBUTES:
            _check_repr(getattr(result, attr))

    def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
        """Check a moving panel fit against per-window static fits.

        For every valid index of the moving model, a static model is fitted
        on the truncated data and compared through ``compare``.
        """
        window = 25  # must be larger than rank of x

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            moving = ols(y=y, x=x, window_type=window_type,
                         window=window, **kwds)

        index = moving._index

        for n, i in enumerate(moving._valid_indices):
            # a rolling window starts `window - 1` labels back once enough
            # data exists; otherwise start from the first label
            if window_type == 'rolling' and i >= window:
                prior_date = index[i - window + 1]
            else:
                prior_date = index[0]

            date = index[i]

            x_iter = {}
            for k, v in compat.iteritems(x):
                x_iter[k] = v.truncate(before=prior_date, after=date)
            y_iter = y.truncate(before=prior_date, after=date)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static = ols(y=y_iter, x=x_iter, **kwds)

            self.compare(static, moving, event_index=i,
                         result_index=n)

        _check_non_raw_results(moving)

    def checkForSeries(self, x, y, series_x, series_y, **kwds):
        """Assert that fitting ``x`` / ``y`` matches fitting the series pair."""
        # Consistency check with simple OLS.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, **kwds)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            reference = ols(y=series_y, x=series_x, **kwds)

        self.compare(reference, result)

    def compare(self, static, moving, event_index=None,
                result_index=None):
        """Assert that ``static`` matches one period of ``moving``.

        ``event_index`` selects the period of ``moving`` whose raw residuals
        and fitted values are compared with the last period of ``static``.
        ``result_index`` selects the row of each raw moving statistic that is
        compared with the static statistic.
        """
        # Check resid and y_fitted if we have a time index specified
        if event_index is not None:
            staticSlice = _period_slice(static, -1)
            movingSlice = _period_slice(moving, event_index)

            ref = static._resid_raw[staticSlice]
            res = moving._resid_raw[movingSlice]

            assert_almost_equal(ref, res)

            ref = static._y_fitted_raw[staticSlice]
            res = moving._y_fitted_raw[movingSlice]

            assert_almost_equal(ref, res)

        # Check the raw summary statistics
        for field in self.FIELDS:
            attr = '_%s_raw' % field

            ref = getattr(static, attr)
            res = getattr(moving, attr)

            if result_index is not None:
                res = res[result_index]

            assert_almost_equal(ref, res)

    def test_auto_rolling_window_type(self):
        data = tm.makeTimeDataFrame()
        y = data.pop('A')

        # omitting window_type must give the same betas as 'rolling'
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            window_model = ols(y=y, x=data, window=20, min_periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            rolling_model = ols(y=y, x=data, window=20, min_periods=10,
                                window_type='rolling')

        assert_frame_equal(window_model.beta, rolling_model.beta)

    def test_group_agg(self):
        from pandas.stats.plm import _group_agg

        # row i of `values` is filled with i
        values = np.ones((10, 2)) * np.arange(10).reshape((10, 1))
        bounds = np.arange(5) * 2

        def column_means(x):
            return x.mean(axis=0)

        agged = _group_agg(values, bounds, column_means)

        # TestCase assertions, unlike bare asserts, survive ``python -O``
        self.assertEqual(agged[1][0], 2.5)
        self.assertEqual(agged[2][0], 4.5)

        # test a function that doesn't aggregate
        def not_an_aggregate(x):
            return np.zeros((2, 2))

        self.assertRaises(Exception, _group_agg, values, bounds,
                          not_an_aggregate)
def _check_non_raw_results(model):
    """Smoke test: render ``model`` and its main result attributes.

    Each object goes through ``_check_repr``: the model first, then
    ``resid``, ``summary_as_matrix``, ``y_fitted`` and ``y_predict``.
    """
    _check_repr(model)
    for attr_name in ('resid', 'summary_as_matrix', 'y_fitted', 'y_predict'):
        _check_repr(getattr(model, attr_name))
- def _period_slice(panelModel, i):
- index = panelModel._x_trans.index
- period = index.levels[0][i]
- L, R = index.get_major_bounds(period, period)
- return slice(L, R)
class TestOLSFilter(tm.TestCase):
    """Tests for ``_filter_data`` on small hand-built time series."""

    _multiprocess_can_split_ = True

    def setUp(self):
        def bday_index(periods):
            # business-day index of the requested length from 2009-12-11
            return date_range(datetime(2009, 12, 11), periods=periods,
                              freq=datetools.bday)

        self.TS1 = Series([3, 1, 4], index=bday_index(3))
        self.TS2 = Series([1, 5, 9, 2, 6], index=bday_index(5))
        self.TS3 = Series([5, np.nan, 3], index=bday_index(3))
        self.TS4 = Series([np.nan, 5, 8, 9, 7], index=bday_index(5))

        # the same two regressors, once as a DataFrame and once as a dict
        self.DF1 = DataFrame(data={'x1': self.TS2, 'x2': self.TS4})
        self.DICT1 = {'x1': self.TS2, 'x2': self.TS4}

    def testFilterWithSeriesRHS(self):
        filtered = _filter_data(self.TS1, {'x1': self.TS2}, None)
        lhs, rhs, _, rhs_pre, _, _ = filtered

        self.tsAssertEqual(self.TS1.astype(np.float64), lhs,
                           check_names=False)
        self.tsAssertEqual(self.TS2[:3].astype(np.float64), rhs['x1'],
                           check_names=False)
        self.tsAssertEqual(self.TS2.astype(np.float64), rhs_pre['x1'],
                           check_names=False)

    def testFilterWithSeriesRHS2(self):
        filtered = _filter_data(self.TS2, {'x1': self.TS1}, None)
        lhs, rhs, _, rhs_pre, _, _ = filtered

        self.tsAssertEqual(self.TS2[:3].astype(np.float64), lhs,
                           check_names=False)
        self.tsAssertEqual(self.TS1.astype(np.float64), rhs['x1'],
                           check_names=False)
        self.tsAssertEqual(self.TS1.astype(np.float64), rhs_pre['x1'],
                           check_names=False)

    def testFilterWithSeriesRHS3(self):
        filtered = _filter_data(self.TS3, {'x1': self.TS4}, None)
        lhs, rhs, _, rhs_pre, _, _ = filtered

        expected_lhs = self.TS3[2:3]
        expected_rhs = self.TS4[2:3]
        expected_rhs_pre = self.TS4[1:]

        self.tsAssertEqual(expected_lhs, lhs, check_names=False)
        self.tsAssertEqual(expected_rhs, rhs['x1'], check_names=False)
        self.tsAssertEqual(expected_rhs_pre, rhs_pre['x1'],
                           check_names=False)

    def testFilterWithDataFrameRHS(self):
        filtered = _filter_data(self.TS1, self.DF1, None)
        lhs, rhs, _, _, _, _ = filtered

        expected_lhs = self.TS1[1:].astype(np.float64)
        expected_x1 = self.TS2[1:3]
        expected_x2 = self.TS4[1:3].astype(np.float64)

        self.tsAssertEqual(expected_lhs, lhs, check_names=False)
        self.tsAssertEqual(expected_x1, rhs['x1'], check_names=False)
        self.tsAssertEqual(expected_x2, rhs['x2'], check_names=False)

    def testFilterWithDictRHS(self):
        filtered = _filter_data(self.TS1, self.DICT1, None)
        lhs, rhs, _, _, _, _ = filtered

        expected_lhs = self.TS1[1:].astype(np.float64)
        expected_x1 = self.TS2[1:3].astype(np.float64)
        expected_x2 = self.TS4[1:3].astype(np.float64)

        self.tsAssertEqual(expected_lhs, lhs, check_names=False)
        self.tsAssertEqual(expected_x1, rhs['x1'], check_names=False)
        self.tsAssertEqual(expected_x2, rhs['x2'], check_names=False)

    def tsAssertEqual(self, ts1, ts2, **kwargs):
        """Forward to ``self.assert_series_equal`` with the given options."""
        self.assert_series_equal(ts1, ts2, **kwargs)
if __name__ == '__main__':
    import nose

    # Run this module under nose with the original debugging flags; the
    # interpreter is left running afterwards (exit=False).
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)