PageRenderTime 69ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/stats/tests/test_ols.py

http://github.com/wesm/pandas
Python | 980 lines | 700 code | 234 blank | 46 comment | 80 complexity | d0089b3a7061fbb1ada9b9149da44f22 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. """
  2. Unit test suite for OLS and PanelOLS classes
  3. """
  4. # pylint: disable-msg=W0212
  5. # flake8: noqa
  6. from __future__ import division
  7. from datetime import datetime
  8. from pandas import compat
  9. from distutils.version import LooseVersion
  10. import nose
  11. import numpy as np
  12. from pandas import date_range, bdate_range
  13. from pandas.core.panel import Panel
  14. from pandas import DataFrame, Index, Series, notnull, datetools
  15. from pandas.stats.api import ols
  16. from pandas.stats.ols import _filter_data
  17. from pandas.stats.plm import NonPooledPanelOLS, PanelOLS
  18. from pandas.util.testing import (assert_almost_equal, assert_series_equal,
  19. assert_frame_equal, assertRaisesRegexp, slow)
  20. import pandas.util.testing as tm
  21. import pandas.compat as compat
  22. from .common import BaseTest
  23. _have_statsmodels = True
  24. try:
  25. import statsmodels.api as sm
  26. except ImportError:
  27. try:
  28. import scikits.statsmodels.api as sm
  29. except ImportError:
  30. _have_statsmodels = False
  31. def _check_repr(obj):
  32. repr(obj)
  33. str(obj)
  34. def _compare_ols_results(model1, model2):
  35. tm.assertIsInstance(model1, type(model2))
  36. if hasattr(model1, '_window_type'):
  37. _compare_moving_ols(model1, model2)
  38. else:
  39. _compare_fullsample_ols(model1, model2)
  40. def _compare_fullsample_ols(model1, model2):
  41. assert_series_equal(model1.beta, model2.beta)
  42. def _compare_moving_ols(model1, model2):
  43. assert_frame_equal(model1.beta, model2.beta)
  44. class TestOLS(BaseTest):
  45. _multiprocess_can_split_ = True
  46. # TODO: Add tests for OLS y predict
  47. # TODO: Right now we just check for consistency between full-sample and
  48. # rolling/expanding results of the panel OLS. We should also cross-check
  49. # with trusted implementations of panel OLS (e.g. R).
  50. # TODO: Add tests for non pooled OLS.
  51. @classmethod
  52. def setUpClass(cls):
  53. super(TestOLS, cls).setUpClass()
  54. try:
  55. import matplotlib as mpl
  56. mpl.use('Agg', warn=False)
  57. except ImportError:
  58. pass
  59. if not _have_statsmodels:
  60. raise nose.SkipTest("no statsmodels")
  61. def testOLSWithDatasets_ccard(self):
  62. self.checkDataSet(sm.datasets.ccard.load(), skip_moving=True)
  63. self.checkDataSet(sm.datasets.cpunish.load(), skip_moving=True)
  64. self.checkDataSet(sm.datasets.longley.load(), skip_moving=True)
  65. self.checkDataSet(sm.datasets.stackloss.load(), skip_moving=True)
  66. @slow
  67. def testOLSWithDatasets_copper(self):
  68. self.checkDataSet(sm.datasets.copper.load())
  69. @slow
  70. def testOLSWithDatasets_scotland(self):
  71. self.checkDataSet(sm.datasets.scotland.load())
  72. # degenerate case fails on some platforms
  73. # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all
  74. # 0s
  75. def testWLS(self):
  76. # WLS centered SS changed (fixed) in 0.5.0
  77. sm_version = sm.version.version
  78. if sm_version < LooseVersion('0.5.0'):
  79. raise nose.SkipTest("WLS centered SS not fixed in statsmodels"
  80. " version {0}".format(sm_version))
  81. X = DataFrame(np.random.randn(30, 4), columns=['A', 'B', 'C', 'D'])
  82. Y = Series(np.random.randn(30))
  83. weights = X.std(1)
  84. self._check_wls(X, Y, weights)
  85. weights.ix[[5, 15]] = np.nan
  86. Y[[2, 21]] = np.nan
  87. self._check_wls(X, Y, weights)
  88. def _check_wls(self, x, y, weights):
  89. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  90. result = ols(y=y, x=x, weights=1 / weights)
  91. combined = x.copy()
  92. combined['__y__'] = y
  93. combined['__weights__'] = weights
  94. combined = combined.dropna()
  95. endog = combined.pop('__y__').values
  96. aweights = combined.pop('__weights__').values
  97. exog = sm.add_constant(combined.values, prepend=False)
  98. sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit()
  99. assert_almost_equal(sm_result.params, result._beta_raw)
  100. assert_almost_equal(sm_result.resid, result._resid_raw)
  101. self.checkMovingOLS('rolling', x, y, weights=weights)
  102. self.checkMovingOLS('expanding', x, y, weights=weights)
  103. def checkDataSet(self, dataset, start=None, end=None, skip_moving=False):
  104. exog = dataset.exog[start: end]
  105. endog = dataset.endog[start: end]
  106. x = DataFrame(exog, index=np.arange(exog.shape[0]),
  107. columns=np.arange(exog.shape[1]))
  108. y = Series(endog, index=np.arange(len(endog)))
  109. self.checkOLS(exog, endog, x, y)
  110. if not skip_moving:
  111. self.checkMovingOLS('rolling', x, y)
  112. self.checkMovingOLS('rolling', x, y, nw_lags=0)
  113. self.checkMovingOLS('expanding', x, y, nw_lags=0)
  114. self.checkMovingOLS('rolling', x, y, nw_lags=1)
  115. self.checkMovingOLS('expanding', x, y, nw_lags=1)
  116. self.checkMovingOLS('expanding', x, y, nw_lags=1, nw_overlap=True)
  117. def checkOLS(self, exog, endog, x, y):
  118. reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
  119. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  120. result = ols(y=y, x=x)
  121. # check that sparse version is the same
  122. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  123. sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
  124. _compare_ols_results(result, sparse_result)
  125. assert_almost_equal(reference.params, result._beta_raw)
  126. assert_almost_equal(reference.df_model, result._df_model_raw)
  127. assert_almost_equal(reference.df_resid, result._df_resid_raw)
  128. assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
  129. assert_almost_equal(reference.pvalues, result._p_value_raw)
  130. assert_almost_equal(reference.rsquared, result._r2_raw)
  131. assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
  132. assert_almost_equal(reference.resid, result._resid_raw)
  133. assert_almost_equal(reference.bse, result._std_err_raw)
  134. assert_almost_equal(reference.tvalues, result._t_stat_raw)
  135. assert_almost_equal(reference.cov_params(), result._var_beta_raw)
  136. assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)
  137. _check_non_raw_results(result)
  138. def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
  139. window = np.linalg.matrix_rank(x.values) * 2
  140. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  141. moving = ols(y=y, x=x, weights=weights, window_type=window_type,
  142. window=window, **kwds)
  143. # check that sparse version is the same
  144. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  145. sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(),
  146. weights=weights,
  147. window_type=window_type,
  148. window=window, **kwds)
  149. _compare_ols_results(moving, sparse_moving)
  150. index = moving._index
  151. for n, i in enumerate(moving._valid_indices):
  152. if window_type == 'rolling' and i >= window:
  153. prior_date = index[i - window + 1]
  154. else:
  155. prior_date = index[0]
  156. date = index[i]
  157. x_iter = {}
  158. for k, v in compat.iteritems(x):
  159. x_iter[k] = v.truncate(before=prior_date, after=date)
  160. y_iter = y.truncate(before=prior_date, after=date)
  161. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  162. static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)
  163. self.compare(static, moving, event_index=i,
  164. result_index=n)
  165. _check_non_raw_results(moving)
  166. FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value',
  167. 'r2', 'r2_adj', 'rmse', 'std_err', 't_stat',
  168. 'var_beta']
  169. def compare(self, static, moving, event_index=None,
  170. result_index=None):
  171. index = moving._index
  172. # Check resid if we have a time index specified
  173. if event_index is not None:
  174. ref = static._resid_raw[-1]
  175. label = index[event_index]
  176. res = moving.resid[label]
  177. assert_almost_equal(ref, res)
  178. ref = static._y_fitted_raw[-1]
  179. res = moving.y_fitted[label]
  180. assert_almost_equal(ref, res)
  181. # Check y_fitted
  182. for field in self.FIELDS:
  183. attr = '_%s_raw' % field
  184. ref = getattr(static, attr)
  185. res = getattr(moving, attr)
  186. if result_index is not None:
  187. res = res[result_index]
  188. assert_almost_equal(ref, res)
  189. def test_ols_object_dtype(self):
  190. df = DataFrame(np.random.randn(20, 2), dtype=object)
  191. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  192. model = ols(y=df[0], x=df[1])
  193. summary = repr(model)
  194. class TestOLSMisc(tm.TestCase):
  195. _multiprocess_can_split_ = True
  196. """
  197. For test coverage with faux data
  198. """
  199. @classmethod
  200. def setUpClass(cls):
  201. super(TestOLSMisc, cls).setUpClass()
  202. if not _have_statsmodels:
  203. raise nose.SkipTest("no statsmodels")
  204. def test_f_test(self):
  205. x = tm.makeTimeDataFrame()
  206. y = x.pop('A')
  207. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  208. model = ols(y=y, x=x)
  209. hyp = '1*B+1*C+1*D=0'
  210. result = model.f_test(hyp)
  211. hyp = ['1*B=0',
  212. '1*C=0',
  213. '1*D=0']
  214. result = model.f_test(hyp)
  215. assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])
  216. self.assertRaises(Exception, model.f_test, '1*A=0')
  217. def test_r2_no_intercept(self):
  218. y = tm.makeTimeSeries()
  219. x = tm.makeTimeDataFrame()
  220. x_with = x.copy()
  221. x_with['intercept'] = 1.
  222. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  223. model1 = ols(y=y, x=x)
  224. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  225. model2 = ols(y=y, x=x_with, intercept=False)
  226. assert_series_equal(model1.beta, model2.beta)
  227. # TODO: can we infer whether the intercept is there...
  228. self.assertNotEqual(model1.r2, model2.r2)
  229. # rolling
  230. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  231. model1 = ols(y=y, x=x, window=20)
  232. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  233. model2 = ols(y=y, x=x_with, window=20, intercept=False)
  234. assert_frame_equal(model1.beta, model2.beta)
  235. self.assertTrue((model1.r2 != model2.r2).all())
  236. def test_summary_many_terms(self):
  237. x = DataFrame(np.random.randn(100, 20))
  238. y = np.random.randn(100)
  239. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  240. model = ols(y=y, x=x)
  241. model.summary
  242. def test_y_predict(self):
  243. y = tm.makeTimeSeries()
  244. x = tm.makeTimeDataFrame()
  245. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  246. model1 = ols(y=y, x=x)
  247. assert_series_equal(model1.y_predict, model1.y_fitted)
  248. assert_almost_equal(model1._y_predict_raw, model1._y_fitted_raw)
  249. def test_predict(self):
  250. y = tm.makeTimeSeries()
  251. x = tm.makeTimeDataFrame()
  252. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  253. model1 = ols(y=y, x=x)
  254. assert_series_equal(model1.predict(), model1.y_predict)
  255. assert_series_equal(model1.predict(x=x), model1.y_predict)
  256. assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict)
  257. exog = x.copy()
  258. exog['intercept'] = 1.
  259. rs = Series(np.dot(exog.values, model1.beta.values), x.index)
  260. assert_series_equal(model1.y_predict, rs)
  261. x2 = x.reindex(columns=x.columns[::-1])
  262. assert_series_equal(model1.predict(x=x2), model1.y_predict)
  263. x3 = x2 + 10
  264. pred3 = model1.predict(x=x3)
  265. x3['intercept'] = 1.
  266. x3 = x3.reindex(columns=model1.beta.index)
  267. expected = Series(np.dot(x3.values, model1.beta.values), x3.index)
  268. assert_series_equal(expected, pred3)
  269. beta = Series(0., model1.beta.index)
  270. pred4 = model1.predict(beta=beta)
  271. assert_series_equal(Series(0., pred4.index), pred4)
  272. def test_predict_longer_exog(self):
  273. exogenous = {"1998": "4760", "1999": "5904", "2000": "4504",
  274. "2001": "9808", "2002": "4241", "2003": "4086",
  275. "2004": "4687", "2005": "7686", "2006": "3740",
  276. "2007": "3075", "2008": "3753", "2009": "4679",
  277. "2010": "5468", "2011": "7154", "2012": "4292",
  278. "2013": "4283", "2014": "4595", "2015": "9194",
  279. "2016": "4221", "2017": "4520"}
  280. endogenous = {"1998": "691", "1999": "1580", "2000": "80",
  281. "2001": "1450", "2002": "555", "2003": "956",
  282. "2004": "877", "2005": "614", "2006": "468",
  283. "2007": "191"}
  284. endog = Series(endogenous)
  285. exog = Series(exogenous)
  286. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  287. model = ols(y=endog, x=exog)
  288. pred = model.y_predict
  289. self.assert_index_equal(pred.index, exog.index)
  290. def test_longpanel_series_combo(self):
  291. wp = tm.makePanel()
  292. lp = wp.to_frame()
  293. y = lp.pop('ItemA')
  294. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  295. model = ols(y=y, x=lp, entity_effects=True, window=20)
  296. self.assertTrue(notnull(model.beta.values).all())
  297. tm.assertIsInstance(model, PanelOLS)
  298. model.summary
  299. def test_series_rhs(self):
  300. y = tm.makeTimeSeries()
  301. x = tm.makeTimeSeries()
  302. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  303. model = ols(y=y, x=x)
  304. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  305. expected = ols(y=y, x={'x': x})
  306. assert_series_equal(model.beta, expected.beta)
  307. # GH 5233/5250
  308. assert_series_equal(model.y_predict, model.predict(x=x))
  309. def test_various_attributes(self):
  310. # just make sure everything "works". test correctness elsewhere
  311. x = DataFrame(np.random.randn(100, 5))
  312. y = np.random.randn(100)
  313. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  314. model = ols(y=y, x=x, window=20)
  315. series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol']
  316. for attr in series_attrs:
  317. value = getattr(model, attr)
  318. tm.assertIsInstance(value, Series)
  319. # works
  320. model._results
  321. def test_catch_regressor_overlap(self):
  322. df1 = tm.makeTimeDataFrame().ix[:, ['A', 'B']]
  323. df2 = tm.makeTimeDataFrame().ix[:, ['B', 'C', 'D']]
  324. y = tm.makeTimeSeries()
  325. data = {'foo': df1, 'bar': df2}
  326. def f():
  327. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  328. ols(y=y, x=data)
  329. self.assertRaises(Exception, f)
  330. def test_plm_ctor(self):
  331. y = tm.makeTimeDataFrame()
  332. x = {'a': tm.makeTimeDataFrame(),
  333. 'b': tm.makeTimeDataFrame()}
  334. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  335. model = ols(y=y, x=x, intercept=False)
  336. model.summary
  337. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  338. model = ols(y=y, x=Panel(x))
  339. model.summary
  340. def test_plm_attrs(self):
  341. y = tm.makeTimeDataFrame()
  342. x = {'a': tm.makeTimeDataFrame(),
  343. 'b': tm.makeTimeDataFrame()}
  344. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  345. rmodel = ols(y=y, x=x, window=10)
  346. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  347. model = ols(y=y, x=x)
  348. model.resid
  349. rmodel.resid
  350. def test_plm_lagged_y_predict(self):
  351. y = tm.makeTimeDataFrame()
  352. x = {'a': tm.makeTimeDataFrame(),
  353. 'b': tm.makeTimeDataFrame()}
  354. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  355. model = ols(y=y, x=x, window=10)
  356. result = model.lagged_y_predict(2)
  357. def test_plm_f_test(self):
  358. y = tm.makeTimeDataFrame()
  359. x = {'a': tm.makeTimeDataFrame(),
  360. 'b': tm.makeTimeDataFrame()}
  361. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  362. model = ols(y=y, x=x)
  363. hyp = '1*a+1*b=0'
  364. result = model.f_test(hyp)
  365. hyp = ['1*a=0',
  366. '1*b=0']
  367. result = model.f_test(hyp)
  368. assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])
  369. def test_plm_exclude_dummy_corner(self):
  370. y = tm.makeTimeDataFrame()
  371. x = {'a': tm.makeTimeDataFrame(),
  372. 'b': tm.makeTimeDataFrame()}
  373. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  374. model = ols(
  375. y=y, x=x, entity_effects=True, dropped_dummies={'entity': 'D'})
  376. model.summary
  377. def f():
  378. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  379. ols(y=y, x=x, entity_effects=True,
  380. dropped_dummies={'entity': 'E'})
  381. self.assertRaises(Exception, f)
  382. def test_columns_tuples_summary(self):
  383. # #1837
  384. X = DataFrame(np.random.randn(10, 2), columns=[('a', 'b'), ('c', 'd')])
  385. Y = Series(np.random.randn(10))
  386. # it works!
  387. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  388. model = ols(y=Y, x=X)
  389. model.summary
  390. class TestPanelOLS(BaseTest):
  391. _multiprocess_can_split_ = True
  392. FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat',
  393. 'p_value', 'r2', 'r2_adj', 'rmse', 'std_err',
  394. 't_stat', 'var_beta']
  395. _other_fields = ['resid', 'y_fitted']
  396. def testFiltering(self):
  397. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  398. result = ols(y=self.panel_y2, x=self.panel_x2)
  399. x = result._x
  400. index = x.index.get_level_values(0)
  401. index = Index(sorted(set(index)))
  402. exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
  403. self.assert_index_equal(exp_index, index)
  404. index = x.index.get_level_values(1)
  405. index = Index(sorted(set(index)))
  406. exp_index = Index(['A', 'B'])
  407. self.assert_index_equal(exp_index, index)
  408. x = result._x_filtered
  409. index = x.index.get_level_values(0)
  410. index = Index(sorted(set(index)))
  411. exp_index = Index([datetime(2000, 1, 1),
  412. datetime(2000, 1, 3),
  413. datetime(2000, 1, 4)])
  414. self.assert_index_equal(exp_index, index)
  415. # .flat is flatiter instance
  416. assert_almost_equal(result._y.values.flat, [1, 4, 5],
  417. check_dtype=False)
  418. exp_x = np.array([[6, 14, 1], [9, 17, 1],
  419. [30, 48, 1]], dtype=np.float64)
  420. assert_almost_equal(exp_x, result._x.values)
  421. exp_x_filtered = np.array([[6, 14, 1], [9, 17, 1], [30, 48, 1],
  422. [11, 20, 1], [12, 21, 1]], dtype=np.float64)
  423. assert_almost_equal(exp_x_filtered, result._x_filtered.values)
  424. self.assert_index_equal(result._x_filtered.index.levels[0],
  425. result.y_fitted.index)
  426. def test_wls_panel(self):
  427. y = tm.makeTimeDataFrame()
  428. x = Panel({'x1': tm.makeTimeDataFrame(),
  429. 'x2': tm.makeTimeDataFrame()})
  430. y.ix[[1, 7], 'A'] = np.nan
  431. y.ix[[6, 15], 'B'] = np.nan
  432. y.ix[[3, 20], 'C'] = np.nan
  433. y.ix[[5, 11], 'D'] = np.nan
  434. stack_y = y.stack()
  435. stack_x = DataFrame(dict((k, v.stack())
  436. for k, v in x.iteritems()))
  437. weights = x.std('items')
  438. stack_weights = weights.stack()
  439. stack_y.index = stack_y.index._tuple_index
  440. stack_x.index = stack_x.index._tuple_index
  441. stack_weights.index = stack_weights.index._tuple_index
  442. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  443. result = ols(y=y, x=x, weights=1 / weights)
  444. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  445. expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)
  446. assert_almost_equal(result.beta, expected.beta)
  447. for attr in ['resid', 'y_fitted']:
  448. rvals = getattr(result, attr).stack().values
  449. evals = getattr(expected, attr).values
  450. assert_almost_equal(rvals, evals)
  451. def testWithTimeEffects(self):
  452. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  453. result = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True)
  454. # .flat is flatiter instance
  455. assert_almost_equal(result._y_trans.values.flat, [0, -0.5, 0.5],
  456. check_dtype=False)
  457. exp_x = np.array([[0, 0], [-10.5, -15.5], [10.5, 15.5]])
  458. assert_almost_equal(result._x_trans.values, exp_x)
  459. # _check_non_raw_results(result)
  460. def testWithEntityEffects(self):
  461. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  462. result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True)
  463. # .flat is flatiter instance
  464. assert_almost_equal(result._y.values.flat, [1, 4, 5],
  465. check_dtype=False)
  466. exp_x = DataFrame([[0., 6., 14., 1.], [0, 9, 17, 1], [1, 30, 48, 1]],
  467. index=result._x.index, columns=['FE_B', 'x1', 'x2',
  468. 'intercept'],
  469. dtype=float)
  470. tm.assert_frame_equal(result._x, exp_x.ix[:, result._x.columns])
  471. # _check_non_raw_results(result)
  472. def testWithEntityEffectsAndDroppedDummies(self):
  473. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  474. result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
  475. dropped_dummies={'entity': 'B'})
  476. # .flat is flatiter instance
  477. assert_almost_equal(result._y.values.flat, [1, 4, 5],
  478. check_dtype=False)
  479. exp_x = DataFrame([[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]],
  480. index=result._x.index, columns=['FE_A', 'x1', 'x2',
  481. 'intercept'],
  482. dtype=float)
  483. tm.assert_frame_equal(result._x, exp_x.ix[:, result._x.columns])
  484. # _check_non_raw_results(result)
  485. def testWithXEffects(self):
  486. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  487. result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])
  488. # .flat is flatiter instance
  489. assert_almost_equal(result._y.values.flat, [1, 4, 5],
  490. check_dtype=False)
  491. res = result._x
  492. exp_x = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]],
  493. columns=['x1_30', 'x1_9', 'x2', 'intercept'],
  494. index=res.index, dtype=float)
  495. assert_frame_equal(res, exp_x.reindex(columns=res.columns))
  496. def testWithXEffectsAndDroppedDummies(self):
  497. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  498. result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'],
  499. dropped_dummies={'x1': 30})
  500. res = result._x
  501. # .flat is flatiter instance
  502. assert_almost_equal(result._y.values.flat, [1, 4, 5],
  503. check_dtype=False)
  504. exp_x = DataFrame([[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]],
  505. columns=['x1_6', 'x1_9', 'x2', 'intercept'],
  506. index=res.index, dtype=float)
  507. assert_frame_equal(res, exp_x.reindex(columns=res.columns))
  508. def testWithXEffectsAndConversion(self):
  509. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  510. result = ols(y=self.panel_y3, x=self.panel_x3,
  511. x_effects=['x1', 'x2'])
  512. # .flat is flatiter instance
  513. assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
  514. check_dtype=False)
  515. exp_x = np.array([[0, 0, 0, 1, 1], [1, 0, 0, 0, 1], [0, 1, 1, 0, 1],
  516. [0, 0, 0, 1, 1]], dtype=np.float64)
  517. assert_almost_equal(result._x.values, exp_x)
  518. exp_index = Index(['x1_B', 'x1_C', 'x2_baz', 'x2_foo', 'intercept'])
  519. self.assert_index_equal(exp_index, result._x.columns)
  520. # _check_non_raw_results(result)
  521. def testWithXEffectsAndConversionAndDroppedDummies(self):
  522. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  523. result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'],
  524. dropped_dummies={'x2': 'foo'})
  525. # .flat is flatiter instance
  526. assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
  527. check_dtype=False)
  528. exp_x = np.array([[0, 0, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 1],
  529. [0, 0, 0, 0, 1]], dtype=np.float64)
  530. assert_almost_equal(result._x.values, exp_x)
  531. exp_index = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
  532. self.assert_index_equal(exp_index, result._x.columns)
  533. # _check_non_raw_results(result)
  534. def testForSeries(self):
  535. self.checkForSeries(self.series_panel_x, self.series_panel_y,
  536. self.series_x, self.series_y)
  537. self.checkForSeries(self.series_panel_x, self.series_panel_y,
  538. self.series_x, self.series_y, nw_lags=0)
  539. self.checkForSeries(self.series_panel_x, self.series_panel_y,
  540. self.series_x, self.series_y, nw_lags=1,
  541. nw_overlap=True)
  542. def testRolling(self):
  543. self.checkMovingOLS(self.panel_x, self.panel_y)
  544. def testRollingWithFixedEffects(self):
  545. self.checkMovingOLS(self.panel_x, self.panel_y,
  546. entity_effects=True)
  547. self.checkMovingOLS(self.panel_x, self.panel_y, intercept=False,
  548. entity_effects=True)
  549. def testRollingWithTimeEffects(self):
  550. self.checkMovingOLS(self.panel_x, self.panel_y,
  551. time_effects=True)
  552. def testRollingWithNeweyWest(self):
  553. self.checkMovingOLS(self.panel_x, self.panel_y,
  554. nw_lags=1)
  555. def testRollingWithEntityCluster(self):
  556. self.checkMovingOLS(self.panel_x, self.panel_y,
  557. cluster='entity')
  558. def testUnknownClusterRaisesValueError(self):
  559. assertRaisesRegexp(ValueError, "Unrecognized cluster.*ridiculous",
  560. self.checkMovingOLS, self.panel_x, self.panel_y,
  561. cluster='ridiculous')
  562. def testRollingWithTimeEffectsAndEntityCluster(self):
  563. self.checkMovingOLS(self.panel_x, self.panel_y,
  564. time_effects=True, cluster='entity')
  565. def testRollingWithTimeCluster(self):
  566. self.checkMovingOLS(self.panel_x, self.panel_y,
  567. cluster='time')
  568. def testRollingWithNeweyWestAndEntityCluster(self):
  569. self.assertRaises(ValueError, self.checkMovingOLS,
  570. self.panel_x, self.panel_y,
  571. nw_lags=1, cluster='entity')
  572. def testRollingWithNeweyWestAndTimeEffectsAndEntityCluster(self):
  573. self.assertRaises(ValueError,
  574. self.checkMovingOLS, self.panel_x, self.panel_y,
  575. nw_lags=1, cluster='entity',
  576. time_effects=True)
  577. def testExpanding(self):
  578. self.checkMovingOLS(
  579. self.panel_x, self.panel_y, window_type='expanding')
  580. def testNonPooled(self):
  581. self.checkNonPooled(y=self.panel_y, x=self.panel_x)
  582. self.checkNonPooled(y=self.panel_y, x=self.panel_x,
  583. window_type='rolling', window=25, min_periods=10)
  584. def testUnknownWindowType(self):
  585. assertRaisesRegexp(ValueError, "window.*ridiculous",
  586. self.checkNonPooled, y=self.panel_y, x=self.panel_x,
  587. window_type='ridiculous', window=25, min_periods=10)
  588. def checkNonPooled(self, x, y, **kwds):
  589. # For now, just check that it doesn't crash
  590. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  591. result = ols(y=y, x=x, pool=False, **kwds)
  592. _check_repr(result)
  593. for attr in NonPooledPanelOLS.ATTRIBUTES:
  594. _check_repr(getattr(result, attr))
  595. def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
  596. window = 25 # must be larger than rank of x
  597. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  598. moving = ols(y=y, x=x, window_type=window_type,
  599. window=window, **kwds)
  600. index = moving._index
  601. for n, i in enumerate(moving._valid_indices):
  602. if window_type == 'rolling' and i >= window:
  603. prior_date = index[i - window + 1]
  604. else:
  605. prior_date = index[0]
  606. date = index[i]
  607. x_iter = {}
  608. for k, v in compat.iteritems(x):
  609. x_iter[k] = v.truncate(before=prior_date, after=date)
  610. y_iter = y.truncate(before=prior_date, after=date)
  611. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  612. static = ols(y=y_iter, x=x_iter, **kwds)
  613. self.compare(static, moving, event_index=i,
  614. result_index=n)
  615. _check_non_raw_results(moving)
  616. def checkForSeries(self, x, y, series_x, series_y, **kwds):
  617. # Consistency check with simple OLS.
  618. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  619. result = ols(y=y, x=x, **kwds)
  620. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  621. reference = ols(y=series_y, x=series_x, **kwds)
  622. self.compare(reference, result)
  623. def compare(self, static, moving, event_index=None,
  624. result_index=None):
  625. # Check resid if we have a time index specified
  626. if event_index is not None:
  627. staticSlice = _period_slice(static, -1)
  628. movingSlice = _period_slice(moving, event_index)
  629. ref = static._resid_raw[staticSlice]
  630. res = moving._resid_raw[movingSlice]
  631. assert_almost_equal(ref, res)
  632. ref = static._y_fitted_raw[staticSlice]
  633. res = moving._y_fitted_raw[movingSlice]
  634. assert_almost_equal(ref, res)
  635. # Check y_fitted
  636. for field in self.FIELDS:
  637. attr = '_%s_raw' % field
  638. ref = getattr(static, attr)
  639. res = getattr(moving, attr)
  640. if result_index is not None:
  641. res = res[result_index]
  642. assert_almost_equal(ref, res)
  643. def test_auto_rolling_window_type(self):
  644. data = tm.makeTimeDataFrame()
  645. y = data.pop('A')
  646. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  647. window_model = ols(y=y, x=data, window=20, min_periods=10)
  648. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  649. rolling_model = ols(y=y, x=data, window=20, min_periods=10,
  650. window_type='rolling')
  651. assert_frame_equal(window_model.beta, rolling_model.beta)
  652. def test_group_agg(self):
  653. from pandas.stats.plm import _group_agg
  654. values = np.ones((10, 2)) * np.arange(10).reshape((10, 1))
  655. bounds = np.arange(5) * 2
  656. f = lambda x: x.mean(axis=0)
  657. agged = _group_agg(values, bounds, f)
  658. assert(agged[1][0] == 2.5)
  659. assert(agged[2][0] == 4.5)
  660. # test a function that doesn't aggregate
  661. f2 = lambda x: np.zeros((2, 2))
  662. self.assertRaises(Exception, _group_agg, values, bounds, f2)
  663. def _check_non_raw_results(model):
  664. _check_repr(model)
  665. _check_repr(model.resid)
  666. _check_repr(model.summary_as_matrix)
  667. _check_repr(model.y_fitted)
  668. _check_repr(model.y_predict)
  669. def _period_slice(panelModel, i):
  670. index = panelModel._x_trans.index
  671. period = index.levels[0][i]
  672. L, R = index.get_major_bounds(period, period)
  673. return slice(L, R)
  674. class TestOLSFilter(tm.TestCase):
  675. _multiprocess_can_split_ = True
  676. def setUp(self):
  677. date_index = date_range(datetime(2009, 12, 11), periods=3,
  678. freq=datetools.bday)
  679. ts = Series([3, 1, 4], index=date_index)
  680. self.TS1 = ts
  681. date_index = date_range(datetime(2009, 12, 11), periods=5,
  682. freq=datetools.bday)
  683. ts = Series([1, 5, 9, 2, 6], index=date_index)
  684. self.TS2 = ts
  685. date_index = date_range(datetime(2009, 12, 11), periods=3,
  686. freq=datetools.bday)
  687. ts = Series([5, np.nan, 3], index=date_index)
  688. self.TS3 = ts
  689. date_index = date_range(datetime(2009, 12, 11), periods=5,
  690. freq=datetools.bday)
  691. ts = Series([np.nan, 5, 8, 9, 7], index=date_index)
  692. self.TS4 = ts
  693. data = {'x1': self.TS2, 'x2': self.TS4}
  694. self.DF1 = DataFrame(data=data)
  695. data = {'x1': self.TS2, 'x2': self.TS4}
  696. self.DICT1 = data
  697. def testFilterWithSeriesRHS(self):
  698. (lhs, rhs, weights, rhs_pre,
  699. index, valid) = _filter_data(self.TS1, {'x1': self.TS2}, None)
  700. self.tsAssertEqual(self.TS1.astype(np.float64), lhs, check_names=False)
  701. self.tsAssertEqual(self.TS2[:3].astype(np.float64), rhs['x1'],
  702. check_names=False)
  703. self.tsAssertEqual(self.TS2.astype(np.float64), rhs_pre['x1'],
  704. check_names=False)
  705. def testFilterWithSeriesRHS2(self):
  706. (lhs, rhs, weights, rhs_pre,
  707. index, valid) = _filter_data(self.TS2, {'x1': self.TS1}, None)
  708. self.tsAssertEqual(self.TS2[:3].astype(np.float64), lhs,
  709. check_names=False)
  710. self.tsAssertEqual(self.TS1.astype(np.float64), rhs['x1'],
  711. check_names=False)
  712. self.tsAssertEqual(self.TS1.astype(np.float64), rhs_pre['x1'],
  713. check_names=False)
  714. def testFilterWithSeriesRHS3(self):
  715. (lhs, rhs, weights, rhs_pre,
  716. index, valid) = _filter_data(self.TS3, {'x1': self.TS4}, None)
  717. exp_lhs = self.TS3[2:3]
  718. exp_rhs = self.TS4[2:3]
  719. exp_rhs_pre = self.TS4[1:]
  720. self.tsAssertEqual(exp_lhs, lhs, check_names=False)
  721. self.tsAssertEqual(exp_rhs, rhs['x1'], check_names=False)
  722. self.tsAssertEqual(exp_rhs_pre, rhs_pre['x1'], check_names=False)
  723. def testFilterWithDataFrameRHS(self):
  724. (lhs, rhs, weights, rhs_pre,
  725. index, valid) = _filter_data(self.TS1, self.DF1, None)
  726. exp_lhs = self.TS1[1:].astype(np.float64)
  727. exp_rhs1 = self.TS2[1:3]
  728. exp_rhs2 = self.TS4[1:3].astype(np.float64)
  729. self.tsAssertEqual(exp_lhs, lhs, check_names=False)
  730. self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False)
  731. self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False)
  732. def testFilterWithDictRHS(self):
  733. (lhs, rhs, weights, rhs_pre,
  734. index, valid) = _filter_data(self.TS1, self.DICT1, None)
  735. exp_lhs = self.TS1[1:].astype(np.float64)
  736. exp_rhs1 = self.TS2[1:3].astype(np.float64)
  737. exp_rhs2 = self.TS4[1:3].astype(np.float64)
  738. self.tsAssertEqual(exp_lhs, lhs, check_names=False)
  739. self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False)
  740. self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False)
  741. def tsAssertEqual(self, ts1, ts2, **kwargs):
  742. self.assert_series_equal(ts1, ts2, **kwargs)
  743. if __name__ == '__main__':
  744. import nose
  745. nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
  746. exit=False)