PageRenderTime 72ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/stats/tests/test_moments.py

https://github.com/kljensen/pandas
Python | 516 lines | 370 code | 130 blank | 16 comment | 22 complexity | ec1a06450581f77f8e3ad39118d27d9e MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import unittest
  2. import nose
  3. import sys
  4. import functools
  5. from datetime import datetime
  6. from numpy.random import randn
  7. import numpy as np
  8. from pandas import Series, DataFrame, bdate_range, isnull, notnull
  9. from pandas.util.testing import assert_almost_equal, assert_series_equal
  10. import pandas.core.datetools as datetools
  11. import pandas.stats.moments as mom
  12. import pandas.util.testing as tm
  13. N, K = 100, 10
  14. class TestMoments(unittest.TestCase):
  15. _nan_locs = np.arange(20, 40)
  16. _inf_locs = np.array([])
  17. def setUp(self):
  18. arr = randn(N)
  19. arr[self._nan_locs] = np.NaN
  20. self.arr = arr
  21. self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
  22. self.series = Series(arr.copy(), index=self.rng)
  23. self.frame = DataFrame(randn(N, K), index=self.rng,
  24. columns=np.arange(K))
  25. def test_rolling_sum(self):
  26. self._check_moment_func(mom.rolling_sum, np.sum)
  27. def test_rolling_count(self):
  28. counter = lambda x: np.isfinite(x).astype(float).sum()
  29. self._check_moment_func(mom.rolling_count, counter,
  30. has_min_periods=False,
  31. preserve_nan=False)
  32. def test_rolling_mean(self):
  33. self._check_moment_func(mom.rolling_mean, np.mean)
  34. def test_rolling_median(self):
  35. self._check_moment_func(mom.rolling_median, np.median)
  36. def test_rolling_min(self):
  37. self._check_moment_func(mom.rolling_min, np.min)
  38. a = np.array([1,2,3,4,5])
  39. b = mom.rolling_min(a, window=100, min_periods=1)
  40. assert_almost_equal(b, np.ones(len(a)))
  41. self.assertRaises(ValueError, mom.rolling_min, np.array([1,2,3]), window=3, min_periods=5)
  42. def test_rolling_max(self):
  43. self._check_moment_func(mom.rolling_max, np.max)
  44. a = np.array([1,2,3,4,5])
  45. b = mom.rolling_max(a, window=100, min_periods=1)
  46. assert_almost_equal(a, b)
  47. self.assertRaises(ValueError, mom.rolling_max, np.array([1,2,3]), window=3, min_periods=5)
  48. def test_rolling_quantile(self):
  49. qs = [.1, .5, .9]
  50. def scoreatpercentile(a, per):
  51. values = np.sort(a,axis=0)
  52. idx = per /1. * (values.shape[0] - 1)
  53. return values[int(idx)]
  54. for q in qs:
  55. def f(x, window, min_periods=None, freq=None):
  56. return mom.rolling_quantile(x, window, q,
  57. min_periods=min_periods,
  58. freq=freq)
  59. def alt(x):
  60. return scoreatpercentile(x, q)
  61. self._check_moment_func(f, alt)
  62. def test_rolling_apply(self):
  63. ser = Series([])
  64. assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x:x.mean()))
  65. def roll_mean(x, window, min_periods=None, freq=None):
  66. return mom.rolling_apply(x, window,
  67. lambda x: x[np.isfinite(x)].mean(),
  68. min_periods=min_periods,
  69. freq=freq)
  70. self._check_moment_func(roll_mean, np.mean)
  71. def test_rolling_apply_out_of_bounds(self):
  72. # #1850
  73. arr = np.arange(4)
  74. # it works!
  75. result = mom.rolling_apply(arr, 10, np.sum)
  76. self.assert_(isnull(result).all())
  77. result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
  78. assert_almost_equal(result, result)
  79. def test_rolling_std(self):
  80. self._check_moment_func(mom.rolling_std,
  81. lambda x: np.std(x, ddof=1))
  82. self._check_moment_func(functools.partial(mom.rolling_std, ddof=0),
  83. lambda x: np.std(x, ddof=0))
  84. def test_rolling_std_1obs(self):
  85. result = mom.rolling_std(np.array([1.,2.,3.,4.,5.]),
  86. 1, min_periods=1)
  87. expected = np.zeros(5)
  88. assert_almost_equal(result, expected)
  89. result = mom.rolling_std(np.array([np.nan,np.nan,3.,4.,5.]),
  90. 3, min_periods=2)
  91. self.assert_(np.isnan(result[2]))
  92. def test_rolling_std_neg_sqrt(self):
  93. # unit test from Bottleneck
  94. # Test move_nanstd for neg sqrt.
  95. a = np.array([0.0011448196318903589,
  96. 0.00028718669878572767,
  97. 0.00028718669878572767,
  98. 0.00028718669878572767,
  99. 0.00028718669878572767])
  100. b = mom.rolling_std(a, window=3)
  101. self.assert_(np.isfinite(b[2:]).all())
  102. b = mom.ewmstd(a, span=3)
  103. self.assert_(np.isfinite(b[2:]).all())
  104. def test_rolling_var(self):
  105. self._check_moment_func(mom.rolling_var,
  106. lambda x: np.var(x, ddof=1))
  107. self._check_moment_func(functools.partial(mom.rolling_var, ddof=0),
  108. lambda x: np.var(x, ddof=0))
  109. def test_rolling_skew(self):
  110. try:
  111. from scipy.stats import skew
  112. except ImportError:
  113. raise nose.SkipTest('no scipy')
  114. self._check_moment_func(mom.rolling_skew,
  115. lambda x: skew(x, bias=False))
  116. def test_rolling_kurt(self):
  117. try:
  118. from scipy.stats import kurtosis
  119. except ImportError:
  120. raise nose.SkipTest('no scipy')
  121. self._check_moment_func(mom.rolling_kurt,
  122. lambda x: kurtosis(x, bias=False))
  123. def _check_moment_func(self, func, static_comp, window=50,
  124. has_min_periods=True,
  125. has_time_rule=True,
  126. preserve_nan=True):
  127. self._check_ndarray(func, static_comp, window=window,
  128. has_min_periods=has_min_periods,
  129. preserve_nan=preserve_nan)
  130. self._check_structures(func, static_comp,
  131. has_min_periods=has_min_periods,
  132. has_time_rule=has_time_rule)
  133. def _check_ndarray(self, func, static_comp, window=50,
  134. has_min_periods=True,
  135. preserve_nan=True):
  136. result = func(self.arr, window)
  137. assert_almost_equal(result[-1],
  138. static_comp(self.arr[-50:]))
  139. if preserve_nan:
  140. assert(np.isnan(result[self._nan_locs]).all())
  141. # excluding NaNs correctly
  142. arr = randn(50)
  143. arr[:10] = np.NaN
  144. arr[-10:] = np.NaN
  145. if has_min_periods:
  146. result = func(arr, 50, min_periods=30)
  147. assert_almost_equal(result[-1], static_comp(arr[10:-10]))
  148. # min_periods is working correctly
  149. result = func(arr, 20, min_periods=15)
  150. self.assert_(np.isnan(result[23]))
  151. self.assert_(not np.isnan(result[24]))
  152. self.assert_(not np.isnan(result[-6]))
  153. self.assert_(np.isnan(result[-5]))
  154. arr2 = randn(20)
  155. result = func(arr2, 10, min_periods=5)
  156. self.assert_(isnull(result[3]))
  157. self.assert_(notnull(result[4]))
  158. # min_periods=0
  159. result0 = func(arr, 20, min_periods=0)
  160. result1 = func(arr, 20, min_periods=1)
  161. assert_almost_equal(result0, result1)
  162. else:
  163. result = func(arr, 50)
  164. assert_almost_equal(result[-1], static_comp(arr[10:-10]))
  165. def _check_structures(self, func, static_comp,
  166. has_min_periods=True, has_time_rule=True):
  167. series_result = func(self.series, 50)
  168. self.assert_(isinstance(series_result, Series))
  169. frame_result = func(self.frame, 50)
  170. self.assertEquals(type(frame_result), DataFrame)
  171. # check time_rule works
  172. if has_time_rule:
  173. win = 25
  174. minp = 10
  175. if has_min_periods:
  176. series_result = func(self.series[::2], win, min_periods=minp,
  177. freq='B')
  178. frame_result = func(self.frame[::2], win, min_periods=minp,
  179. freq='B')
  180. else:
  181. series_result = func(self.series[::2], win, freq='B')
  182. frame_result = func(self.frame[::2], win, freq='B')
  183. last_date = series_result.index[-1]
  184. prev_date = last_date - 24 * datetools.bday
  185. trunc_series = self.series[::2].truncate(prev_date, last_date)
  186. trunc_frame = self.frame[::2].truncate(prev_date, last_date)
  187. assert_almost_equal(series_result[-1], static_comp(trunc_series))
  188. assert_almost_equal(frame_result.xs(last_date),
  189. trunc_frame.apply(static_comp))
  190. def test_legacy_time_rule_arg(self):
  191. from StringIO import StringIO
  192. # suppress deprecation warnings
  193. sys.stderr = StringIO()
  194. rng = bdate_range('1/1/2000', periods=20)
  195. ts = Series(np.random.randn(20), index=rng)
  196. ts = ts.take(np.random.permutation(len(ts))[:12]).sort_index()
  197. try:
  198. result = mom.rolling_mean(ts, 1, min_periods=1, freq='B')
  199. expected = mom.rolling_mean(ts, 1, min_periods=1,
  200. time_rule='WEEKDAY')
  201. tm.assert_series_equal(result, expected)
  202. result = mom.ewma(ts, span=5, freq='B')
  203. expected = mom.ewma(ts, span=5, time_rule='WEEKDAY')
  204. tm.assert_series_equal(result, expected)
  205. finally:
  206. sys.stderr = sys.__stderr__
  207. def test_ewma(self):
  208. self._check_ew(mom.ewma)
  209. arr = np.zeros(1000)
  210. arr[5] = 1
  211. result = mom.ewma(arr, span=100, adjust=False).sum()
  212. self.assert_(np.abs(result - 1) < 1e-2)
  213. def test_ewmvar(self):
  214. self._check_ew(mom.ewmvar)
  215. def test_ewmvol(self):
  216. self._check_ew(mom.ewmvol)
  217. def test_ewma_span_com_args(self):
  218. A = mom.ewma(self.arr, com=9.5)
  219. B = mom.ewma(self.arr, span=20)
  220. assert_almost_equal(A, B)
  221. self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
  222. self.assertRaises(Exception, mom.ewma, self.arr)
  223. def test_ew_empty_arrays(self):
  224. arr = np.array([], dtype=np.float64)
  225. funcs = [mom.ewma, mom.ewmvol, mom.ewmvar]
  226. for f in funcs:
  227. result = f(arr, 3)
  228. assert_almost_equal(result, arr)
  229. def _check_ew(self, func):
  230. self._check_ew_ndarray(func)
  231. self._check_ew_structures(func)
  232. def _check_ew_ndarray(self, func, preserve_nan=False):
  233. result = func(self.arr, com=10)
  234. if preserve_nan:
  235. assert(np.isnan(result[self._nan_locs]).all())
  236. # excluding NaNs correctly
  237. arr = randn(50)
  238. arr[:10] = np.NaN
  239. arr[-10:] = np.NaN
  240. # ??? check something
  241. # pass in ints
  242. result2 = func(np.arange(50), span=10)
  243. self.assert_(result2.dtype == np.float_)
  244. def _check_ew_structures(self, func):
  245. series_result = func(self.series, com=10)
  246. self.assert_(isinstance(series_result, Series))
  247. frame_result = func(self.frame, com=10)
  248. self.assertEquals(type(frame_result), DataFrame)
  249. # binary moments
  250. def test_rolling_cov(self):
  251. A = self.series
  252. B = A + randn(len(A))
  253. result = mom.rolling_cov(A, B, 50, min_periods=25)
  254. assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])
  255. def test_rolling_corr(self):
  256. A = self.series
  257. B = A + randn(len(A))
  258. result = mom.rolling_corr(A, B, 50, min_periods=25)
  259. assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
  260. # test for correct bias correction
  261. a = tm.makeTimeSeries()
  262. b = tm.makeTimeSeries()
  263. a[:5] = np.nan
  264. b[:10] = np.nan
  265. result = mom.rolling_corr(a, b, len(a), min_periods=1)
  266. assert_almost_equal(result[-1], a.corr(b))
  267. def test_rolling_corr_pairwise(self):
  268. panel = mom.rolling_corr_pairwise(self.frame, 10, min_periods=5)
  269. correl = panel.ix[:, 1, 5]
  270. exp = mom.rolling_corr(self.frame[1], self.frame[5],
  271. 10, min_periods=5)
  272. tm.assert_series_equal(correl, exp)
  273. def test_flex_binary_frame(self):
  274. def _check(method):
  275. series = self.frame[1]
  276. res = method(series, self.frame, 10)
  277. res2 = method(self.frame, series, 10)
  278. exp = self.frame.apply(lambda x: method(series, x, 10))
  279. tm.assert_frame_equal(res, exp)
  280. tm.assert_frame_equal(res2, exp)
  281. frame2 = self.frame.copy()
  282. frame2.values[:] = np.random.randn(*frame2.shape)
  283. res3 = method(self.frame, frame2, 10)
  284. exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10))
  285. for k in self.frame))
  286. tm.assert_frame_equal(res3, exp)
  287. methods = [mom.rolling_corr, mom.rolling_cov]
  288. for meth in methods:
  289. _check(meth)
  290. def test_ewmcov(self):
  291. self._check_binary_ew(mom.ewmcov)
  292. def test_ewmcorr(self):
  293. self._check_binary_ew(mom.ewmcorr)
  294. def _check_binary_ew(self, func):
  295. A = Series(randn(50), index=np.arange(50))
  296. B = A[2:] + randn(48)
  297. A[:10] = np.NaN
  298. B[-10:] = np.NaN
  299. result = func(A, B, 20, min_periods=5)
  300. self.assert_(np.isnan(result.values[:15]).all())
  301. self.assert_(not np.isnan(result.values[15:]).any())
  302. self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
  303. def test_expanding_apply(self):
  304. ser = Series([])
  305. assert_series_equal(ser, mom.expanding_apply(ser, lambda x: x.mean()))
  306. def expanding_mean(x, min_periods=1, freq=None):
  307. return mom.expanding_apply(x,
  308. lambda x: x.mean(),
  309. min_periods=min_periods,
  310. freq=freq)
  311. self._check_expanding(expanding_mean, np.mean)
  312. def test_expanding_corr(self):
  313. A = self.series.dropna()
  314. B = (A + randn(len(A)))[:-5]
  315. result = mom.expanding_corr(A, B)
  316. rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)
  317. assert_almost_equal(rolling_result, result)
  318. def test_expanding_count(self):
  319. result = mom.expanding_count(self.series)
  320. assert_almost_equal(result, mom.rolling_count(self.series,
  321. len(self.series)))
  322. def test_expanding_quantile(self):
  323. result = mom.expanding_quantile(self.series, 0.5)
  324. rolling_result = mom.rolling_quantile(self.series,
  325. len(self.series),
  326. 0.5, min_periods=1)
  327. assert_almost_equal(result, rolling_result)
  328. def test_expanding_cov(self):
  329. A = self.series
  330. B = (A + randn(len(A)))[:-5]
  331. result = mom.expanding_cov(A, B)
  332. rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)
  333. assert_almost_equal(rolling_result, result)
  334. def test_expanding_max(self):
  335. self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)
  336. def test_expanding_corr_pairwise(self):
  337. result = mom.expanding_corr_pairwise(self.frame)
  338. rolling_result = mom.rolling_corr_pairwise(self.frame,
  339. len(self.frame),
  340. min_periods=1)
  341. for i in result.items:
  342. assert_almost_equal(result[i], rolling_result[i])
  343. def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
  344. has_time_rule=True, preserve_nan=True):
  345. result = func(self.arr)
  346. assert_almost_equal(result[10],
  347. static_comp(self.arr[:11]))
  348. if preserve_nan:
  349. assert(np.isnan(result[self._nan_locs]).all())
  350. arr = randn(50)
  351. if has_min_periods:
  352. result = func(arr, min_periods=30)
  353. assert(np.isnan(result[:29]).all())
  354. assert_almost_equal(result[-1], static_comp(arr[:50]))
  355. # min_periods is working correctly
  356. result = func(arr, min_periods=15)
  357. self.assert_(np.isnan(result[13]))
  358. self.assert_(not np.isnan(result[14]))
  359. arr2 = randn(20)
  360. result = func(arr2, min_periods=5)
  361. self.assert_(isnull(result[3]))
  362. self.assert_(notnull(result[4]))
  363. # min_periods=0
  364. result0 = func(arr, min_periods=0)
  365. result1 = func(arr, min_periods=1)
  366. assert_almost_equal(result0, result1)
  367. else:
  368. result = func(arr)
  369. assert_almost_equal(result[-1], static_comp(arr[:50]))
  370. def _check_expanding_structures(self, func):
  371. series_result = func(self.series)
  372. self.assert_(isinstance(series_result, Series))
  373. frame_result = func(self.frame)
  374. self.assertEquals(type(frame_result), DataFrame)
  375. def _check_expanding(self, func, static_comp, has_min_periods=True,
  376. has_time_rule=True,
  377. preserve_nan=True):
  378. self._check_expanding_ndarray(func, static_comp,
  379. has_min_periods=has_min_periods,
  380. has_time_rule=has_time_rule,
  381. preserve_nan=preserve_nan)
  382. self._check_expanding_structures(func)
  383. if __name__ == '__main__':
  384. import nose
  385. nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
  386. exit=False)