PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/stats/tests/test_moments.py

https://github.com/lenolib/pandas
Python | 350 lines | 254 code | 86 blank | 10 comment | 17 complexity | 25b099fca2343a1a69de2095af90a5ae MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import unittest
  2. import nose
  3. import sys
  4. import functools
  5. from datetime import datetime
  6. from numpy.random import randn
  7. import numpy as np
  8. from pandas import Series, DataFrame, bdate_range, isnull, notnull
  9. from pandas.util.testing import assert_almost_equal, assert_series_equal
  10. import pandas.core.datetools as datetools
  11. import pandas.stats.moments as mom
  12. import pandas.util.testing as tm
  # Fixture dimensions: setUp draws N random observations for the array and
  # Series fixtures and builds the DataFrame fixture with N rows, K columns.
  N = 100
  K = 10
  class TestMoments(unittest.TestCase):
      """Exercise the rolling and exponentially weighted functions in ``mom``.

      Each rolling function goes through ``_check_moment_func``, which
      compares it with a plain reduction over the same window on a raw
      ndarray and then runs it on Series/DataFrame inputs.  The ``ewm*``
      functions get lighter checks through ``_check_ew`` and
      ``_check_binary_ew``.
      """

      # Positions of the fixture array that setUp overwrites with NaN.
      _nan_locs = np.arange(20, 40)
      # Declared next to _nan_locs; nothing in this class reads it.
      _inf_locs = np.array([])

      def setUp(self):
          # Fresh random fixtures for every test: a length-N array with a NaN
          # stretch, the same data as a business-day indexed Series, and an
          # N x K DataFrame of independent random values on the same index.
          arr = randn(N)
          arr[self._nan_locs] = np.nan
          self.arr = arr

          self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
          self.series = Series(arr.copy(), index=self.rng)
          self.frame = DataFrame(randn(N, K), index=self.rng,
                                 columns=np.arange(K))

      def test_rolling_sum(self):
          self._check_moment_func(mom.rolling_sum, np.sum)

      def test_rolling_count(self):
          # Reference: number of finite values in the window.  The function
          # is checked without min_periods and without the NaN assertion.
          def counter(x):
              return np.isfinite(x).astype(float).sum()

          self._check_moment_func(mom.rolling_count, counter,
                                  has_min_periods=False,
                                  preserve_nan=False)

      def test_rolling_mean(self):
          self._check_moment_func(mom.rolling_mean, np.mean)

      def test_rolling_median(self):
          self._check_moment_func(mom.rolling_median, np.median)

      def test_rolling_min(self):
          self._check_moment_func(mom.rolling_min, np.min)

      def test_rolling_max(self):
          self._check_moment_func(mom.rolling_max, np.max)

      def test_rolling_quantile(self):
          def scoreatpercentile(a, per):
              # Reference quantile: the sorted value at the truncated
              # fractional position per * (n - 1); no interpolation.
              values = np.sort(a, axis=0)
              idx = per / 1. * (values.shape[0] - 1)
              return values[int(idx)]

          for q in [.1, .5, .9]:
              # f and alt close over the loop variable q; both are consumed
              # by _check_moment_func before the next iteration rebinds it.
              def f(x, window, min_periods=None, freq=None):
                  return mom.rolling_quantile(x, window, q,
                                              min_periods=min_periods,
                                              freq=freq)

              def alt(x):
                  return scoreatpercentile(x, q)

              self._check_moment_func(f, alt)

      def test_rolling_apply(self):
          # rolling_apply on an empty Series is expected to equal that Series.
          ser = Series([])
          assert_series_equal(ser,
                              mom.rolling_apply(ser, 10, lambda x: x.mean()))

          def roll_mean(x, window, min_periods=None, freq=None):
              # Mean of the finite entries handed to the callback.
              return mom.rolling_apply(x, window,
                                       lambda x: x[np.isfinite(x)].mean(),
                                       min_periods=min_periods,
                                       freq=freq)

          self._check_moment_func(roll_mean, np.mean)

      def test_rolling_std(self):
          # Default call is compared with ddof=1, then ddof=0 explicitly.
          self._check_moment_func(mom.rolling_std,
                                  lambda x: np.std(x, ddof=1))
          self._check_moment_func(functools.partial(mom.rolling_std, ddof=0),
                                  lambda x: np.std(x, ddof=0))

      def test_rolling_var(self):
          # Default call is compared with ddof=1, then ddof=0 explicitly.
          self._check_moment_func(mom.rolling_var,
                                  lambda x: np.var(x, ddof=1))
          self._check_moment_func(functools.partial(mom.rolling_var, ddof=0),
                                  lambda x: np.var(x, ddof=0))

      def test_rolling_skew(self):
          # scipy supplies the reference implementation; skip without it.
          try:
              from scipy.stats import skew
          except ImportError:
              raise nose.SkipTest('no scipy')
          self._check_moment_func(mom.rolling_skew,
                                  lambda x: skew(x, bias=False))

      def test_rolling_kurt(self):
          # scipy supplies the reference implementation; skip without it.
          try:
              from scipy.stats import kurtosis
          except ImportError:
              raise nose.SkipTest('no scipy')
          self._check_moment_func(mom.rolling_kurt,
                                  lambda x: kurtosis(x, bias=False))

      def _check_moment_func(self, func, static_comp, window=50,
                             has_min_periods=True,
                             has_time_rule=True,
                             preserve_nan=True):
          """Run the ndarray and the Series/DataFrame checks for ``func``.

          func : rolling function, called as ``func(data, window, ...)``
          static_comp : reference reduction applied to one window of data
          window : window length, forwarded to the ndarray check only
          has_min_periods : whether ``func`` is called with ``min_periods``
          has_time_rule : whether the ``freq='B'`` checks are run
          preserve_nan : whether NaN output is asserted at ``_nan_locs``
          """
          self._check_ndarray(func, static_comp, window=window,
                              has_min_periods=has_min_periods,
                              preserve_nan=preserve_nan)

          self._check_structures(func, static_comp,
                                 has_min_periods=has_min_periods,
                                 has_time_rule=has_time_rule)

      def _check_ndarray(self, func, static_comp, window=50,
                         has_min_periods=True,
                         preserve_nan=True):
          """Check ``func`` on raw ndarrays against ``static_comp``.

          The last rolling value over ``self.arr`` must match ``static_comp``
          applied to the trailing ``window`` observations.  The remaining
          checks use locally built arrays with fixed window sizes.
          """
          result = func(self.arr, window)
          # Compare against the same number of trailing observations that
          # was handed to func, so a non-default window is checked correctly.
          assert_almost_equal(result[-1],
                              static_comp(self.arr[-window:]))

          if preserve_nan:
              self.assertTrue(np.isnan(result[self._nan_locs]).all())

          # excluding NaNs correctly
          arr = randn(50)
          arr[:10] = np.nan
          arr[-10:] = np.nan

          if has_min_periods:
              result = func(arr, 50, min_periods=30)
              assert_almost_equal(result[-1], static_comp(arr[10:-10]))

              # min_periods is working correctly: with 10 NaNs at each end
              # and a window of 20, positions 24 and -6 are the first and
              # last whose window holds 15 non-NaN values.
              result = func(arr, 20, min_periods=15)
              self.assertTrue(np.isnan(result[23]))
              self.assertTrue(not np.isnan(result[24]))

              self.assertTrue(not np.isnan(result[-6]))
              self.assertTrue(np.isnan(result[-5]))

              arr2 = randn(20)
              result = func(arr2, 10, min_periods=5)
              self.assertTrue(isnull(result[3]))
              self.assertTrue(notnull(result[4]))

              # min_periods=0 must give the same output as min_periods=1
              result0 = func(arr, 20, min_periods=0)
              result1 = func(arr, 20, min_periods=1)
              assert_almost_equal(result0, result1)
          else:
              result = func(arr, 50)
              assert_almost_equal(result[-1], static_comp(arr[10:-10]))

      def _check_structures(self, func, static_comp,
                            has_min_periods=True, has_time_rule=True):
          """Check ``func`` on the Series and DataFrame fixtures.

          Verifies the result types for a window of 50 and, when
          ``has_time_rule`` is true, compares the last value obtained with
          ``freq='B'`` on every-other-row input against ``static_comp`` over
          the matching date range of that input.
          """
          series_result = func(self.series, 50)
          self.assertTrue(isinstance(series_result, Series))

          frame_result = func(self.frame, 50)
          self.assertEqual(type(frame_result), DataFrame)

          if not has_time_rule:
              return

          # check time_rule works
          win = 25
          minp = 10

          if has_min_periods:
              series_result = func(self.series[::2], win, min_periods=minp,
                                   freq='B')
              frame_result = func(self.frame[::2], win, min_periods=minp,
                                  freq='B')
          else:
              series_result = func(self.series[::2], win, freq='B')
              frame_result = func(self.frame[::2], win, freq='B')

          last_date = series_result.index[-1]
          # A window of `win` business days ending at last_date starts
          # win - 1 business days earlier.
          prev_date = last_date - (win - 1) * datetools.bday

          trunc_series = self.series[::2].truncate(prev_date, last_date)
          trunc_frame = self.frame[::2].truncate(prev_date, last_date)

          assert_almost_equal(series_result[-1], static_comp(trunc_series))

          assert_almost_equal(frame_result.xs(last_date),
                              trunc_frame.apply(static_comp))

      def test_legacy_time_rule_arg(self):
          # The legacy time_rule='WEEKDAY' argument must give the same
          # result as freq='B'.
          try:
              from StringIO import StringIO
          except ImportError:
              from io import StringIO

          rng = bdate_range('1/1/2000', periods=20)
          ts = Series(np.random.randn(20), index=rng)
          ts = ts.take(np.random.permutation(len(ts))[:12]).sort_index()

          # suppress deprecation warnings; keep the stream that was active
          # so it, not necessarily the interpreter default, is restored.
          saved_stderr = sys.stderr
          sys.stderr = StringIO()
          try:
              result = mom.rolling_mean(ts, 1, min_periods=1, freq='B')
              expected = mom.rolling_mean(ts, 1, min_periods=1,
                                          time_rule='WEEKDAY')
              tm.assert_series_equal(result, expected)

              result = mom.ewma(ts, span=5, freq='B')
              expected = mom.ewma(ts, span=5, time_rule='WEEKDAY')
              tm.assert_series_equal(result, expected)
          finally:
              sys.stderr = saved_stderr

      def test_ewma(self):
          self._check_ew(mom.ewma)

          # A single unit value, smoothed with adjust=False, must still sum
          # to roughly 1 over the whole array.
          arr = np.zeros(1000)
          arr[5] = 1
          result = mom.ewma(arr, span=100, adjust=False).sum()
          self.assertTrue(np.abs(result - 1) < 1e-2)

      def test_ewmvar(self):
          self._check_ew(mom.ewmvar)

      def test_ewmvol(self):
          self._check_ew(mom.ewmvol)

      def test_ewma_span_com_args(self):
          # com=9.5 and span=20 must give the same result; passing both, or
          # neither, must raise.
          A = mom.ewma(self.arr, com=9.5)
          B = mom.ewma(self.arr, span=20)
          assert_almost_equal(A, B)

          self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
          self.assertRaises(Exception, mom.ewma, self.arr)

      def _check_ew(self, func):
          """Run the ndarray and the Series/DataFrame checks for ``func``."""
          self._check_ew_ndarray(func)
          self._check_ew_structures(func)

      def _check_ew_ndarray(self, func, preserve_nan=False):
          """Check ``func`` on raw ndarrays.

          With ``preserve_nan`` set, output at ``_nan_locs`` must be NaN.
          Integer input must always produce float64 output.
          """
          result = func(self.arr, com=10)
          if preserve_nan:
              self.assertTrue(np.isnan(result[self._nan_locs]).all())

          # TODO: no assertion covers how leading/trailing NaNs are excluded;
          # the original only built an unused fixture for that case.

          # pass in ints
          result2 = func(np.arange(50), span=10)
          self.assertTrue(result2.dtype == np.float64)

      def _check_ew_structures(self, func):
          """Check that ``func`` returns the same container type it is given."""
          series_result = func(self.series, com=10)
          self.assertTrue(isinstance(series_result, Series))
          frame_result = func(self.frame, com=10)
          self.assertEqual(type(frame_result), DataFrame)

      # binary moments
      def test_rolling_cov(self):
          A = self.series
          B = A + randn(len(A))

          result = mom.rolling_cov(A, B, 50, min_periods=25)
          assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])

      def test_rolling_corr(self):
          A = self.series
          B = A + randn(len(A))

          result = mom.rolling_corr(A, B, 50, min_periods=25)
          assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])

          # test for correct bias correction
          a = tm.makeTimeSeries()
          b = tm.makeTimeSeries()
          a[:5] = np.nan
          b[:10] = np.nan

          result = mom.rolling_corr(a, b, len(a), min_periods=1)
          assert_almost_equal(result[-1], a.corr(b))

      def test_rolling_corr_pairwise(self):
          # The (1, 5) entry of the pairwise result must equal rolling_corr
          # applied directly to columns 1 and 5.
          panel = mom.rolling_corr_pairwise(self.frame, 10, min_periods=5)

          correl = panel.ix[:, 1, 5]
          exp = mom.rolling_corr(self.frame[1], self.frame[5],
                                 10, min_periods=5)
          tm.assert_series_equal(correl, exp)

      def test_flex_binary_frame(self):
          def _check(method):
              # Series vs. frame, in either argument order, must equal the
              # method applied against each column of the frame.
              series = self.frame[1]

              res = method(series, self.frame, 10)
              res2 = method(self.frame, series, 10)
              exp = self.frame.apply(lambda x: method(series, x, 10))

              tm.assert_frame_equal(res, exp)
              tm.assert_frame_equal(res2, exp)

              # Frame vs. frame must equal the column-by-column results.
              frame2 = self.frame.copy()
              frame2.values[:] = np.random.randn(*frame2.shape)

              res3 = method(self.frame, frame2, 10)
              exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10))
                                   for k in self.frame))
              tm.assert_frame_equal(res3, exp)

          methods = [mom.rolling_corr, mom.rolling_cov]
          for meth in methods:
              _check(meth)

      def test_ewmcov(self):
          self._check_binary_ew(mom.ewmcov)

      def test_ewmcorr(self):
          self._check_binary_ew(mom.ewmcorr)

      def _check_binary_ew(self, func):
          """Check a two-argument exponentially weighted function.

          With ``min_periods=5`` the first 15 results must be NaN and all
          later ones non-NaN.  Passing a raw ndarray as the second argument
          must raise.
          """
          A = Series(randn(50), index=np.arange(50))
          B = A[2:] + randn(48)

          A[:10] = np.nan
          B[-10:] = np.nan

          result = func(A, B, 20, min_periods=5)

          self.assertTrue(np.isnan(result.values[:15]).all())
          self.assertTrue(not np.isnan(result.values[15:]).any())

          self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
  if __name__ == '__main__':
      # Run this module's tests through nose when executed as a script.
      import nose
      nose_args = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
      nose.runmodule(argv=nose_args, exit=False)