/pandas/stats/tests/test_moments.py

https://github.com/ajcr/pandas · Python · 950 lines · 677 code · 218 blank · 55 comment · 64 complexity · 7655c4f8580db20c45dd0bb8353c3dd6 MD5 · raw file

  1. import nose
  2. import sys
  3. import functools
  4. from datetime import datetime
  5. from numpy.random import randn
  6. import numpy as np
  7. from pandas import Series, DataFrame, bdate_range, isnull, notnull
  8. from pandas.util.testing import (
  9. assert_almost_equal, assert_series_equal, assert_frame_equal
  10. )
  11. import pandas.core.datetools as datetools
  12. import pandas.stats.moments as mom
  13. import pandas.util.testing as tm
  14. from pandas.compat import range, zip, PY3, StringIO
  15. N, K = 100, 10
  16. class TestMoments(tm.TestCase):
  17. _multiprocess_can_split_ = True
  18. _nan_locs = np.arange(20, 40)
  19. _inf_locs = np.array([])
  def setUp(self):
      """Create the shared fixtures: a NaN-padded array, a Series and a DataFrame."""
      values = randn(N)
      # Blank out the configured positions so NaN handling gets exercised.
      values[self._nan_locs] = np.NaN

      self.arr = values
      self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
      self.series = Series(values.copy(), index=self.rng)
      self.frame = DataFrame(randn(N, K),
                             index=self.rng,
                             columns=np.arange(K))
  def test_centered_axis_validation(self):
      """Centered rolling_mean accepts in-range axes and raises ValueError otherwise."""
      ones_1d = Series(np.ones(10))
      ones_2d = DataFrame(np.ones((10, 10)))

      # Series: axis 0 works, axis 1 must raise.
      mom.rolling_mean(ones_1d, 3, center=True, axis=0)
      self.assertRaises(ValueError, mom.rolling_mean, ones_1d, 3,
                        center=True, axis=1)

      # DataFrame: axes 0 and 1 work, axis 2 must raise.
      for valid_axis in (0, 1):
          mom.rolling_mean(ones_2d, 3, center=True, axis=valid_axis)
      self.assertRaises(ValueError, mom.rolling_mean, ones_2d, 3,
                        center=True, axis=2)
  def test_rolling_sum(self):
      """Run the shared moment checks for rolling_sum against np.sum."""
      self._check_moment_func(func=mom.rolling_sum, static_comp=np.sum)
  def test_rolling_count(self):
      """Run the shared moment checks for rolling_count against a finite-value counter."""
      def count_finite(x):
          # Number of finite (non-NaN, non-inf) entries, as a float.
          return np.isfinite(x).astype(float).sum()

      options = dict(has_min_periods=False, preserve_nan=False, fill_value=0)
      self._check_moment_func(mom.rolling_count, count_finite, **options)
  def test_rolling_mean(self):
      """Run the shared moment checks for rolling_mean against np.mean."""
      self._check_moment_func(func=mom.rolling_mean, static_comp=np.mean)
  def test_cmov_mean(self):
      """Compare centered rolling_mean with scikits.timeseries' cmov_mean."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_mean
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      data = np.random.randn(10)

      # ndarray input: unmasked reference values line up with rs[2:-2]
      reference = cmov_mean(data, 5)
      rolled = mom.rolling_mean(data, 5, center=True)
      assert_almost_equal(reference.compressed(), rolled[2:-2])
      assert_almost_equal(reference.mask, np.isnan(rolled))

      # Series input must give the same numbers as the ndarray path
      series_expected = Series(rolled)
      series_rolled = mom.rolling_mean(Series(data), 5, center=True)
      assert_series_equal(series_expected, series_rolled)
  def test_cmov_window(self):
      """Compare centered boxcar rolling_window with scikits.timeseries' cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      data = np.random.randn(10)

      # ndarray input
      reference = cmov_window(data, 5, 'boxcar')
      rolled = mom.rolling_window(data, 5, 'boxcar', center=True)
      assert_almost_equal(reference.compressed(), rolled[2:-2])
      assert_almost_equal(reference.mask, np.isnan(rolled))

      # Series input must match the ndarray result
      series_expected = Series(rolled)
      series_rolled = mom.rolling_window(Series(data), 5, 'boxcar',
                                         center=True)
      assert_series_equal(series_expected, series_rolled)
  def test_cmov_window_corner(self):
      """Corner cases of rolling_window: all-NaN, empty and shorter-than-window input.

      Only ``mom.rolling_window`` is exercised, so scikits.timeseries is not
      imported and the test does not skip when that package is unavailable.
      """
      tm._skip_if_no_scipy()

      # all nan
      vals = np.empty(10, dtype=float)
      vals.fill(np.nan)
      rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
      self.assertTrue(np.isnan(rs).all())

      # empty
      vals = np.array([])
      rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
      self.assertEqual(len(rs), 0)

      # shorter than window
      vals = np.random.randn(5)
      rs = mom.rolling_window(vals, 10, 'boxcar')
      self.assertTrue(np.isnan(rs).all())
      self.assertEqual(len(rs), 5)
  def test_cmov_window_frame(self):
      """Centered boxcar rolling_window on a DataFrame matches cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      # Two-column input
      data = np.random.randn(10, 2)
      reference = DataFrame(cmov_window(data, 5, 'boxcar'))
      rolled = mom.rolling_window(DataFrame(data), 5, 'boxcar', center=True)
      assert_frame_equal(reference, rolled)
  def test_cmov_window_na_min_periods(self):
      """Boxcar rolling_window with NaNs and min_periods equals rolling_mean.

      Both sides of the comparison come from ``mom``, so scikits.timeseries
      is not imported and the test does not skip when it is unavailable.
      """
      tm._skip_if_no_scipy()

      # min_periods
      vals = Series(np.random.randn(10))
      vals[4] = np.nan
      vals[8] = np.nan

      xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
      rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)
      assert_series_equal(xp, rs)
  def test_cmov_window_regular(self):
      """Parameter-free window types: rolling_window must match cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      window_names = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']
      for name in window_names:
          data = np.random.randn(10)
          reference = Series(cmov_window(data, 5, name))
          rolled = mom.rolling_window(Series(data), 5, name, center=True)
          assert_series_equal(reference, rolled)
  def test_cmov_window_special(self):
      """Parameterised window types: rolling_window must match cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      # (window name, extra keyword parameters for that window)
      cases = [
          ('kaiser', {'beta': 1.}),
          ('gaussian', {'std': 1.}),
          ('general_gaussian', {'power': 2., 'width': 2.}),
          ('slepian', {'width': 0.5}),
      ]
      for name, params in cases:
          data = np.random.randn(10)
          # cmov_window takes the parameters positionally inside a tuple
          window_spec = (name,) + tuple(params.values())
          reference = Series(cmov_window(data, 5, window_spec))
          rolled = mom.rolling_window(Series(data), 5, name, center=True,
                                      **params)
          assert_series_equal(reference, rolled)
  def test_rolling_median(self):
      """Run the shared moment checks for rolling_median against np.median."""
      self._check_moment_func(func=mom.rolling_median, static_comp=np.median)
  def test_rolling_min(self):
      """rolling_min: shared checks, oversized window, and min_periods > window."""
      self._check_moment_func(mom.rolling_min, np.min)

      # A window longer than the data with min_periods=1: every result is 1.
      values = np.array([1, 2, 3, 4, 5])
      rolled = mom.rolling_min(values, window=100, min_periods=1)
      assert_almost_equal(rolled, np.ones(len(values)))

      # min_periods larger than the window must raise
      short = np.array([1, 2, 3])
      self.assertRaises(ValueError, mom.rolling_min, short,
                        window=3, min_periods=5)
  def test_rolling_max(self):
      """rolling_max: shared checks, oversized window, and min_periods > window."""
      self._check_moment_func(mom.rolling_max, np.max)

      # Increasing input with an oversized window and min_periods=1:
      # the result equals the input itself.
      values = np.array([1, 2, 3, 4, 5])
      rolled = mom.rolling_max(values, window=100, min_periods=1)
      assert_almost_equal(values, rolled)

      # min_periods larger than the window must raise
      short = np.array([1, 2, 3])
      self.assertRaises(ValueError, mom.rolling_max, short,
                        window=3, min_periods=5)
  def test_rolling_quantile(self):
      """Run the shared moment checks for rolling_quantile at q = 0.1, 0.5, 0.9."""
      def scoreatpercentile(a, per):
          # Reference quantile: sort and take the element at the truncated
          # fractional position (no interpolation).
          ordered = np.sort(a, axis=0)
          position = per / 1. * (ordered.shape[0] - 1)
          return ordered[int(position)]

      for quantile in [.1, .5, .9]:
          def rolling_at_q(x, window, min_periods=None, freq=None,
                           center=False):
              return mom.rolling_quantile(x, window, quantile,
                                          min_periods=min_periods,
                                          freq=freq,
                                          center=center)

          def static_at_q(x):
              return scoreatpercentile(x, quantile)

          # Both closures are consumed within this iteration.
          self._check_moment_func(rolling_at_q, static_at_q)
  def test_rolling_apply(self):
      """rolling_apply: empty Series round-trips; a mean lambda passes the shared checks."""
      empty = Series([])
      applied = mom.rolling_apply(empty, 10, lambda x: x.mean())
      assert_series_equal(empty, applied)

      def finite_mean(window_values):
          # Mean over the finite entries of one window
          return window_values[np.isfinite(window_values)].mean()

      def roll_mean(x, window, min_periods=None, freq=None, center=False):
          return mom.rolling_apply(x, window, finite_mean,
                                   min_periods=min_periods,
                                   freq=freq,
                                   center=center)

      self._check_moment_func(roll_mean, np.mean)
  def test_rolling_apply_out_of_bounds(self):
      """rolling_apply with a window longer than the input (GH #1850)."""
      arr = np.arange(4)

      # Default min_periods: the window (10) never fills, so all is null.
      result = mom.rolling_apply(arr, 10, np.sum)
      self.assertTrue(isnull(result).all())

      # With min_periods=1 each partial window holds every value seen so
      # far, so the rolling sum must equal the cumulative sum.  (Comparing
      # the result with itself, as before, could never fail.)
      result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
      expected = np.cumsum(arr).astype(float)
      assert_almost_equal(result, expected)
  def test_rolling_std(self):
      """Run the shared moment checks for rolling_std with default ddof and ddof=0."""
      def sample_std(x):
          return np.std(x, ddof=1)

      def population_std(x):
          return np.std(x, ddof=0)

      self._check_moment_func(mom.rolling_std, sample_std)

      rolling_std_ddof0 = functools.partial(mom.rolling_std, ddof=0)
      self._check_moment_func(rolling_std_ddof0, population_std)
  def test_rolling_std_1obs(self):
      """rolling_std on one-observation windows."""
      # window=1, min_periods=1: expected output is all zeros
      dense = np.array([1., 2., 3., 4., 5.])
      rolled = mom.rolling_std(dense, 1, min_periods=1)
      assert_almost_equal(rolled, np.zeros(5))

      # Position 2 has a single non-NaN value in its window and
      # min_periods=2, so the result there must be NaN.
      leading_nan = np.array([np.nan, np.nan, 3., 4., 5.])
      rolled = mom.rolling_std(leading_nan, 3, min_periods=2)
      self.assertTrue(np.isnan(rolled[2]))
  def test_rolling_std_neg_sqrt(self):
      """rolling_std / ewmstd stay finite on near-constant input.

      Unit test taken from Bottleneck (move_nanstd negative-sqrt case).
      """
      repeated = 0.00028718669878572767
      values = np.array([0.0011448196318903589] + [repeated] * 4)

      rolled = mom.rolling_std(values, window=3)
      self.assertTrue(np.isfinite(rolled[2:]).all())

      weighted = mom.ewmstd(values, span=3)
      self.assertTrue(np.isfinite(weighted[2:]).all())
  def test_rolling_var(self):
      """Run the shared moment checks for rolling_var with default ddof and ddof=0."""
      def sample_var(x):
          return np.var(x, ddof=1)

      def population_var(x):
          return np.var(x, ddof=0)

      # The default-ddof run also enables the large-offset stability check.
      self._check_moment_func(mom.rolling_var, sample_var, test_stable=True)

      rolling_var_ddof0 = functools.partial(mom.rolling_var, ddof=0)
      self._check_moment_func(rolling_var_ddof0, population_var)
  def test_rolling_skew(self):
      """Run the shared moment checks for rolling_skew against scipy's unbiased skew."""
      try:
          from scipy.stats import skew
      except ImportError:
          raise nose.SkipTest('no scipy')

      def unbiased_skew(x):
          return skew(x, bias=False)

      self._check_moment_func(mom.rolling_skew, unbiased_skew)
  def test_rolling_kurt(self):
      """Run the shared moment checks for rolling_kurt against scipy's unbiased kurtosis."""
      try:
          from scipy.stats import kurtosis
      except ImportError:
          raise nose.SkipTest('no scipy')

      def unbiased_kurtosis(x):
          return kurtosis(x, bias=False)

      self._check_moment_func(mom.rolling_kurt, unbiased_kurtosis)
  def test_fperr_robustness(self):
      """Rolling sum/mean/var of a fixed float64 buffer must not go negative (GH #2114, #2527)."""
      # TODO: remove this once python 2.5 out of picture
      if PY3:
          raise nose.SkipTest("doesn't work on python 3")

      # #2114
      # Raw little-endian float64 bytes; decoded with np.frombuffer below.
      data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'

      arr = np.frombuffer(data, dtype='<f8')
      # The buffer is little-endian; convert on big-endian hosts.
      if sys.byteorder != "little":
          arr = arr.byteswap().newbyteorder()

      # Every window-2 result past the first position must be non-negative.
      result = mom.rolling_sum(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      result = mom.rolling_mean(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      result = mom.rolling_var(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      # #2527, ugh
      # A trailing zero must keep the right sign for both +arr and -arr.
      arr = np.array([0.00012456, 0.0003, 0])
      result = mom.rolling_mean(arr, 1)
      self.assertTrue(result[-1] >= 0)

      result = mom.rolling_mean(-arr, 1)
      self.assertTrue(result[-1] <= 0)
  def _check_moment_func(self, func, static_comp, window=50,
                         has_min_periods=True,
                         has_center=True,
                         has_time_rule=True,
                         preserve_nan=True,
                         fill_value=None,
                         test_stable=False):
      """Run the ndarray checks and then the Series/DataFrame checks for ``func``.

      ``static_comp`` is the non-rolling reference implementation; the
      remaining flags are forwarded to ``_check_ndarray`` and
      ``_check_structures``.
      """
      # Options understood by both helpers
      common = dict(has_min_periods=has_min_periods,
                    has_center=has_center,
                    fill_value=fill_value)

      self._check_ndarray(func, static_comp,
                          window=window,
                          preserve_nan=preserve_nan,
                          test_stable=test_stable,
                          **common)
      self._check_structures(func, static_comp,
                             has_time_rule=has_time_rule,
                             **common)
  def _check_ndarray(self, func, static_comp, window=50,
                     has_min_periods=True,
                     preserve_nan=True,
                     has_center=True,
                     fill_value=None,
                     test_stable=False):
      """Check a rolling function on plain ndarrays.

      Parameters
      ----------
      func : callable
          Rolling function, called as ``func(arr, window, ...)``.
      static_comp : callable
          Reference applied to the window contents directly.
      window : int
          Window used for the checks on ``self.arr``; the last-window
          reference slice follows this value.
      has_min_periods, has_center : bool
          Whether ``func`` accepts ``min_periods`` / ``center``.
      preserve_nan : bool
          Whether the result must be NaN at ``self._nan_locs``.
      fill_value : scalar or None
          Expected value of the trailing centered results; NaN when None.
      test_stable : bool
          Also compare against ``static_comp`` on data shifted by 1e9.
      """
      result = func(self.arr, window)
      # Slice by ``window`` so a non-default window is compared correctly.
      assert_almost_equal(result[-1],
                          static_comp(self.arr[-window:]))

      if preserve_nan:
          # assertTrue (not a bare assert) so the check survives ``python -O``
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      # excluding NaNs correctly
      arr = randn(50)
      arr[:10] = np.NaN
      arr[-10:] = np.NaN

      if has_min_periods:
          result = func(arr, 50, min_periods=30)
          assert_almost_equal(result[-1], static_comp(arr[10:-10]))

          # min_periods is working correctly
          result = func(arr, 20, min_periods=15)
          self.assertTrue(np.isnan(result[23]))
          self.assertFalse(np.isnan(result[24]))

          self.assertFalse(np.isnan(result[-6]))
          self.assertTrue(np.isnan(result[-5]))

          arr2 = randn(20)
          result = func(arr2, 10, min_periods=5)
          self.assertTrue(isnull(result[3]))
          self.assertTrue(notnull(result[4]))

          # min_periods=0 must behave like min_periods=1
          result0 = func(arr, 20, min_periods=0)
          result1 = func(arr, 20, min_periods=1)
          assert_almost_equal(result0, result1)
      else:
          result = func(arr, 50)
          assert_almost_equal(result[-1], static_comp(arr[10:-10]))

      if has_center:
          if has_min_periods:
              result = func(arr, 20, min_periods=15, center=True)
              expected = func(arr, 20, min_periods=15)
          else:
              result = func(arr, 20, center=True)
              expected = func(arr, 20)

          # Centered output is the trailing output shifted back by 9
          assert_almost_equal(result[1], expected[10])
          if fill_value is None:
              self.assertTrue(np.isnan(result[-9:]).all())
          else:
              # Compare with the requested fill value, not a literal 0
              self.assertTrue((result[-9:] == fill_value).all())

          if has_min_periods:
              self.assertTrue(np.isnan(expected[23]))
              self.assertTrue(np.isnan(result[14]))
              self.assertTrue(np.isnan(expected[-5]))
              self.assertTrue(np.isnan(result[-14]))

      if test_stable:
          # A large offset exposes numerically unstable implementations
          result = func(self.arr + 1e9, window)
          assert_almost_equal(result[-1],
                              static_comp(self.arr[-window:] + 1e9))
  def _check_structures(self, func, static_comp,
                        has_min_periods=True, has_time_rule=True,
                        has_center=True,
                        fill_value=None):
      """Check a rolling function on the Series and DataFrame fixtures.

      Verifies the return types, optionally the ``freq`` (time rule)
      handling, and optionally that ``center=True`` equals the trailing
      result shifted back by 12 periods.
      """
      # Return types are preserved
      tm.assert_isinstance(func(self.series, 50), Series)
      self.assertEqual(type(func(self.frame, 50)), DataFrame)

      # min_periods is only passed when the function supports it
      minp_kwargs = {'min_periods': 10} if has_min_periods else {}

      # check time_rule works
      if has_time_rule:
          win = 25
          sparse_series = self.series[::2]
          sparse_frame = self.frame[::2]

          series_result = func(sparse_series, win, freq='B', **minp_kwargs)
          frame_result = func(sparse_frame, win, freq='B', **minp_kwargs)

          # The last window covers ``win`` business days ending at last_date
          last_date = series_result.index[-1]
          prev_date = last_date - (win - 1) * datetools.bday

          trunc_series = self.series[::2].truncate(prev_date, last_date)
          trunc_frame = self.frame[::2].truncate(prev_date, last_date)

          assert_almost_equal(series_result[-1], static_comp(trunc_series))
          assert_almost_equal(frame_result.xs(last_date),
                              trunc_frame.apply(static_comp))

      if has_center:
          series_xp = func(self.series, 25, **minp_kwargs).shift(-12)
          frame_xp = func(self.frame, 25, **minp_kwargs).shift(-12)

          series_rs = func(self.series, 25, center=True, **minp_kwargs)
          frame_rs = func(self.frame, 25, center=True, **minp_kwargs)

          if fill_value is not None:
              # shift() leaves NaN at the tail; centered output uses the fill
              series_xp = series_xp.fillna(fill_value)
              frame_xp = frame_xp.fillna(fill_value)

          assert_series_equal(series_xp, series_rs)
          assert_frame_equal(frame_xp, frame_rs)
  def test_ewma(self):
      """ewma: shared EW checks, plus unadjusted weights of one impulse sum to ~1."""
      self._check_ew(mom.ewma)

      # Single unit impulse at position 5
      impulse = np.zeros(1000)
      impulse[5] = 1
      total = mom.ewma(impulse, span=100, adjust=False).sum()
      self.assertTrue(np.abs(total - 1) < 1e-2)
  def test_ewma_nan_handling(self):
      """ewma of 1, five NaNs, 1 is expected to be 1 at every position."""
      values = [1.] + [np.nan] * 5 + [1.]
      s = Series(values)
      smoothed = mom.ewma(s, com=5)
      assert_almost_equal(smoothed, [1] * len(s))
  def test_ewmvar(self):
      """Run the shared exponentially-weighted checks for ewmvar."""
      self._check_ew(func=mom.ewmvar)
  def test_ewmvol(self):
      """Run the shared exponentially-weighted checks for ewmvol."""
      self._check_ew(func=mom.ewmvol)
  def test_ewma_span_com_args(self):
      """com=9.5 and span=20 agree; giving both or neither raises."""
      by_com = mom.ewma(self.arr, com=9.5)
      by_span = mom.ewma(self.arr, span=20)
      assert_almost_equal(by_com, by_span)

      # Over- and under-specified decay arguments
      for bad_kwargs in ({'com': 9.5, 'span': 20}, {}):
          self.assertRaises(Exception, mom.ewma, self.arr, **bad_kwargs)
  def test_ewma_halflife_arg(self):
      """halflife=10 matches its com equivalent; conflicting decay args raise."""
      by_com = mom.ewma(self.arr, com=13.932726172912965)
      by_halflife = mom.ewma(self.arr, halflife=10.0)
      assert_almost_equal(by_com, by_halflife)

      # Any combination of two or more decay arguments, or none, is an error
      invalid_combinations = [
          {'span': 20, 'halflife': 50},
          {'com': 9.5, 'halflife': 50},
          {'com': 9.5, 'span': 20, 'halflife': 50},
          {},
      ]
      for bad_kwargs in invalid_combinations:
          self.assertRaises(Exception, mom.ewma, self.arr, **bad_kwargs)
  def test_ew_empty_arrays(self):
      """ewma, ewmvol and ewmvar return an empty result for empty input."""
      empty = np.array([], dtype=np.float64)
      for ew_func in (mom.ewma, mom.ewmvol, mom.ewmvar):
          assert_almost_equal(ew_func(empty, 3), empty)
  def _check_ew(self, func):
      """Run both the ndarray and the Series/DataFrame checks for an EW function."""
      for check in (self._check_ew_ndarray, self._check_ew_structures):
          check(func)
  def _check_ew_ndarray(self, func, preserve_nan=False):
      """Check an exponentially-weighted function on plain ndarrays.

      Parameters
      ----------
      func : callable
          EW function accepting ``com=`` / ``span=`` keywords.
      preserve_nan : bool
          If true, the result must be NaN at ``self._nan_locs``.
      """
      result = func(self.arr, com=10)
      if preserve_nan:
          # assertTrue (not a bare assert) so the check survives ``python -O``
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      # TODO: add a check that NaN-padded input is excluded correctly; an
      # array was built for this here but nothing was ever asserted on it.

      # pass in ints: output must be promoted to float64
      result2 = func(np.arange(50), span=10)
      # np.float64 is the same dtype as the np.float_ alias, which NumPy 2.0
      # removed.
      self.assertEqual(result2.dtype, np.float64)
  def _check_ew_structures(self, func):
      """An EW function must return a Series for a Series and a DataFrame for a DataFrame."""
      from_series = func(self.series, com=10)
      tm.assert_isinstance(from_series, Series)

      from_frame = func(self.frame, com=10)
      self.assertEqual(type(from_frame), DataFrame)
  438. # binary moments
  def test_rolling_cov(self):
      """Last rolling_cov value equals np.cov over the final 50 observations."""
      left = self.series
      right = left + randn(len(left))

      rolled = mom.rolling_cov(left, right, 50, min_periods=25)
      reference = np.cov(left[-50:], right[-50:])[0, 1]
      assert_almost_equal(rolled[-1], reference)
  def test_rolling_cov_pairwise(self):
      """Pairwise rolling_cov on the frame agrees with the column-by-column call."""
      window = 10
      self._check_pairwise_moment(mom.rolling_cov, window, min_periods=5)
  def test_rolling_corr(self):
      """rolling_corr matches np.corrcoef on the last window and Series.corr overall."""
      left = self.series
      right = left + randn(len(left))

      rolled = mom.rolling_corr(left, right, 50, min_periods=25)
      reference = np.corrcoef(left[-50:], right[-50:])[0, 1]
      assert_almost_equal(rolled[-1], reference)

      # test for correct bias correction
      ts_a = tm.makeTimeSeries()
      ts_b = tm.makeTimeSeries()
      ts_a[:5] = np.nan
      ts_b[:10] = np.nan

      full_window = mom.rolling_corr(ts_a, ts_b, len(ts_a), min_periods=1)
      assert_almost_equal(full_window[-1], ts_a.corr(ts_b))
  def test_rolling_corr_pairwise(self):
      """Pairwise rolling_corr on the frame agrees with the column-by-column call."""
      window = 10
      self._check_pairwise_moment(mom.rolling_corr, window, min_periods=5)
  def _check_pairwise_moment(self, func, *args, **kwargs):
      """The (1, 5) entry of the pairwise result equals ``func`` on columns 1 and 5."""
      pairwise = func(self.frame, *args, **kwargs)
      from_panel = pairwise.ix[:, 1, 5]

      col_1, col_5 = self.frame[1], self.frame[5]
      direct = func(col_1, col_5, *args, **kwargs)

      tm.assert_series_equal(from_panel, direct)
  def test_flex_binary_moment(self):
      """_flex_binary_moment raises TypeError on scalar inputs (GH3155).

      The point is a clean error rather than blowing the stack.
      """
      scalar_args = (5, 6, None)
      self.assertRaises(TypeError, mom._flex_binary_moment, *scalar_args)
  def test_corr_sanity(self):
      """Centered rolling_corr must stay within [-1, 1] (GH 3155)."""
      df = DataFrame(
          np.array(
              [[0.87024726, 0.18505595],
               [0.64355431, 0.3091617],
               [0.92372966, 0.50552513],
               [0.00203756, 0.04520709],
               [0.84780328, 0.33394331],
               [0.78369152, 0.63919667]])
      )

      res = mom.rolling_corr(df[0], df[1], 5, center=True)
      self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res]))

      # and some fuzzing
      for i in range(10):
          df = DataFrame(np.random.rand(30, 2))
          res = mom.rolling_corr(df[0], df[1], 5, center=True)
          try:
              self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1
                                   for x in res]))
          except AssertionError:
              # Show the offending values, then let the failure propagate;
              # swallowing it would mean the fuzz loop can never fail.
              print(res)
              raise
  def test_flex_binary_frame(self):
      """rolling_corr / rolling_cov accept Series-vs-frame and frame-vs-frame inputs."""
      def _check(method):
          column = self.frame[1]

          # Series against frame, in both argument orders
          series_first = method(column, self.frame, 10)
          frame_first = method(self.frame, column, 10)
          per_column = self.frame.apply(lambda x: method(column, x, 10))
          tm.assert_frame_equal(series_first, per_column)
          tm.assert_frame_equal(frame_first, per_column)

          # Frame against an independent frame: matched column by column
          other = self.frame.copy()
          other.values[:] = np.random.randn(*other.shape)

          both_frames = method(self.frame, other, 10)
          column_pairs = dict((k, method(self.frame[k], other[k], 10))
                              for k in self.frame)
          tm.assert_frame_equal(both_frames, DataFrame(column_pairs))

      for meth in [mom.rolling_corr, mom.rolling_cov]:
          _check(meth)
  def test_ewmcov(self):
      """Run the shared binary exponentially-weighted checks for ewmcov."""
      self._check_binary_ew(func=mom.ewmcov)
  def test_ewmcov_pairwise(self):
      """Pairwise ewmcov on the frame agrees with the column-by-column call."""
      options = {'span': 10, 'min_periods': 5}
      self._check_pairwise_moment(mom.ewmcov, **options)
  def test_ewmcorr(self):
      """Run the shared binary exponentially-weighted checks for ewmcorr."""
      self._check_binary_ew(func=mom.ewmcorr)
  def test_ewmcorr_pairwise(self):
      """Pairwise ewmcorr on the frame agrees with the column-by-column call."""
      options = {'span': 10, 'min_periods': 5}
      self._check_pairwise_moment(mom.ewmcorr, **options)
  def _check_binary_ew(self, func):
      """Check NaN placement and input validation of a binary EW function."""
      left = Series(randn(50), index=np.arange(50))
      right = left[2:] + randn(48)

      left[:10] = np.NaN
      right[-10:] = np.NaN

      result = func(left, right, 20, min_periods=5)
      nan_mask = np.isnan(result.values)

      # First 15 results are NaN, every later one is not
      self.assertTrue(nan_mask[:15].all())
      self.assertFalse(nan_mask[15:].any())

      # A bare ndarray as second operand is rejected
      self.assertRaises(Exception, func, left, randn(50), 20, min_periods=5)
  def test_expanding_apply(self):
      """expanding_apply: empty Series round-trips; a mean lambda passes the shared checks."""
      empty = Series([])
      applied = mom.expanding_apply(empty, lambda x: x.mean())
      assert_series_equal(empty, applied)

      def window_mean(values):
          return values.mean()

      def expanding_mean(x, min_periods=1, freq=None):
          return mom.expanding_apply(x, window_mean,
                                     min_periods=min_periods,
                                     freq=freq)

      self._check_expanding(expanding_mean, np.mean)
  def test_expanding_apply_args_kwargs(self):
      """expanding_apply forwards ``args`` and ``kwargs`` to the applied function."""
      def mean_w_arg(x, const):
          return np.mean(x) + const

      df = DataFrame(np.random.rand(20, 3))
      expected = mom.expanding_apply(df, np.mean) + 20.

      via_args = mom.expanding_apply(df, mean_w_arg, args=(20,))
      assert_frame_equal(via_args, expected)

      via_kwargs = mom.expanding_apply(df, mean_w_arg, kwargs={'const': 20})
      assert_frame_equal(via_kwargs, expected)
  def test_expanding_corr(self):
      """expanding_corr equals rolling_corr with a full-length window and min_periods=1."""
      left = self.series.dropna()
      noisy = left + randn(len(left))
      right = noisy[:-5]

      expanding = mom.expanding_corr(left, right)
      rolling = mom.rolling_corr(left, right, len(left), min_periods=1)

      assert_almost_equal(rolling, expanding)
  def test_expanding_count(self):
      """expanding_count equals rolling_count with a full-length window."""
      expanding = mom.expanding_count(self.series)
      rolling = mom.rolling_count(self.series, len(self.series))
      assert_almost_equal(expanding, rolling)
  def test_expanding_quantile(self):
      """expanding_quantile(0.5) equals rolling_quantile with a full-length window."""
      full_window = len(self.series)

      expanding = mom.expanding_quantile(self.series, 0.5)
      rolling = mom.rolling_quantile(self.series, full_window, 0.5,
                                     min_periods=1)

      assert_almost_equal(expanding, rolling)
  def test_expanding_cov(self):
      """expanding_cov equals rolling_cov with a full-length window and min_periods=1."""
      left = self.series
      noisy = left + randn(len(left))
      right = noisy[:-5]

      expanding = mom.expanding_cov(left, right)
      rolling = mom.rolling_cov(left, right, len(left), min_periods=1)

      assert_almost_equal(rolling, expanding)
  def test_expanding_max(self):
      """Run the shared expanding checks for expanding_max (NaN check disabled)."""
      self._check_expanding(func=mom.expanding_max, static_comp=np.max,
                            preserve_nan=False)
  def test_expanding_cov_pairwise(self):
      """Pairwise expanding_cov equals pairwise rolling_cov with a full-length window."""
      full_window = len(self.frame)

      expanding = mom.expanding_cov(self.frame)
      rolling = mom.rolling_cov(self.frame, full_window, min_periods=1)

      # Compare item by item
      for item in expanding.items:
          assert_almost_equal(expanding[item], rolling[item])
  def test_expanding_corr_pairwise(self):
      """Pairwise expanding_corr equals pairwise rolling_corr with a full-length window."""
      full_window = len(self.frame)

      expanding = mom.expanding_corr(self.frame)
      rolling = mom.rolling_corr(self.frame, full_window, min_periods=1)

      # Compare item by item
      for item in expanding.items:
          assert_almost_equal(expanding[item], rolling[item])
  def test_rolling_skew_edge_cases(self):
      """rolling_skew on constant data, a too-small window, and a known sample."""
      all_nan = Series([np.NaN] * 5)

      # yields all NaN (0 variance)
      constant = Series([1] * 5)
      assert_series_equal(all_nan, mom.rolling_skew(constant, window=5))

      # yields all NaN (window too small)
      noise = Series(np.random.randn(5))
      assert_series_equal(all_nan, mom.rolling_skew(noise, window=2))

      # yields [NaN, NaN, NaN, 0.177994, 1.548824]
      sample = Series([-1.50837035, -0.1297039, 0.19501095,
                       1.73508164, 0.41941401])
      expected = Series([np.NaN, np.NaN, np.NaN,
                         0.177994, 1.548824])
      assert_series_equal(expected, mom.rolling_skew(sample, window=4))
  def test_rolling_kurt_edge_cases(self):
      """rolling_kurt on constant data, a too-small window, and a known sample."""
      all_nan = Series([np.NaN] * 5)

      # yields all NaN (0 variance)
      constant = Series([1] * 5)
      assert_series_equal(all_nan, mom.rolling_kurt(constant, window=5))

      # yields all NaN (window too small)
      noise = Series(np.random.randn(5))
      assert_series_equal(all_nan, mom.rolling_kurt(noise, window=3))

      # yields [NaN, NaN, NaN, 1.224307, 2.671499]
      sample = Series([-1.50837035, -0.1297039, 0.19501095,
                       1.73508164, 0.41941401])
      expected = Series([np.NaN, np.NaN, np.NaN,
                         1.224307, 2.671499])
      assert_series_equal(expected, mom.rolling_kurt(sample, window=4))
  def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
                               has_time_rule=True, preserve_nan=True):
      """Check an expanding function on plain ndarrays.

      Parameters
      ----------
      func : callable
          Expanding function, called as ``func(arr, ...)``.
      static_comp : callable
          Reference applied directly to the leading slice of the data.
      has_min_periods : bool
          Whether ``func`` accepts ``min_periods``.
      has_time_rule : bool
          Accepted for signature compatibility; not used by this check.
      preserve_nan : bool
          Whether the result must be NaN at ``self._nan_locs``.
      """
      result = func(self.arr)

      # Position 10 sees the first 11 observations
      assert_almost_equal(result[10],
                          static_comp(self.arr[:11]))

      if preserve_nan:
          # assertTrue (not a bare assert) so the check survives ``python -O``
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      arr = randn(50)

      if has_min_periods:
          result = func(arr, min_periods=30)
          self.assertTrue(np.isnan(result[:29]).all())
          assert_almost_equal(result[-1], static_comp(arr[:50]))

          # min_periods is working correctly
          result = func(arr, min_periods=15)
          self.assertTrue(np.isnan(result[13]))
          self.assertFalse(np.isnan(result[14]))

          arr2 = randn(20)
          result = func(arr2, min_periods=5)
          self.assertTrue(isnull(result[3]))
          self.assertTrue(notnull(result[4]))

          # min_periods=0 must behave like min_periods=1
          result0 = func(arr, min_periods=0)
          result1 = func(arr, min_periods=1)
          assert_almost_equal(result0, result1)
      else:
          result = func(arr)
          assert_almost_equal(result[-1], static_comp(arr[:50]))
  def _check_expanding_structures(self, func):
      """An expanding function must return a Series for a Series and a DataFrame for a DataFrame."""
      from_series = func(self.series)
      tm.assert_isinstance(from_series, Series)

      from_frame = func(self.frame)
      self.assertEqual(type(from_frame), DataFrame)
  def _check_expanding(self, func, static_comp, has_min_periods=True,
                       has_time_rule=True,
                       preserve_nan=True):
      """Run the ndarray checks and then the Series/DataFrame checks for an expanding function."""
      ndarray_options = dict(has_min_periods=has_min_periods,
                             has_time_rule=has_time_rule,
                             preserve_nan=preserve_nan)
      self._check_expanding_ndarray(func, static_comp, **ndarray_options)
      self._check_expanding_structures(func)
  def test_rolling_max_gh6297(self):
      """Replicate result expected in GH #6297"""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # A second observation on Jan 3rd, so one day holds two data points
      stamps.append(datetime(1975, 1, 3, 6, 0))

      # Float values 1.0..6.0, ordered chronologically
      series = Series(range(1, 7), index=stamps)
      series = series.map(lambda x: float(x)).sort_index()

      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=daily_index)

      rolled = mom.rolling_max(series, window=1, freq='D')
      assert_series_equal(expected, rolled)
  def test_rolling_max_how_resample(self):
      """rolling_max with freq='D': default, 'median' and 'mean' resampling."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.extend([datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)])

      # Float values, ordered chronologically
      series = Series(list(range(0, 5)) + [10, 20], index=stamps)
      series = series.map(lambda x: float(x)).sort_index()

      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]

      # (extra keyword arguments, expected value for the last day)
      cases = [
          ({}, 20.0),                                    # default how: max
          ({'how': 'median'}, 10.0),                     # median of 4, 10, 20
          ({'how': 'mean'}, (4.0 + 10.0 + 20.0) / 3.0),  # mean of 4, 10, 20
      ]
      for extra_kwargs, last_day_value in cases:
          expected = Series([0.0, 1.0, 2.0, 3.0, last_day_value],
                            index=daily_index)
          rolled = mom.rolling_max(series, window=1, freq='D',
                                   **extra_kwargs)
          assert_series_equal(expected, rolled)
  def test_rolling_min_how_resample(self):
      """rolling_min with freq='D' resamples each day to its minimum by default."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.extend([datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)])

      # Float values, ordered chronologically
      series = Series(list(range(0, 5)) + [10, 20], index=stamps)
      series = series.map(lambda x: float(x)).sort_index()

      # Default how should be min
      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=daily_index)

      rolled = mom.rolling_min(series, window=1, freq='D')
      assert_series_equal(expected, rolled)
  def test_rolling_median_how_resample(self):
      """rolling_median with freq='D' resamples each day to its median by default."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.extend([datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)])

      # Float values, ordered chronologically
      series = Series(list(range(0, 5)) + [10, 20], index=stamps)
      series = series.map(lambda x: float(x)).sort_index()

      # Default how should be median
      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=daily_index)

      rolled = mom.rolling_median(series, window=1, freq='D')
      assert_series_equal(expected, rolled)
  if __name__ == '__main__':
      # Run this module under nose: verbose, stop at the first failure and
      # drop into pdb on errors and failures.
      import nose
      runner_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
      nose.runmodule(argv=runner_argv, exit=False)