PageRenderTime 59ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/stats/tests/test_moments.py

https://github.com/ajcr/pandas
Python | 950 lines | 677 code | 218 blank | 55 comment | 64 complexity | 7655c4f8580db20c45dd0bb8353c3dd6 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import nose
  2. import sys
  3. import functools
  4. from datetime import datetime
  5. from numpy.random import randn
  6. import numpy as np
  7. from pandas import Series, DataFrame, bdate_range, isnull, notnull
  8. from pandas.util.testing import (
  9. assert_almost_equal, assert_series_equal, assert_frame_equal
  10. )
  11. import pandas.core.datetools as datetools
  12. import pandas.stats.moments as mom
  13. import pandas.util.testing as tm
  14. from pandas.compat import range, zip, PY3, StringIO
  # Dimensions of the shared fixtures: N observations, K frame columns.
  N = 100
  K = 10
  16. class TestMoments(tm.TestCase):
  17. _multiprocess_can_split_ = True
  18. _nan_locs = np.arange(20, 40)
  19. _inf_locs = np.array([])
  def setUp(self):
      """Build the ndarray, Series and DataFrame fixtures used by the tests."""
      values = randn(N)
      # Blank out the configured block of positions with missing values.
      values[self._nan_locs] = np.NaN
      self.arr = values

      self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
      # The Series gets its own copy so it does not share memory with arr.
      self.series = Series(values.copy(), index=self.rng)

      frame_values = randn(N, K)
      self.frame = DataFrame(frame_values, index=self.rng,
                             columns=np.arange(K))
  def test_centered_axis_validation(self):
      """Centered rolling_mean accepts valid axes and raises on invalid ones."""
      ser = Series(np.ones(10))
      frame = DataFrame(np.ones((10, 10)))

      # Series: axis 0 is fine, axis 1 must raise
      mom.rolling_mean(ser, 3, center=True, axis=0)
      self.assertRaises(ValueError, mom.rolling_mean, ser, 3,
                        center=True, axis=1)

      # DataFrame: axes 0 and 1 are fine, axis 2 must raise
      for good_axis in (0, 1):
          mom.rolling_mean(frame, 3, center=True, axis=good_axis)
      self.assertRaises(ValueError, mom.rolling_mean, frame, 3,
                        center=True, axis=2)
  def test_rolling_sum(self):
      """Check rolling_sum against np.sum via the shared moment checks."""
      self._check_moment_func(func=mom.rolling_sum, static_comp=np.sum)
  def test_rolling_count(self):
      """Check rolling_count against a count of finite values."""
      def count_finite(values):
          # number of finite entries, as a float
          return np.isfinite(values).astype(float).sum()

      self._check_moment_func(mom.rolling_count, count_finite,
                              has_min_periods=False,
                              preserve_nan=False,
                              fill_value=0)
  def test_rolling_mean(self):
      """Check rolling_mean against np.mean via the shared moment checks."""
      self._check_moment_func(func=mom.rolling_mean, static_comp=np.mean)
  def test_cmov_mean(self):
      """Compare centered rolling_mean with scikits.timeseries cmov_mean."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_mean
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      data = np.random.randn(10)
      reference = cmov_mean(data, 5)

      # ndarray input: unmasked reference values match the interior results
      array_result = mom.rolling_mean(data, 5, center=True)
      assert_almost_equal(reference.compressed(), array_result[2:-2])
      assert_almost_equal(reference.mask, np.isnan(array_result))

      # Series input must give the same numbers as ndarray input
      series_result = mom.rolling_mean(Series(data), 5, center=True)
      assert_series_equal(Series(array_result), series_result)
  def test_cmov_window(self):
      """Compare centered boxcar rolling_window with cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      data = np.random.randn(10)
      reference = cmov_window(data, 5, 'boxcar')

      # ndarray input: unmasked reference values match the interior results
      array_result = mom.rolling_window(data, 5, 'boxcar', center=True)
      assert_almost_equal(reference.compressed(), array_result[2:-2])
      assert_almost_equal(reference.mask, np.isnan(array_result))

      # Series input must give the same numbers as ndarray input
      series_result = mom.rolling_window(Series(data), 5, 'boxcar',
                                         center=True)
      assert_series_equal(Series(array_result), series_result)
  def test_cmov_window_corner(self):
      """Corner cases of rolling_window: all-NaN, empty and too-short input.

      Only ``mom.rolling_window`` is exercised here, so the test no longer
      imports (and skips on the absence of) ``scikits.timeseries``.
      """
      tm._skip_if_no_scipy()

      # all nan
      vals = np.empty(10, dtype=float)
      vals.fill(np.nan)
      rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
      self.assertTrue(np.isnan(rs).all())

      # empty
      vals = np.array([])
      rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
      self.assertEqual(len(rs), 0)

      # shorter than window
      vals = np.random.randn(5)
      rs = mom.rolling_window(vals, 10, 'boxcar')
      self.assertTrue(np.isnan(rs).all())
      self.assertEqual(len(rs), 5)
  def test_cmov_window_frame(self):
      """Centered boxcar rolling_window on a DataFrame matches cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      # two-column input
      data = np.random.randn(10, 2)
      reference = DataFrame(cmov_window(data, 5, 'boxcar'))
      result = mom.rolling_window(DataFrame(data), 5, 'boxcar', center=True)
      assert_frame_equal(reference, result)
  def test_cmov_window_na_min_periods(self):
      """Boxcar rolling_window honours min_periods like rolling_mean does.

      The comparison is between two ``mom`` functions only, so the test no
      longer imports (and skips on the absence of) ``scikits.timeseries``.
      """
      tm._skip_if_no_scipy()

      # min_periods
      vals = Series(np.random.randn(10))
      vals[4] = np.nan
      vals[8] = np.nan

      xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
      rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)
      assert_series_equal(xp, rs)
  def test_cmov_window_regular(self):
      """Parameter-free window types agree with cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      window_names = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']

      for name in window_names:
          # fresh random data for every window type
          data = np.random.randn(10)
          reference = Series(cmov_window(data, 5, name))
          result = mom.rolling_window(Series(data), 5, name, center=True)
          assert_series_equal(reference, result)
  def test_cmov_window_special(self):
      """Window types that take extra parameters agree with cmov_window."""
      tm._skip_if_no_scipy()
      try:
          from scikits.timeseries.lib import cmov_window
      except ImportError:
          raise nose.SkipTest("no scikits.timeseries")

      # (window type, its extra keyword parameters)
      cases = [
          ('kaiser', {'beta': 1.}),
          ('gaussian', {'std': 1.}),
          ('general_gaussian', {'power': 2., 'width': 2.}),
          ('slepian', {'width': 0.5}),
      ]

      for name, params in cases:
          data = np.random.randn(10)
          # cmov_window takes the parameters packed into a tuple after the name
          window_spec = (name,) + tuple(params.values())
          reference = Series(cmov_window(data, 5, window_spec))
          result = mom.rolling_window(Series(data), 5, name, center=True,
                                      **params)
          assert_series_equal(reference, result)
  def test_rolling_median(self):
      """Check rolling_median against np.median via the shared checks."""
      self._check_moment_func(func=mom.rolling_median, static_comp=np.median)
  def test_rolling_min(self):
      """Check rolling_min, including an oversized window and bad min_periods."""
      self._check_moment_func(mom.rolling_min, np.min)

      # window longer than the data, min_periods=1: expect all ones
      values = np.array([1, 2, 3, 4, 5])
      running_min = mom.rolling_min(values, window=100, min_periods=1)
      assert_almost_equal(running_min, np.ones(len(values)))

      # min_periods larger than the window must raise
      short = np.array([1, 2, 3])
      self.assertRaises(ValueError, mom.rolling_min, short,
                        window=3, min_periods=5)
  def test_rolling_max(self):
      """Check rolling_max, including an oversized window and bad min_periods."""
      self._check_moment_func(mom.rolling_max, np.max)

      # window longer than the data, min_periods=1: expect the input back
      values = np.array([1, 2, 3, 4, 5])
      running_max = mom.rolling_max(values, window=100, min_periods=1)
      assert_almost_equal(values, running_max)

      # min_periods larger than the window must raise
      short = np.array([1, 2, 3])
      self.assertRaises(ValueError, mom.rolling_max, short,
                        window=3, min_periods=5)
  def test_rolling_quantile(self):
      """Check rolling_quantile at several quantiles against a sorted lookup."""
      def score_at(values, fraction):
          # pick the element at the truncated fractional position
          ordered = np.sort(values, axis=0)
          position = fraction / 1. * (ordered.shape[0] - 1)
          return ordered[int(position)]

      def make_rolling(quantile):
          # adapt rolling_quantile to the signature _check_moment_func expects
          def rolling(x, window, min_periods=None, freq=None, center=False):
              return mom.rolling_quantile(x, window, quantile,
                                          min_periods=min_periods,
                                          freq=freq,
                                          center=center)
          return rolling

      def make_static(quantile):
          def static(x):
              return score_at(x, quantile)
          return static

      for quantile in [.1, .5, .9]:
          self._check_moment_func(make_rolling(quantile),
                                  make_static(quantile))
  def test_rolling_apply(self):
      """Check rolling_apply on empty input and as a rolling mean."""
      # an empty Series comes back unchanged
      empty = Series([])
      applied = mom.rolling_apply(empty, 10, lambda x: x.mean())
      assert_series_equal(empty, applied)

      def finite_mean(values):
          # mean over the finite entries only
          return values[np.isfinite(values)].mean()

      def roll_mean(x, window, min_periods=None, freq=None, center=False):
          return mom.rolling_apply(x, window, finite_mean,
                                   min_periods=min_periods,
                                   freq=freq,
                                   center=center)

      self._check_moment_func(roll_mean, np.mean)
  def test_rolling_apply_out_of_bounds(self):
      """rolling_apply with a window longer than the data (#1850).

      The final check previously compared ``result`` with itself and could
      never fail; it now compares against the expected running sums.
      """
      # #1850
      arr = np.arange(4)

      # it works! every position has fewer than `window` observations
      result = mom.rolling_apply(arr, 10, np.sum)
      self.assertTrue(isnull(result).all())

      # with min_periods=1 each position sums everything seen so far
      result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
      expected = np.array([0., 1., 3., 6.])
      assert_almost_equal(result, expected)
  def test_rolling_std(self):
      """Check rolling_std with its default ddof and with ddof=0."""
      def std_ddof1(x):
          return np.std(x, ddof=1)

      def std_ddof0(x):
          return np.std(x, ddof=0)

      self._check_moment_func(mom.rolling_std, std_ddof1)
      rolling_std_ddof0 = functools.partial(mom.rolling_std, ddof=0)
      self._check_moment_func(rolling_std_ddof0, std_ddof0)
  def test_rolling_std_1obs(self):
      """rolling_std with a single observation in the window."""
      # window of 1: expected to be all zeros
      values = np.array([1., 2., 3., 4., 5.])
      single = mom.rolling_std(values, 1, min_periods=1)
      assert_almost_equal(single, np.zeros(5))

      # only one non-NaN value is in the window at index 2: expect NaN there
      with_gaps = np.array([np.nan, np.nan, 3., 4., 5.])
      gapped = mom.rolling_std(with_gaps, 3, min_periods=2)
      self.assertTrue(np.isnan(gapped[2]))
  def test_rolling_std_neg_sqrt(self):
      """rolling_std and ewmstd stay finite on near-constant data."""
      # unit test from Bottleneck
      # Test move_nanstd for neg sqrt.
      values = np.array([0.0011448196318903589,
                         0.00028718669878572767,
                         0.00028718669878572767,
                         0.00028718669878572767,
                         0.00028718669878572767])

      rolled = mom.rolling_std(values, window=3)
      self.assertTrue(np.isfinite(rolled[2:]).all())

      weighted = mom.ewmstd(values, span=3)
      self.assertTrue(np.isfinite(weighted[2:]).all())
  def test_rolling_var(self):
      """Check rolling_var with its default ddof (plus stability) and ddof=0."""
      def var_ddof1(x):
          return np.var(x, ddof=1)

      def var_ddof0(x):
          return np.var(x, ddof=0)

      self._check_moment_func(mom.rolling_var, var_ddof1, test_stable=True)
      rolling_var_ddof0 = functools.partial(mom.rolling_var, ddof=0)
      self._check_moment_func(rolling_var_ddof0, var_ddof0)
  def test_rolling_skew(self):
      """Check rolling_skew against scipy's unbiased skew."""
      try:
          from scipy.stats import skew
      except ImportError:
          raise nose.SkipTest('no scipy')

      def unbiased_skew(x):
          return skew(x, bias=False)

      self._check_moment_func(mom.rolling_skew, unbiased_skew)
  def test_rolling_kurt(self):
      """Check rolling_kurt against scipy's unbiased kurtosis."""
      try:
          from scipy.stats import kurtosis
      except ImportError:
          raise nose.SkipTest('no scipy')

      def unbiased_kurtosis(x):
          return kurtosis(x, bias=False)

      self._check_moment_func(mom.rolling_kurt, unbiased_kurtosis)
  def test_fperr_robustness(self):
      """Rolling sums, means and variances must not go negative (#2114, #2527).

      The raw little-endian float64 data is given as a bytes literal so that
      ``np.frombuffer`` accepts it on Python 3 as well as Python 2; the test
      therefore no longer needs to be skipped on Python 3.
      """
      # #2114
      data = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'

      arr = np.frombuffer(data, dtype='<f8')
      # convert to native byte order on big-endian machines
      if sys.byteorder != "little":
          arr = arr.byteswap().newbyteorder()

      result = mom.rolling_sum(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      result = mom.rolling_mean(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      result = mom.rolling_var(arr, 2)
      self.assertTrue((result[1:] >= 0).all())

      # #2527, ugh
      arr = np.array([0.00012456, 0.0003, 0])
      result = mom.rolling_mean(arr, 1)
      self.assertTrue(result[-1] >= 0)

      result = mom.rolling_mean(-arr, 1)
      self.assertTrue(result[-1] <= 0)
  def _check_moment_func(self, func, static_comp, window=50,
                         has_min_periods=True,
                         has_center=True,
                         has_time_rule=True,
                         preserve_nan=True,
                         fill_value=None,
                         test_stable=False):
      """Run the ndarray checks and then the Series/DataFrame checks.

      ``func`` is the rolling function under test and ``static_comp`` the
      plain function it is compared with; the remaining flags are forwarded
      to ``_check_ndarray`` and ``_check_structures``.
      """
      # options understood by both helper checks
      shared = dict(has_min_periods=has_min_periods,
                    has_center=has_center,
                    fill_value=fill_value)

      self._check_ndarray(func, static_comp, window=window,
                          preserve_nan=preserve_nan,
                          test_stable=test_stable,
                          **shared)

      self._check_structures(func, static_comp,
                             has_time_rule=has_time_rule,
                             **shared)
  def _check_ndarray(self, func, static_comp, window=50,
                     has_min_periods=True,
                     preserve_nan=True,
                     has_center=True,
                     fill_value=None,
                     test_stable=False):
      """Check a rolling function on plain ndarray input.

      Parameters
      ----------
      func : rolling function called as ``func(arr, window, ...)``
      static_comp : function computing the same statistic on one window
      window : window length used for the checks against ``self.arr``
      has_min_periods : whether ``func`` accepts ``min_periods``
      preserve_nan : whether results at ``self._nan_locs`` must be NaN
      has_center : whether ``func`` accepts ``center``
      fill_value : value expected in the trailing positions of a centered
          result; ``None`` means NaN is expected there
      test_stable : also compare results on data shifted by 1e9

      The comparisons against ``self.arr`` slice by ``window`` and the
      centered check compares against ``fill_value``, rather than the
      literals 50 and 0 that only matched the default arguments.
      """
      result = func(self.arr, window)
      assert_almost_equal(result[-1],
                          static_comp(self.arr[-window:]))

      if preserve_nan:
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      # excluding NaNs correctly
      arr = randn(50)
      arr[:10] = np.NaN
      arr[-10:] = np.NaN

      if has_min_periods:
          result = func(arr, 50, min_periods=30)
          assert_almost_equal(result[-1], static_comp(arr[10:-10]))

          # min_periods is working correctly
          result = func(arr, 20, min_periods=15)
          self.assertTrue(np.isnan(result[23]))
          self.assertFalse(np.isnan(result[24]))

          self.assertFalse(np.isnan(result[-6]))
          self.assertTrue(np.isnan(result[-5]))

          arr2 = randn(20)
          result = func(arr2, 10, min_periods=5)
          self.assertTrue(isnull(result[3]))
          self.assertTrue(notnull(result[4]))

          # min_periods=0
          result0 = func(arr, 20, min_periods=0)
          result1 = func(arr, 20, min_periods=1)
          assert_almost_equal(result0, result1)
      else:
          result = func(arr, 50)
          assert_almost_equal(result[-1], static_comp(arr[10:-10]))

      if has_center:
          if has_min_periods:
              result = func(arr, 20, min_periods=15, center=True)
              expected = func(arr, 20, min_periods=15)
          else:
              result = func(arr, 20, center=True)
              expected = func(arr, 20)

          # the centered result is the trailing result shifted back
          assert_almost_equal(result[1], expected[10])
          if fill_value is None:
              self.assertTrue(np.isnan(result[-9:]).all())
          else:
              self.assertTrue((result[-9:] == fill_value).all())
          if has_min_periods:
              self.assertTrue(np.isnan(expected[23]))
              self.assertTrue(np.isnan(result[14]))
              self.assertTrue(np.isnan(expected[-5]))
              self.assertTrue(np.isnan(result[-14]))

      if test_stable:
          # same comparison on data with a large constant offset
          result = func(self.arr + 1e9, window)
          assert_almost_equal(result[-1],
                              static_comp(self.arr[-window:] + 1e9))
  def _check_structures(self, func, static_comp,
                        has_min_periods=True, has_time_rule=True,
                        has_center=True,
                        fill_value=None):
      """Check a rolling function on the Series and DataFrame fixtures.

      Verifies the result types, then optionally the ``freq`` handling
      (``has_time_rule``) and the ``center`` handling (``has_center``).
      """
      series_result = func(self.series, 50)
      tm.assert_isinstance(series_result, Series)

      frame_result = func(self.frame, 50)
      self.assertEqual(type(frame_result), DataFrame)

      # min_periods is only passed when the function supports it
      minp_kwargs = {'min_periods': 10} if has_min_periods else {}

      # check time_rule works
      if has_time_rule:
          win = 25
          every_other_series = self.series[::2]
          every_other_frame = self.frame[::2]

          series_result = func(every_other_series, win, freq='B',
                               **minp_kwargs)
          frame_result = func(every_other_frame, win, freq='B',
                              **minp_kwargs)

          # compare the last value with the statistic over the final window
          last_date = series_result.index[-1]
          prev_date = last_date - 24 * datetools.bday

          trunc_series = self.series[::2].truncate(prev_date, last_date)
          trunc_frame = self.frame[::2].truncate(prev_date, last_date)

          assert_almost_equal(series_result[-1], static_comp(trunc_series))

          assert_almost_equal(frame_result.xs(last_date),
                              trunc_frame.apply(static_comp))

      if has_center:
          # expected: the trailing result shifted back by 12 positions
          series_xp = func(self.series, 25, **minp_kwargs).shift(-12)
          frame_xp = func(self.frame, 25, **minp_kwargs).shift(-12)

          series_rs = func(self.series, 25, center=True, **minp_kwargs)
          frame_rs = func(self.frame, 25, center=True, **minp_kwargs)

          if fill_value is not None:
              series_xp = series_xp.fillna(fill_value)
              frame_xp = frame_xp.fillna(fill_value)

          assert_series_equal(series_xp, series_rs)
          assert_frame_equal(frame_xp, frame_rs)
  def test_ewma(self):
      """Check ewma types and that an unadjusted impulse response sums to ~1."""
      self._check_ew(mom.ewma)

      # single unit impulse in an otherwise zero signal
      impulse = np.zeros(1000)
      impulse[5] = 1
      total = mom.ewma(impulse, span=100, adjust=False).sum()
      self.assertTrue(np.abs(total - 1) < 1e-2)
  def test_ewma_nan_handling(self):
      """ewma of ones separated by NaNs is expected to be all ones."""
      values = [1.] + [np.nan] * 5 + [1.]
      ser = Series(values)
      smoothed = mom.ewma(ser, com=5)
      assert_almost_equal(smoothed, [1] * len(ser))
  def test_ewmvar(self):
      """Run the shared exponentially weighted checks on ewmvar."""
      self._check_ew(func=mom.ewmvar)
  def test_ewmvol(self):
      """Run the shared exponentially weighted checks on ewmvol."""
      self._check_ew(func=mom.ewmvol)
  def test_ewma_span_com_args(self):
      """com=9.5 and span=20 agree; giving both or neither raises."""
      from_com = mom.ewma(self.arr, com=9.5)
      from_span = mom.ewma(self.arr, span=20)
      assert_almost_equal(from_com, from_span)

      # both decay arguments at once, then none at all
      for bad_kwargs in ({'com': 9.5, 'span': 20}, {}):
          self.assertRaises(Exception, mom.ewma, self.arr, **bad_kwargs)
  def test_ewma_halflife_arg(self):
      """halflife=10 agrees with the matching com; conflicting args raise."""
      from_com = mom.ewma(self.arr, com=13.932726172912965)
      from_halflife = mom.ewma(self.arr, halflife=10.0)
      assert_almost_equal(from_com, from_halflife)

      # every combination of more than one decay argument, then none at all
      invalid_combinations = [
          {'span': 20, 'halflife': 50},
          {'com': 9.5, 'halflife': 50},
          {'com': 9.5, 'span': 20, 'halflife': 50},
          {},
      ]
      for bad_kwargs in invalid_combinations:
          self.assertRaises(Exception, mom.ewma, self.arr, **bad_kwargs)
  def test_ew_empty_arrays(self):
      """Exponentially weighted functions return empty output for empty input."""
      empty = np.array([], dtype=np.float64)

      for ew_func in (mom.ewma, mom.ewmvol, mom.ewmvar):
          output = ew_func(empty, 3)
          assert_almost_equal(output, empty)
  def _check_ew(self, func):
      """Run both the ndarray and the Series/DataFrame checks on ``func``."""
      for check in (self._check_ew_ndarray, self._check_ew_structures):
          check(func)
  def _check_ew_ndarray(self, func, preserve_nan=False):
      """Check an exponentially weighted function on ndarray input.

      ``func`` is called on ``self.arr`` with ``com=10``; when
      ``preserve_nan`` is true the result must be NaN at ``self._nan_locs``.
      Integer input must produce a float result.

      A NaN-padded array used to be built here but was never passed to
      ``func`` or asserted on, so that dead setup has been removed.
      """
      result = func(self.arr, com=10)
      if preserve_nan:
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      # pass in ints
      result2 = func(np.arange(50), span=10)
      self.assertEqual(result2.dtype, np.float_)
  def _check_ew_structures(self, func):
      """Check that ``func`` keeps Series and DataFrame result types."""
      from_series = func(self.series, com=10)
      tm.assert_isinstance(from_series, Series)

      from_frame = func(self.frame, com=10)
      self.assertEqual(type(from_frame), DataFrame)
  438. # binary moments
  def test_rolling_cov(self):
      """Last rolling_cov value matches np.cov over the final 50 points."""
      left = self.series
      right = left + randn(len(left))

      rolled = mom.rolling_cov(left, right, 50, min_periods=25)
      expected = np.cov(left[-50:], right[-50:])[0, 1]
      assert_almost_equal(rolled[-1], expected)
  def test_rolling_cov_pairwise(self):
      """Pairwise rolling_cov on a frame matches the column-by-column call."""
      pairwise_kwargs = {'min_periods': 5}
      self._check_pairwise_moment(mom.rolling_cov, 10, **pairwise_kwargs)
  def test_rolling_corr(self):
      """rolling_corr matches np.corrcoef and Series.corr on full windows."""
      left = self.series
      right = left + randn(len(left))

      rolled = mom.rolling_corr(left, right, 50, min_periods=25)
      expected = np.corrcoef(left[-50:], right[-50:])[0, 1]
      assert_almost_equal(rolled[-1], expected)

      # test for correct bias correction
      first = tm.makeTimeSeries()
      second = tm.makeTimeSeries()
      first[:5] = np.nan
      second[:10] = np.nan

      full_window = mom.rolling_corr(first, second, len(first),
                                     min_periods=1)
      assert_almost_equal(full_window[-1], first.corr(second))
  def test_rolling_corr_pairwise(self):
      """Pairwise rolling_corr on a frame matches the column-by-column call."""
      pairwise_kwargs = {'min_periods': 5}
      self._check_pairwise_moment(mom.rolling_corr, 10, **pairwise_kwargs)
  def _check_pairwise_moment(self, func, *args, **kwargs):
      """Compare one slice of the pairwise result with the two-column call.

      ``func`` is applied to the whole frame, and the entry for columns 1
      and 5 must equal ``func`` applied to those two columns directly.
      """
      pairwise = func(self.frame, *args, **kwargs)
      from_panel = pairwise.ix[:, 1, 5]

      first_col = self.frame[1]
      second_col = self.frame[5]
      direct = func(first_col, second_col, *args, **kwargs)

      tm.assert_series_equal(from_panel, direct)
  def test_flex_binary_moment(self):
      """_flex_binary_moment raises TypeError on scalar input (GH3155)."""
      # GH3155
      # don't blow the stack
      scalar_args = (5, 6, None)
      self.assertRaises(TypeError, mom._flex_binary_moment, *scalar_args)
  def test_corr_sanity(self):
      """Centered rolling correlations must lie within [-1, 1] (GH 3155).

      The fuzzing loop used to wrap its assertion in a bare ``except`` that
      only printed the result, so it could never fail. A violation now
      fails the test and the offending result is included in the message.
      """
      # GH 3155
      df = DataFrame(
          np.array(
              [[0.87024726, 0.18505595],
               [0.64355431, 0.3091617],
               [0.92372966, 0.50552513],
               [0.00203756, 0.04520709],
               [0.84780328, 0.33394331],
               [0.78369152, 0.63919667]])
      )

      res = mom.rolling_corr(df[0], df[1], 5, center=True)
      # NaNs are mapped to 0 so that only real values are range-checked
      self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res]))

      # and some fuzzing
      for i in range(10):
          df = DataFrame(np.random.rand(30, 2))
          res = mom.rolling_corr(df[0], df[1], 5, center=True)
          self.assertTrue(
              all([np.abs(np.nan_to_num(x)) <= 1 for x in res]),
              msg="rolling_corr outside [-1, 1]:\n%s" % res)
  def test_flex_binary_frame(self):
      """rolling_corr / rolling_cov accept Series-frame and frame-frame pairs."""
      def check_method(method):
          column = self.frame[1]

          # Series vs frame, in either argument order
          series_first = method(column, self.frame, 10)
          frame_first = method(self.frame, column, 10)
          expected = self.frame.apply(lambda x: method(column, x, 10))

          tm.assert_frame_equal(series_first, expected)
          tm.assert_frame_equal(frame_first, expected)

          # frame vs frame: columns are paired up by name
          other = self.frame.copy()
          other.values[:] = np.random.randn(*other.shape)

          both_frames = method(self.frame, other, 10)
          by_column = {}
          for key in self.frame:
              by_column[key] = method(self.frame[key], other[key], 10)
          tm.assert_frame_equal(both_frames, DataFrame(by_column))

      for method in (mom.rolling_corr, mom.rolling_cov):
          check_method(method)
  def test_ewmcov(self):
      """Run the shared binary exponentially weighted checks on ewmcov."""
      self._check_binary_ew(func=mom.ewmcov)
  def test_ewmcov_pairwise(self):
      """Pairwise ewmcov on a frame matches the column-by-column call."""
      pairwise_kwargs = {'span': 10, 'min_periods': 5}
      self._check_pairwise_moment(mom.ewmcov, **pairwise_kwargs)
  def test_ewmcorr(self):
      """Run the shared binary exponentially weighted checks on ewmcorr."""
      self._check_binary_ew(func=mom.ewmcorr)
  def test_ewmcorr_pairwise(self):
      """Pairwise ewmcorr on a frame matches the column-by-column call."""
      pairwise_kwargs = {'span': 10, 'min_periods': 5}
      self._check_pairwise_moment(mom.ewmcorr, **pairwise_kwargs)
  def _check_binary_ew(self, func):
      """Check NaN placement and input validation of a binary EW function."""
      left = Series(randn(50), index=np.arange(50))
      right = left[2:] + randn(48)

      # missing data at the start of one input and the end of the other
      left[:10] = np.NaN
      right[-10:] = np.NaN

      result = func(left, right, 20, min_periods=5)
      values = result.values

      self.assertTrue(np.isnan(values[:15]).all())
      self.assertFalse(np.isnan(values[15:]).any())

      # a Series paired with a plain ndarray must raise
      self.assertRaises(Exception, func, left, randn(50), 20, min_periods=5)
  def test_expanding_apply(self):
      """Check expanding_apply on empty input and as an expanding mean."""
      # an empty Series comes back unchanged
      empty = Series([])
      applied = mom.expanding_apply(empty, lambda x: x.mean())
      assert_series_equal(empty, applied)

      def take_mean(values):
          return values.mean()

      def expanding_mean(x, min_periods=1, freq=None):
          return mom.expanding_apply(x, take_mean,
                                     min_periods=min_periods,
                                     freq=freq)

      self._check_expanding(expanding_mean, np.mean)
  def test_expanding_apply_args_kwargs(self):
      """expanding_apply forwards ``args`` and ``kwargs`` to the function."""
      def mean_w_arg(x, const):
          return np.mean(x) + const

      frame = DataFrame(np.random.rand(20, 3))
      expected = mom.expanding_apply(frame, np.mean) + 20.

      # constant passed positionally
      via_args = mom.expanding_apply(frame, mean_w_arg, args=(20,))
      assert_frame_equal(via_args, expected)

      # constant passed by keyword
      via_kwargs = mom.expanding_apply(frame, mean_w_arg,
                                       kwargs={'const': 20})
      assert_frame_equal(via_kwargs, expected)
  def test_expanding_corr(self):
      """expanding_corr equals rolling_corr with a full-length window."""
      left = self.series.dropna()
      right = (left + randn(len(left)))[:-5]

      expanding = mom.expanding_corr(left, right)
      rolling = mom.rolling_corr(left, right, len(left), min_periods=1)

      assert_almost_equal(rolling, expanding)
  def test_expanding_count(self):
      """expanding_count equals rolling_count with a full-length window."""
      expanding = mom.expanding_count(self.series)
      rolling = mom.rolling_count(self.series, len(self.series))
      assert_almost_equal(expanding, rolling)
  def test_expanding_quantile(self):
      """expanding_quantile equals rolling_quantile with a full-length window."""
      expanding = mom.expanding_quantile(self.series, 0.5)

      full_window = len(self.series)
      rolling = mom.rolling_quantile(self.series, full_window, 0.5,
                                     min_periods=1)

      assert_almost_equal(expanding, rolling)
  def test_expanding_cov(self):
      """expanding_cov equals rolling_cov with a full-length window."""
      left = self.series
      right = (left + randn(len(left)))[:-5]

      expanding = mom.expanding_cov(left, right)
      rolling = mom.rolling_cov(left, right, len(left), min_periods=1)

      assert_almost_equal(rolling, expanding)
  def test_expanding_max(self):
      """Check expanding_max against np.max without the NaN-location check."""
      self._check_expanding(func=mom.expanding_max, static_comp=np.max,
                            preserve_nan=False)
  def test_expanding_cov_pairwise(self):
      """Pairwise expanding_cov equals pairwise full-window rolling_cov."""
      expanding = mom.expanding_cov(self.frame)
      rolling = mom.rolling_cov(self.frame, len(self.frame),
                                min_periods=1)

      # compare item by item
      for item in expanding.items:
          assert_almost_equal(expanding[item], rolling[item])
  def test_expanding_corr_pairwise(self):
      """Pairwise expanding_corr equals pairwise full-window rolling_corr."""
      expanding = mom.expanding_corr(self.frame)
      rolling = mom.rolling_corr(self.frame, len(self.frame),
                                 min_periods=1)

      # compare item by item
      for item in expanding.items:
          assert_almost_equal(expanding[item], rolling[item])
  def test_rolling_skew_edge_cases(self):
      """rolling_skew on constant data, a too-small window and fixed values."""
      all_nan = Series([np.NaN] * 5)

      # yields all NaN (0 variance)
      constant = Series([1] * 5)
      assert_series_equal(all_nan, mom.rolling_skew(constant, window=5))

      # yields all NaN (window too small)
      noise = Series(np.random.randn(5))
      assert_series_equal(all_nan, mom.rolling_skew(noise, window=2))

      # yields [NaN, NaN, NaN, 0.177994, 1.548824]
      fixed = Series([-1.50837035, -0.1297039, 0.19501095,
                      1.73508164, 0.41941401])
      expected = Series([np.NaN, np.NaN, np.NaN,
                         0.177994, 1.548824])
      assert_series_equal(expected, mom.rolling_skew(fixed, window=4))
  def test_rolling_kurt_edge_cases(self):
      """rolling_kurt on constant data, a too-small window and fixed values."""
      all_nan = Series([np.NaN] * 5)

      # yields all NaN (0 variance)
      constant = Series([1] * 5)
      assert_series_equal(all_nan, mom.rolling_kurt(constant, window=5))

      # yields all NaN (window too small)
      noise = Series(np.random.randn(5))
      assert_series_equal(all_nan, mom.rolling_kurt(noise, window=3))

      # yields [NaN, NaN, NaN, 1.224307, 2.671499]
      fixed = Series([-1.50837035, -0.1297039, 0.19501095,
                      1.73508164, 0.41941401])
      expected = Series([np.NaN, np.NaN, np.NaN,
                         1.224307, 2.671499])
      assert_series_equal(expected, mom.rolling_kurt(fixed, window=4))
  def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
                               has_time_rule=True, preserve_nan=True):
      """Check an expanding function on plain ndarray input.

      Parameters
      ----------
      func : expanding function called as ``func(arr, ...)``
      static_comp : function computing the same statistic on a slice
      has_min_periods : whether ``func`` accepts ``min_periods``
      has_time_rule : accepted for signature compatibility; not used here
      preserve_nan : whether results at ``self._nan_locs`` must be NaN

      Uses ``self.assertTrue`` rather than bare ``assert`` statements so the
      checks still run when Python is started with ``-O``.
      """
      result = func(self.arr)

      assert_almost_equal(result[10],
                          static_comp(self.arr[:11]))

      if preserve_nan:
          self.assertTrue(np.isnan(result[self._nan_locs]).all())

      arr = randn(50)

      if has_min_periods:
          result = func(arr, min_periods=30)
          self.assertTrue(np.isnan(result[:29]).all())
          assert_almost_equal(result[-1], static_comp(arr[:50]))

          # min_periods is working correctly
          result = func(arr, min_periods=15)
          self.assertTrue(np.isnan(result[13]))
          self.assertFalse(np.isnan(result[14]))

          arr2 = randn(20)
          result = func(arr2, min_periods=5)
          self.assertTrue(isnull(result[3]))
          self.assertTrue(notnull(result[4]))

          # min_periods=0
          result0 = func(arr, min_periods=0)
          result1 = func(arr, min_periods=1)
          assert_almost_equal(result0, result1)
      else:
          result = func(arr)
          assert_almost_equal(result[-1], static_comp(arr[:50]))
  def _check_expanding_structures(self, func):
      """Check that ``func`` keeps Series and DataFrame result types."""
      from_series = func(self.series)
      tm.assert_isinstance(from_series, Series)

      from_frame = func(self.frame)
      self.assertEqual(type(from_frame), DataFrame)
  def _check_expanding(self, func, static_comp, has_min_periods=True,
                       has_time_rule=True,
                       preserve_nan=True):
      """Run the ndarray checks and then the Series/DataFrame type checks."""
      ndarray_options = dict(has_min_periods=has_min_periods,
                             has_time_rule=has_time_rule,
                             preserve_nan=preserve_nan)
      self._check_expanding_ndarray(func, static_comp, **ndarray_options)
      self._check_expanding_structures(func)
  def test_rolling_max_gh6297(self):
      """Replicate result expected in GH #6297"""
      daily = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 2 datapoints on one of the days
      stamps = daily + [datetime(1975, 1, 3, 6, 0)]

      raw = Series(range(1, 7), index=stamps)
      # Use floats instead of ints as values
      as_float = raw.map(lambda x: float(x))
      # Sort chronologically
      ordered = as_float.sort_index()

      expected_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=expected_index)

      result = mom.rolling_max(ordered, window=1, freq='D')
      assert_series_equal(expected, result)
  def test_rolling_max_how_resample(self):
      """rolling_max with freq='D' resamples by max unless ``how`` is given."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.append(datetime(1975, 1, 5, 1))
      stamps.append(datetime(1975, 1, 5, 2))

      raw = Series(list(range(0, 5)) + [10, 20], index=stamps)
      # Use floats instead of ints as values
      as_float = raw.map(lambda x: float(x))
      # Sort chronologically
      ordered = as_float.sort_index()

      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]

      # (extra keyword arguments, expected value on the last day)
      cases = [
          # Default how should be max
          ({}, 20.0),
          # Now specify median (10.0)
          ({'how': 'median'}, 10.0),
          # Now specify mean (4+10+20)/3
          ({'how': 'mean'}, (4.0 + 10.0 + 20.0) / 3.0),
      ]

      for how_kwargs, last_value in cases:
          expected = Series([0.0, 1.0, 2.0, 3.0, last_value],
                            index=daily_index)
          result = mom.rolling_max(ordered, window=1, freq='D',
                                   **how_kwargs)
          assert_series_equal(expected, result)
  def test_rolling_min_how_resample(self):
      """rolling_min with freq='D' resamples by min by default."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.append(datetime(1975, 1, 5, 1))
      stamps.append(datetime(1975, 1, 5, 2))

      raw = Series(list(range(0, 5)) + [10, 20], index=stamps)
      # Use floats instead of ints as values
      as_float = raw.map(lambda x: float(x))
      # Sort chronologically
      ordered = as_float.sort_index()

      # Default how should be min
      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=daily_index)

      result = mom.rolling_min(ordered, window=1, freq='D')
      assert_series_equal(expected, result)
  def test_rolling_median_how_resample(self):
      """rolling_median with freq='D' resamples by median by default."""
      stamps = [datetime(1975, 1, day) for day in range(1, 6)]
      # So that we can have 3 datapoints on last day (4, 10, and 20)
      stamps.append(datetime(1975, 1, 5, 1))
      stamps.append(datetime(1975, 1, 5, 2))

      raw = Series(list(range(0, 5)) + [10, 20], index=stamps)
      # Use floats instead of ints as values
      as_float = raw.map(lambda x: float(x))
      # Sort chronologically
      ordered = as_float.sort_index()

      # Default how should be median
      daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
      expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=daily_index)

      result = mom.rolling_median(ordered, window=1, freq='D')
      assert_series_equal(expected, result)
  if __name__ == '__main__':
      # Run this module's tests verbosely, stop at the first failure and drop
      # into pdb on errors and failures. nose is already imported at module
      # level, so it is not re-imported here.
      nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                     exit=False)