/pandas/stats/tests/test_moments.py
Python | 1064 lines | 769 code | 233 blank | 62 comment | 68 complexity | 739bfb3ca0b2f9dac40eb7e860c3b7fd MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
- import nose
- import sys
- import functools
- from datetime import datetime
- from numpy.random import randn
- import numpy as np
- from pandas import Series, DataFrame, bdate_range, isnull, notnull
- from pandas.util.testing import (
- assert_almost_equal, assert_series_equal, assert_frame_equal
- )
- import pandas.core.datetools as datetools
- import pandas.stats.moments as mom
- import pandas.util.testing as tm
- from pandas.compat import range, zip, PY3, StringIO
- N, K = 100, 10
- class TestMoments(tm.TestCase):
- _multiprocess_can_split_ = True
- _nan_locs = np.arange(20, 40)
- _inf_locs = np.array([])
- def setUp(self):
- arr = randn(N)
- arr[self._nan_locs] = np.NaN
- self.arr = arr
- self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
- self.series = Series(arr.copy(), index=self.rng)
- self.frame = DataFrame(randn(N, K), index=self.rng,
- columns=np.arange(K))
- def test_centered_axis_validation(self):
- # ok
- mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0)
- # bad axis
- self.assertRaises(ValueError, mom.rolling_mean,Series(np.ones(10)),3,center=True ,axis=1)
- # ok ok
- mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=0)
- mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=1)
- # bad axis
- self.assertRaises(ValueError, mom.rolling_mean,DataFrame(np.ones((10,10))),3,center=True ,axis=2)
- def test_rolling_sum(self):
- self._check_moment_func(mom.rolling_sum, np.sum)
- def test_rolling_count(self):
- counter = lambda x: np.isfinite(x).astype(float).sum()
- self._check_moment_func(mom.rolling_count, counter,
- has_min_periods=False,
- preserve_nan=False,
- fill_value=0)
- def test_rolling_mean(self):
- self._check_moment_func(mom.rolling_mean, np.mean)
- def test_cmov_mean(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_mean
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- vals = np.random.randn(10)
- xp = cmov_mean(vals, 5)
- rs = mom.rolling_mean(vals, 5, center=True)
- assert_almost_equal(xp.compressed(), rs[2:-2])
- assert_almost_equal(xp.mask, np.isnan(rs))
- xp = Series(rs)
- rs = mom.rolling_mean(Series(vals), 5, center=True)
- assert_series_equal(xp, rs)
- def test_cmov_window(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- vals = np.random.randn(10)
- xp = cmov_window(vals, 5, 'boxcar')
- rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
- assert_almost_equal(xp.compressed(), rs[2:-2])
- assert_almost_equal(xp.mask, np.isnan(rs))
- xp = Series(rs)
- rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
- assert_series_equal(xp, rs)
- def test_cmov_window_corner(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- # all nan
- vals = np.empty(10, dtype=float)
- vals.fill(np.nan)
- rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
- self.assertTrue(np.isnan(rs).all())
- # empty
- vals = np.array([])
- rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
- self.assertEqual(len(rs), 0)
- # shorter than window
- vals = np.random.randn(5)
- rs = mom.rolling_window(vals, 10, 'boxcar')
- self.assertTrue(np.isnan(rs).all())
- self.assertEqual(len(rs), 5)
- def test_cmov_window_frame(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- # DataFrame
- vals = np.random.randn(10, 2)
- xp = cmov_window(vals, 5, 'boxcar')
- rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
- assert_frame_equal(DataFrame(xp), rs)
- def test_cmov_window_na_min_periods(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- # min_periods
- vals = Series(np.random.randn(10))
- vals[4] = np.nan
- vals[8] = np.nan
- xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
- rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)
- assert_series_equal(xp, rs)
- def test_cmov_window_regular(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
- 'blackmanharris', 'nuttall', 'barthann']
- for wt in win_types:
- vals = np.random.randn(10)
- xp = cmov_window(vals, 5, wt)
- rs = mom.rolling_window(Series(vals), 5, wt, center=True)
- assert_series_equal(Series(xp), rs)
- def test_cmov_window_special(self):
- tm._skip_if_no_scipy()
- try:
- from scikits.timeseries.lib import cmov_window
- except ImportError:
- raise nose.SkipTest("no scikits.timeseries")
- win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
- kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
- {'width': 0.5}]
- for wt, k in zip(win_types, kwds):
- vals = np.random.randn(10)
- xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))
- rs = mom.rolling_window(Series(vals), 5, wt, center=True,
- **k)
- assert_series_equal(Series(xp), rs)
- def test_rolling_median(self):
- self._check_moment_func(mom.rolling_median, np.median)
- def test_rolling_min(self):
- self._check_moment_func(mom.rolling_min, np.min)
- a = np.array([1, 2, 3, 4, 5])
- b = mom.rolling_min(a, window=100, min_periods=1)
- assert_almost_equal(b, np.ones(len(a)))
- self.assertRaises(ValueError, mom.rolling_min, np.array([1,
- 2, 3]), window=3, min_periods=5)
- def test_rolling_max(self):
- self._check_moment_func(mom.rolling_max, np.max)
- a = np.array([1, 2, 3, 4, 5])
- b = mom.rolling_max(a, window=100, min_periods=1)
- assert_almost_equal(a, b)
- self.assertRaises(ValueError, mom.rolling_max, np.array([1,
- 2, 3]), window=3, min_periods=5)
- def test_rolling_quantile(self):
- qs = [.1, .5, .9]
- def scoreatpercentile(a, per):
- values = np.sort(a, axis=0)
- idx = per / 1. * (values.shape[0] - 1)
- return values[int(idx)]
- for q in qs:
- def f(x, window, min_periods=None, freq=None, center=False):
- return mom.rolling_quantile(x, window, q,
- min_periods=min_periods,
- freq=freq,
- center=center)
- def alt(x):
- return scoreatpercentile(x, q)
- self._check_moment_func(f, alt)
- def test_rolling_apply(self):
- ser = Series([])
- assert_series_equal(
- ser, mom.rolling_apply(ser, 10, lambda x: x.mean()))
- def roll_mean(x, window, min_periods=None, freq=None, center=False):
- return mom.rolling_apply(x, window,
- lambda x: x[np.isfinite(x)].mean(),
- min_periods=min_periods,
- freq=freq,
- center=center)
- self._check_moment_func(roll_mean, np.mean)
- def test_rolling_apply_out_of_bounds(self):
- # #1850
- arr = np.arange(4)
- # it works!
- result = mom.rolling_apply(arr, 10, np.sum)
- self.assertTrue(isnull(result).all())
- result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
- assert_almost_equal(result, result)
- def test_rolling_std(self):
- self._check_moment_func(mom.rolling_std,
- lambda x: np.std(x, ddof=1))
- self._check_moment_func(functools.partial(mom.rolling_std, ddof=0),
- lambda x: np.std(x, ddof=0))
- def test_rolling_std_1obs(self):
- result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]),
- 1, min_periods=1)
- expected = np.zeros(5)
- assert_almost_equal(result, expected)
- result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]),
- 3, min_periods=2)
- self.assertTrue(np.isnan(result[2]))
- def test_rolling_std_neg_sqrt(self):
- # unit test from Bottleneck
- # Test move_nanstd for neg sqrt.
- a = np.array([0.0011448196318903589,
- 0.00028718669878572767,
- 0.00028718669878572767,
- 0.00028718669878572767,
- 0.00028718669878572767])
- b = mom.rolling_std(a, window=3)
- self.assertTrue(np.isfinite(b[2:]).all())
- b = mom.ewmstd(a, span=3)
- self.assertTrue(np.isfinite(b[2:]).all())
- def test_rolling_var(self):
- self._check_moment_func(mom.rolling_var,
- lambda x: np.var(x, ddof=1),
- test_stable=True)
- self._check_moment_func(functools.partial(mom.rolling_var, ddof=0),
- lambda x: np.var(x, ddof=0))
- def test_rolling_skew(self):
- try:
- from scipy.stats import skew
- except ImportError:
- raise nose.SkipTest('no scipy')
- self._check_moment_func(mom.rolling_skew,
- lambda x: skew(x, bias=False))
- def test_rolling_kurt(self):
- try:
- from scipy.stats import kurtosis
- except ImportError:
- raise nose.SkipTest('no scipy')
- self._check_moment_func(mom.rolling_kurt,
- lambda x: kurtosis(x, bias=False))
- def test_fperr_robustness(self):
- # TODO: remove this once python 2.5 out of picture
- if PY3:
- raise nose.SkipTest("doesn't work on python 3")
- # #2114
- data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'
- arr = np.frombuffer(data, dtype='<f8')
- if sys.byteorder != "little":
- arr = arr.byteswap().newbyteorder()
- result = mom.rolling_sum(arr, 2)
- self.assertTrue((result[1:] >= 0).all())
- result = mom.rolling_mean(arr, 2)
- self.assertTrue((result[1:] >= 0).all())
- result = mom.rolling_var(arr, 2)
- self.assertTrue((result[1:] >= 0).all())
- # #2527, ugh
- arr = np.array([0.00012456, 0.0003, 0])
- result = mom.rolling_mean(arr, 1)
- self.assertTrue(result[-1] >= 0)
- result = mom.rolling_mean(-arr, 1)
- self.assertTrue(result[-1] <= 0)
- def _check_moment_func(self, func, static_comp, window=50,
- has_min_periods=True,
- has_center=True,
- has_time_rule=True,
- preserve_nan=True,
- fill_value=None,
- test_stable=False):
- self._check_ndarray(func, static_comp, window=window,
- has_min_periods=has_min_periods,
- preserve_nan=preserve_nan,
- has_center=has_center,
- fill_value=fill_value,
- test_stable=test_stable)
- self._check_structures(func, static_comp,
- has_min_periods=has_min_periods,
- has_time_rule=has_time_rule,
- fill_value=fill_value,
- has_center=has_center)
- def _check_ndarray(self, func, static_comp, window=50,
- has_min_periods=True,
- preserve_nan=True,
- has_center=True,
- fill_value=None,
- test_stable=False,
- test_window=True):
- result = func(self.arr, window)
- assert_almost_equal(result[-1],
- static_comp(self.arr[-50:]))
- if preserve_nan:
- assert(np.isnan(result[self._nan_locs]).all())
- # excluding NaNs correctly
- arr = randn(50)
- arr[:10] = np.NaN
- arr[-10:] = np.NaN
- if has_min_periods:
- result = func(arr, 50, min_periods=30)
- assert_almost_equal(result[-1], static_comp(arr[10:-10]))
- # min_periods is working correctly
- result = func(arr, 20, min_periods=15)
- self.assertTrue(np.isnan(result[23]))
- self.assertFalse(np.isnan(result[24]))
- self.assertFalse(np.isnan(result[-6]))
- self.assertTrue(np.isnan(result[-5]))
- arr2 = randn(20)
- result = func(arr2, 10, min_periods=5)
- self.assertTrue(isnull(result[3]))
- self.assertTrue(notnull(result[4]))
- # min_periods=0
- result0 = func(arr, 20, min_periods=0)
- result1 = func(arr, 20, min_periods=1)
- assert_almost_equal(result0, result1)
- else:
- result = func(arr, 50)
- assert_almost_equal(result[-1], static_comp(arr[10:-10]))
- if has_center:
- if has_min_periods:
- result = func(arr, 20, min_periods=15, center=True)
- expected = func(arr, 20, min_periods=15)
- else:
- result = func(arr, 20, center=True)
- expected = func(arr, 20)
- assert_almost_equal(result[1], expected[10])
- if fill_value is None:
- self.assertTrue(np.isnan(result[-9:]).all())
- else:
- self.assertTrue((result[-9:] == 0).all())
- if has_min_periods:
- self.assertTrue(np.isnan(expected[23]))
- self.assertTrue(np.isnan(result[14]))
- self.assertTrue(np.isnan(expected[-5]))
- self.assertTrue(np.isnan(result[-14]))
- if test_stable:
- result = func(self.arr + 1e9, window)
- assert_almost_equal(result[-1],
- static_comp(self.arr[-50:] + 1e9))
- # Test window larger than array, #7297
- if test_window:
- if has_min_periods:
- for minp in (0, len(self.arr)-1, len(self.arr)):
- result = func(self.arr, len(self.arr)+1, min_periods=minp)
- expected = func(self.arr, len(self.arr), min_periods=minp)
- nan_mask = np.isnan(result)
- self.assertTrue(np.array_equal(nan_mask,
- np.isnan(expected)))
- nan_mask = ~nan_mask
- assert_almost_equal(result[nan_mask], expected[nan_mask])
- else:
- result = func(self.arr, len(self.arr)+1)
- expected = func(self.arr, len(self.arr))
- nan_mask = np.isnan(result)
- self.assertTrue(np.array_equal(nan_mask, np.isnan(expected)))
- nan_mask = ~nan_mask
- assert_almost_equal(result[nan_mask], expected[nan_mask])
- def _check_structures(self, func, static_comp,
- has_min_periods=True, has_time_rule=True,
- has_center=True,
- fill_value=None):
- series_result = func(self.series, 50)
- tm.assert_isinstance(series_result, Series)
- frame_result = func(self.frame, 50)
- self.assertEqual(type(frame_result), DataFrame)
- # check time_rule works
- if has_time_rule:
- win = 25
- minp = 10
- if has_min_periods:
- series_result = func(self.series[::2], win, min_periods=minp,
- freq='B')
- frame_result = func(self.frame[::2], win, min_periods=minp,
- freq='B')
- else:
- series_result = func(self.series[::2], win, freq='B')
- frame_result = func(self.frame[::2], win, freq='B')
- last_date = series_result.index[-1]
- prev_date = last_date - 24 * datetools.bday
- trunc_series = self.series[::2].truncate(prev_date, last_date)
- trunc_frame = self.frame[::2].truncate(prev_date, last_date)
- assert_almost_equal(series_result[-1], static_comp(trunc_series))
- assert_almost_equal(frame_result.xs(last_date),
- trunc_frame.apply(static_comp))
- if has_center:
- if has_min_periods:
- minp = 10
- series_xp = func(self.series, 25, min_periods=minp).shift(-12)
- frame_xp = func(self.frame, 25, min_periods=minp).shift(-12)
- series_rs = func(self.series, 25, min_periods=minp,
- center=True)
- frame_rs = func(self.frame, 25, min_periods=minp,
- center=True)
- else:
- series_xp = func(self.series, 25).shift(-12)
- frame_xp = func(self.frame, 25).shift(-12)
- series_rs = func(self.series, 25, center=True)
- frame_rs = func(self.frame, 25, center=True)
- if fill_value is not None:
- series_xp = series_xp.fillna(fill_value)
- frame_xp = frame_xp.fillna(fill_value)
- assert_series_equal(series_xp, series_rs)
- assert_frame_equal(frame_xp, frame_rs)
- def test_ewma(self):
- self._check_ew(mom.ewma)
- arr = np.zeros(1000)
- arr[5] = 1
- result = mom.ewma(arr, span=100, adjust=False).sum()
- self.assertTrue(np.abs(result - 1) < 1e-2)
- def test_ewma_nan_handling(self):
- s = Series([1.] + [np.nan] * 5 + [1.])
- result = mom.ewma(s, com=5)
- assert_almost_equal(result, [1] * len(s))
- def test_ewmvar(self):
- self._check_ew(mom.ewmvar)
- def test_ewmvol(self):
- self._check_ew(mom.ewmvol)
- def test_ewma_span_com_args(self):
- A = mom.ewma(self.arr, com=9.5)
- B = mom.ewma(self.arr, span=20)
- assert_almost_equal(A, B)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
- self.assertRaises(Exception, mom.ewma, self.arr)
- def test_ewma_halflife_arg(self):
- A = mom.ewma(self.arr, com=13.932726172912965)
- B = mom.ewma(self.arr, halflife=10.0)
- assert_almost_equal(A, B)
- self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50)
- self.assertRaises(Exception, mom.ewma, self.arr)
- def test_ew_empty_arrays(self):
- arr = np.array([], dtype=np.float64)
- funcs = [mom.ewma, mom.ewmvol, mom.ewmvar]
- for f in funcs:
- result = f(arr, 3)
- assert_almost_equal(result, arr)
- def _check_ew(self, func):
- self._check_ew_ndarray(func)
- self._check_ew_structures(func)
- def _check_ew_ndarray(self, func, preserve_nan=False):
- result = func(self.arr, com=10)
- if preserve_nan:
- assert(np.isnan(result[self._nan_locs]).all())
- # excluding NaNs correctly
- arr = randn(50)
- arr[:10] = np.NaN
- arr[-10:] = np.NaN
- # ??? check something
- # pass in ints
- result2 = func(np.arange(50), span=10)
- self.assertEqual(result2.dtype, np.float_)
- def _check_ew_structures(self, func):
- series_result = func(self.series, com=10)
- tm.assert_isinstance(series_result, Series)
- frame_result = func(self.frame, com=10)
- self.assertEqual(type(frame_result), DataFrame)
- # binary moments
- def test_rolling_cov(self):
- A = self.series
- B = A + randn(len(A))
- result = mom.rolling_cov(A, B, 50, min_periods=25)
- assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])
- def test_rolling_cov_pairwise(self):
- self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5)
- def test_rolling_corr(self):
- A = self.series
- B = A + randn(len(A))
- result = mom.rolling_corr(A, B, 50, min_periods=25)
- assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
- # test for correct bias correction
- a = tm.makeTimeSeries()
- b = tm.makeTimeSeries()
- a[:5] = np.nan
- b[:10] = np.nan
- result = mom.rolling_corr(a, b, len(a), min_periods=1)
- assert_almost_equal(result[-1], a.corr(b))
- def test_rolling_corr_pairwise(self):
- self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5)
- def _check_pairwise_moment(self, func, *args, **kwargs):
- panel = func(self.frame, *args, **kwargs)
- actual = panel.ix[:, 1, 5]
- expected = func(self.frame[1], self.frame[5], *args, **kwargs)
- tm.assert_series_equal(actual, expected)
- def test_flex_binary_moment(self):
- # GH3155
- # don't blow the stack
- self.assertRaises(TypeError, mom._flex_binary_moment,5,6,None)
- def test_corr_sanity(self):
- #GH 3155
- df = DataFrame(
- np.array(
- [[ 0.87024726, 0.18505595],
- [ 0.64355431, 0.3091617 ],
- [ 0.92372966, 0.50552513],
- [ 0.00203756, 0.04520709],
- [ 0.84780328, 0.33394331],
- [ 0.78369152, 0.63919667]])
- )
- res = mom.rolling_corr(df[0],df[1],5,center=True)
- self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res]))
- # and some fuzzing
- for i in range(10):
- df = DataFrame(np.random.rand(30,2))
- res = mom.rolling_corr(df[0],df[1],5,center=True)
- try:
- self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res]))
- except:
- print(res)
- def test_flex_binary_frame(self):
- def _check(method):
- series = self.frame[1]
- res = method(series, self.frame, 10)
- res2 = method(self.frame, series, 10)
- exp = self.frame.apply(lambda x: method(series, x, 10))
- tm.assert_frame_equal(res, exp)
- tm.assert_frame_equal(res2, exp)
- frame2 = self.frame.copy()
- frame2.values[:] = np.random.randn(*frame2.shape)
- res3 = method(self.frame, frame2, 10)
- exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10))
- for k in self.frame))
- tm.assert_frame_equal(res3, exp)
- methods = [mom.rolling_corr, mom.rolling_cov]
- for meth in methods:
- _check(meth)
- def test_ewmcov(self):
- self._check_binary_ew(mom.ewmcov)
- def test_ewmcov_pairwise(self):
- self._check_pairwise_moment(mom.ewmcov, span=10, min_periods=5)
- def test_ewmcorr(self):
- self._check_binary_ew(mom.ewmcorr)
- def test_ewmcorr_pairwise(self):
- self._check_pairwise_moment(mom.ewmcorr, span=10, min_periods=5)
- def _check_binary_ew(self, func):
- A = Series(randn(50), index=np.arange(50))
- B = A[2:] + randn(48)
- A[:10] = np.NaN
- B[-10:] = np.NaN
- result = func(A, B, 20, min_periods=5)
- self.assertTrue(np.isnan(result.values[:15]).all())
- self.assertFalse(np.isnan(result.values[15:]).any())
- self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
- def test_expanding_apply(self):
- ser = Series([])
- assert_series_equal(ser, mom.expanding_apply(ser, lambda x: x.mean()))
- def expanding_mean(x, min_periods=1, freq=None):
- return mom.expanding_apply(x,
- lambda x: x.mean(),
- min_periods=min_periods,
- freq=freq)
- self._check_expanding(expanding_mean, np.mean)
- def test_expanding_apply_args_kwargs(self):
- def mean_w_arg(x, const):
- return np.mean(x) + const
- df = DataFrame(np.random.rand(20, 3))
- expected = mom.expanding_apply(df, np.mean) + 20.
- assert_frame_equal(mom.expanding_apply(df, mean_w_arg, args=(20,)),
- expected)
- assert_frame_equal(mom.expanding_apply(df, mean_w_arg,
- kwargs={'const' : 20}),
- expected)
- def test_expanding_corr(self):
- A = self.series.dropna()
- B = (A + randn(len(A)))[:-5]
- result = mom.expanding_corr(A, B)
- rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)
- assert_almost_equal(rolling_result, result)
- def test_expanding_count(self):
- result = mom.expanding_count(self.series)
- assert_almost_equal(result, mom.rolling_count(self.series,
- len(self.series)))
- def test_expanding_quantile(self):
- result = mom.expanding_quantile(self.series, 0.5)
- rolling_result = mom.rolling_quantile(self.series,
- len(self.series),
- 0.5, min_periods=1)
- assert_almost_equal(result, rolling_result)
- def test_expanding_cov(self):
- A = self.series
- B = (A + randn(len(A)))[:-5]
- result = mom.expanding_cov(A, B)
- rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)
- assert_almost_equal(rolling_result, result)
- def test_expanding_max(self):
- self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)
- def test_expanding_cov_pairwise(self):
- result = mom.expanding_cov(self.frame)
- rolling_result = mom.rolling_cov(self.frame, len(self.frame),
- min_periods=1)
- for i in result.items:
- assert_almost_equal(result[i], rolling_result[i])
- def test_expanding_corr_pairwise(self):
- result = mom.expanding_corr(self.frame)
- rolling_result = mom.rolling_corr(self.frame, len(self.frame),
- min_periods=1)
- for i in result.items:
- assert_almost_equal(result[i], rolling_result[i])
- def test_expanding_cov_diff_index(self):
- # GH 7512
- s1 = Series([1, 2, 3], index=[0, 1, 2])
- s2 = Series([1, 3], index=[0, 2])
- result = mom.expanding_cov(s1, s2)
- expected = Series([None, None, 2.0])
- assert_series_equal(result, expected)
- s2a = Series([1, None, 3], index=[0, 1, 2])
- result = mom.expanding_cov(s1, s2a)
- assert_series_equal(result, expected)
- s1 = Series([7, 8, 10], index=[0, 1, 3])
- s2 = Series([7, 9, 10], index=[0, 2, 3])
- result = mom.expanding_cov(s1, s2)
- expected = Series([None, None, None, 4.5])
- assert_series_equal(result, expected)
- def test_expanding_corr_diff_index(self):
- # GH 7512
- s1 = Series([1, 2, 3], index=[0, 1, 2])
- s2 = Series([1, 3], index=[0, 2])
- result = mom.expanding_corr(s1, s2)
- expected = Series([None, None, 1.0])
- assert_series_equal(result, expected)
- s2a = Series([1, None, 3], index=[0, 1, 2])
- result = mom.expanding_corr(s1, s2a)
- assert_series_equal(result, expected)
- s1 = Series([7, 8, 10], index=[0, 1, 3])
- s2 = Series([7, 9, 10], index=[0, 2, 3])
- result = mom.expanding_corr(s1, s2)
- expected = Series([None, None, None, 1.])
- assert_series_equal(result, expected)
- def test_rolling_cov_diff_length(self):
- # GH 7512
- s1 = Series([1, 2, 3], index=[0, 1, 2])
- s2 = Series([1, 3], index=[0, 2])
- result = mom.rolling_cov(s1, s2, window=3, min_periods=2)
- expected = Series([None, None, 2.0])
- assert_series_equal(result, expected)
- s2a = Series([1, None, 3], index=[0, 1, 2])
- result = mom.rolling_cov(s1, s2a, window=3, min_periods=2)
- assert_series_equal(result, expected)
- def test_rolling_corr_diff_length(self):
- # GH 7512
- s1 = Series([1, 2, 3], index=[0, 1, 2])
- s2 = Series([1, 3], index=[0, 2])
- result = mom.rolling_corr(s1, s2, window=3, min_periods=2)
- expected = Series([None, None, 1.0])
- assert_series_equal(result, expected)
- s2a = Series([1, None, 3], index=[0, 1, 2])
- result = mom.rolling_corr(s1, s2a, window=3, min_periods=2)
- assert_series_equal(result, expected)
- def test_expanding_cov_pairwise_diff_length(self):
- # GH 7512
- df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])
- df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B'])
- df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
- df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
- result1 = mom.expanding_cov(df1, df2, pairwise=True)[2]
- result2 = mom.expanding_cov(df1, df2a, pairwise=True)[2]
- result3 = mom.expanding_cov(df1a, df2, pairwise=True)[2]
- result4 = mom.expanding_cov(df1a, df2a, pairwise=True)[2]
- expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A','B'], columns=['X','Y'])
- assert_frame_equal(result1, expected)
- assert_frame_equal(result2, expected)
- assert_frame_equal(result3, expected)
- assert_frame_equal(result4, expected)
-
- def test_expanding_corr_pairwise_diff_length(self):
- # GH 7512
- df1 = DataFrame([[1,2], [3, 2], [3,4]], columns=['A','B'])
- df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B'])
- df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
- df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
- result1 = mom.expanding_corr(df1, df2, pairwise=True)[2]
- result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2]
- result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2]
- result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2]
- expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y'])
- assert_frame_equal(result1, expected)
- assert_frame_equal(result2, expected)
- assert_frame_equal(result3, expected)
- assert_frame_equal(result4, expected)
-
- def test_rolling_skew_edge_cases(self):
- all_nan = Series([np.NaN] * 5)
- # yields all NaN (0 variance)
- d = Series([1] * 5)
- x = mom.rolling_skew(d, window=5)
- assert_series_equal(all_nan, x)
- # yields all NaN (window too small)
- d = Series(np.random.randn(5))
- x = mom.rolling_skew(d, window=2)
- assert_series_equal(all_nan, x)
- # yields [NaN, NaN, NaN, 0.177994, 1.548824]
- d = Series([-1.50837035, -0.1297039 , 0.19501095,
- 1.73508164, 0.41941401])
- expected = Series([np.NaN, np.NaN, np.NaN,
- 0.177994, 1.548824])
- x = mom.rolling_skew(d, window=4)
- assert_series_equal(expected, x)
- def test_rolling_kurt_edge_cases(self):
- all_nan = Series([np.NaN] * 5)
- # yields all NaN (0 variance)
- d = Series([1] * 5)
- x = mom.rolling_kurt(d, window=5)
- assert_series_equal(all_nan, x)
- # yields all NaN (window too small)
- d = Series(np.random.randn(5))
- x = mom.rolling_kurt(d, window=3)
- assert_series_equal(all_nan, x)
- # yields [NaN, NaN, NaN, 1.224307, 2.671499]
- d = Series([-1.50837035, -0.1297039 , 0.19501095,
- 1.73508164, 0.41941401])
- expected = Series([np.NaN, np.NaN, np.NaN,
- 1.224307, 2.671499])
- x = mom.rolling_kurt(d, window=4)
- assert_series_equal(expected, x)
- def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
- has_time_rule=True, preserve_nan=True):
- result = func(self.arr)
- assert_almost_equal(result[10],
- static_comp(self.arr[:11]))
- if preserve_nan:
- assert(np.isnan(result[self._nan_locs]).all())
- arr = randn(50)
- if has_min_periods:
- result = func(arr, min_periods=30)
- assert(np.isnan(result[:29]).all())
- assert_almost_equal(result[-1], static_comp(arr[:50]))
- # min_periods is working correctly
- result = func(arr, min_periods=15)
- self.assertTrue(np.isnan(result[13]))
- self.assertFalse(np.isnan(result[14]))
- arr2 = randn(20)
- result = func(arr2, min_periods=5)
- self.assertTrue(isnull(result[3]))
- self.assertTrue(notnull(result[4]))
- # min_periods=0
- result0 = func(arr, min_periods=0)
- result1 = func(arr, min_periods=1)
- assert_almost_equal(result0, result1)
- else:
- result = func(arr)
- assert_almost_equal(result[-1], static_comp(arr[:50]))
- def _check_expanding_structures(self, func):
- series_result = func(self.series)
- tm.assert_isinstance(series_result, Series)
- frame_result = func(self.frame)
- self.assertEqual(type(frame_result), DataFrame)
- def _check_expanding(self, func, static_comp, has_min_periods=True,
- has_time_rule=True,
- preserve_nan=True):
- self._check_expanding_ndarray(func, static_comp,
- has_min_periods=has_min_periods,
- has_time_rule=has_time_rule,
- preserve_nan=preserve_nan)
- self._check_expanding_structures(func)
- def test_rolling_max_gh6297(self):
- """Replicate result expected in GH #6297"""
- indices = [datetime(1975, 1, i) for i in range(1, 6)]
- # So that we can have 2 datapoints on one of the days
- indices.append(datetime(1975, 1, 3, 6, 0))
- series = Series(range(1, 7), index=indices)
- # Use floats instead of ints as values
- series = series.map(lambda x: float(x))
- # Sort chronologically
- series = series.sort_index()
- expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_max(series, window=1, freq='D')
- assert_series_equal(expected, x)
- def test_rolling_max_how_resample(self):
- indices = [datetime(1975, 1, i) for i in range(1, 6)]
- # So that we can have 3 datapoints on last day (4, 10, and 20)
- indices.append(datetime(1975, 1, 5, 1))
- indices.append(datetime(1975, 1, 5, 2))
- series = Series(list(range(0, 5)) + [10, 20], index=indices)
- # Use floats instead of ints as values
- series = series.map(lambda x: float(x))
- # Sort chronologically
- series = series.sort_index()
- # Default how should be max
- expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_max(series, window=1, freq='D')
- assert_series_equal(expected, x)
- # Now specify median (10.0)
- expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_max(series, window=1, freq='D', how='median')
- assert_series_equal(expected, x)
- # Now specify mean (4+10+20)/3
- v = (4.0+10.0+20.0)/3.0
- expected = Series([0.0, 1.0, 2.0, 3.0, v],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_max(series, window=1, freq='D', how='mean')
- assert_series_equal(expected, x)
- def test_rolling_min_how_resample(self):
- indices = [datetime(1975, 1, i) for i in range(1, 6)]
- # So that we can have 3 datapoints on last day (4, 10, and 20)
- indices.append(datetime(1975, 1, 5, 1))
- indices.append(datetime(1975, 1, 5, 2))
- series = Series(list(range(0, 5)) + [10, 20], index=indices)
- # Use floats instead of ints as values
- series = series.map(lambda x: float(x))
- # Sort chronologically
- series = series.sort_index()
- # Default how should be min
- expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_min(series, window=1, freq='D')
- assert_series_equal(expected, x)
- def test_rolling_median_how_resample(self):
- indices = [datetime(1975, 1, i) for i in range(1, 6)]
- # So that we can have 3 datapoints on last day (4, 10, and 20)
- indices.append(datetime(1975, 1, 5, 1))
- indices.append(datetime(1975, 1, 5, 2))
- series = Series(list(range(0, 5)) + [10, 20], index=indices)
- # Use floats instead of ints as values
- series = series.map(lambda x: float(x))
- # Sort chronologically
- series = series.sort_index()
- # Default how should be median
- expected = Series([0.0, 1.0, 2.0, 3.0, 10],
- index=[datetime(1975, 1, i, 0)
- for i in range(1, 6)])
- x = mom.rolling_median(series, window=1, freq='D')
- assert_series_equal(expected, x)
- if __name__ == '__main__':
- import nose
- nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
- exit=False)