PageRenderTime 74ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/sparse/tests/test_sparse.py

http://github.com/wesm/pandas
Python | 1459 lines | 1027 code | 343 blank | 89 comment | 28 complexity | 5eb7215c0302cae722defbaf9b0bc820 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. # pylint: disable-msg=E1101,W0612
  2. from unittest import TestCase
  3. import cPickle as pickle
  4. import operator
  5. import nose
  6. from numpy import nan
  7. import numpy as np
  8. dec = np.testing.dec
  9. from pandas.util.testing import (assert_almost_equal, assert_series_equal,
  10. assert_frame_equal, assert_panel_equal)
  11. from numpy.testing import assert_equal
  12. from pandas import Series, DataFrame, DateRange, Panel
  13. from pandas.core.datetools import BDay
  14. import pandas.core.datetools as datetools
  15. import pandas.util.testing as tm
  16. import pandas.sparse.frame as spf
  17. from pandas._sparse import BlockIndex, IntIndex
  18. from pandas.sparse.api import (SparseSeries, SparseTimeSeries,
  19. SparseDataFrame, SparsePanel,
  20. SparseArray)
  21. import pandas.tests.test_frame as test_frame
  22. import pandas.tests.test_panel as test_panel
  23. import pandas.tests.test_series as test_series
  24. from test_array import assert_sp_array_equal
  25. def _test_data1():
  26. # nan-based
  27. arr = np.arange(20, dtype=float)
  28. index = np.arange(20)
  29. arr[:2] = nan
  30. arr[5:10] = nan
  31. arr[-3:] = nan
  32. return arr, index
  33. def _test_data2():
  34. # nan-based
  35. arr = np.arange(15, dtype=float)
  36. index = np.arange(15)
  37. arr[7:12] = nan
  38. arr[-1:] = nan
  39. return arr, index
  40. def _test_data1_zero():
  41. # zero-based
  42. arr, index = _test_data1()
  43. arr[np.isnan(arr)] = 0
  44. return arr, index
  45. def _test_data2_zero():
  46. # zero-based
  47. arr, index = _test_data2()
  48. arr[np.isnan(arr)] = 0
  49. return arr, index
  50. def assert_sp_series_equal(a, b):
  51. assert(a.index.equals(b.index))
  52. assert_sp_array_equal(a, b)
  53. def assert_sp_frame_equal(left, right, exact_indices=True):
  54. """
  55. exact: Series SparseIndex objects must be exactly the same, otherwise just
  56. compare dense representations
  57. """
  58. for col, series in left.iteritems():
  59. assert(col in right)
  60. # trade-off?
  61. if exact_indices:
  62. assert_sp_series_equal(series, right[col])
  63. else:
  64. assert_series_equal(series.to_dense(), right[col].to_dense())
  65. assert_almost_equal(left.default_fill_value,
  66. right.default_fill_value)
  67. # do I care?
  68. # assert(left.default_kind == right.default_kind)
  69. for col in right:
  70. assert(col in left)
  71. def assert_sp_panel_equal(left, right, exact_indices=True):
  72. for item, frame in left.iterkv():
  73. assert(item in right)
  74. # trade-off?
  75. assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices)
  76. assert_almost_equal(left.default_fill_value,
  77. right.default_fill_value)
  78. assert(left.default_kind == right.default_kind)
  79. for item in right:
  80. assert(item in left)
  81. class TestSparseSeries(TestCase,
  82. test_series.CheckNameIntegration):
  83. def setUp(self):
  84. arr, index = _test_data1()
  85. date_index = DateRange('1/1/2011', periods=len(index))
  86. self.bseries = SparseSeries(arr, index=index, kind='block')
  87. self.bseries.name = 'bseries'
  88. self.ts = self.bseries
  89. self.btseries = SparseSeries(arr, index=date_index, kind='block')
  90. self.iseries = SparseSeries(arr, index=index, kind='integer')
  91. arr, index = _test_data2()
  92. self.bseries2 = SparseSeries(arr, index=index, kind='block')
  93. self.iseries2 = SparseSeries(arr, index=index, kind='integer')
  94. arr, index = _test_data1_zero()
  95. self.zbseries = SparseSeries(arr, index=index, kind='block',
  96. fill_value=0)
  97. self.ziseries = SparseSeries(arr, index=index, kind='integer',
  98. fill_value=0)
  99. arr, index = _test_data2_zero()
  100. self.zbseries2 = SparseSeries(arr, index=index, kind='block',
  101. fill_value=0)
  102. self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
  103. fill_value=0)
  104. def test_construct_DataFrame_with_sp_series(self):
  105. # it works!
  106. df = DataFrame({'col' : self.bseries})
  107. def test_sparse_to_dense(self):
  108. arr, index = _test_data1()
  109. series = self.bseries.to_dense()
  110. assert_equal(series, arr)
  111. series = self.bseries.to_dense(sparse_only=True)
  112. assert_equal(series, arr[np.isfinite(arr)])
  113. series = self.iseries.to_dense()
  114. assert_equal(series, arr)
  115. arr, index = _test_data1_zero()
  116. series = self.zbseries.to_dense()
  117. assert_equal(series, arr)
  118. series = self.ziseries.to_dense()
  119. assert_equal(series, arr)
  120. def test_dense_to_sparse(self):
  121. series = self.bseries.to_dense()
  122. bseries = series.to_sparse(kind='block')
  123. iseries = series.to_sparse(kind='integer')
  124. assert_sp_series_equal(bseries, self.bseries)
  125. assert_sp_series_equal(iseries, self.iseries)
  126. # non-NaN fill value
  127. series = self.zbseries.to_dense()
  128. zbseries = series.to_sparse(kind='block', fill_value=0)
  129. ziseries = series.to_sparse(kind='integer', fill_value=0)
  130. assert_sp_series_equal(zbseries, self.zbseries)
  131. assert_sp_series_equal(ziseries, self.ziseries)
  132. def test_to_dense_preserve_name(self):
  133. assert(self.bseries.name is not None)
  134. result = self.bseries.to_dense()
  135. self.assertEquals(result.name, self.bseries.name)
  136. def test_constructor(self):
  137. # test setup guys
  138. self.assert_(np.isnan(self.bseries.fill_value))
  139. self.assert_(isinstance(self.bseries.sp_index, BlockIndex))
  140. self.assert_(np.isnan(self.iseries.fill_value))
  141. self.assert_(isinstance(self.iseries.sp_index, IntIndex))
  142. self.assertEquals(self.zbseries.fill_value, 0)
  143. assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0))
  144. # pass SparseSeries
  145. s2 = SparseSeries(self.bseries)
  146. s3 = SparseSeries(self.iseries)
  147. s4 = SparseSeries(self.zbseries)
  148. assert_sp_series_equal(s2, self.bseries)
  149. assert_sp_series_equal(s3, self.iseries)
  150. assert_sp_series_equal(s4, self.zbseries)
  151. # Sparse time series works
  152. date_index = DateRange('1/1/2000', periods=len(self.bseries))
  153. s5 = SparseSeries(self.bseries, index=date_index)
  154. self.assert_(isinstance(s5, SparseTimeSeries))
  155. # pass Series
  156. bseries2 = SparseSeries(self.bseries.to_dense())
  157. assert_equal(self.bseries.sp_values, bseries2.sp_values)
  158. # pass dict?
  159. # don't copy the data by default
  160. values = np.ones(len(self.bseries.sp_values))
  161. sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
  162. sp.sp_values[:5] = 97
  163. self.assert_(values[0] == 97)
  164. # but can make it copy!
  165. sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
  166. copy=True)
  167. sp.sp_values[:5] = 100
  168. self.assert_(values[0] == 97)
  169. def test_constructor_ndarray(self):
  170. pass
  171. def test_constructor_nonnan(self):
  172. arr = [0, 0, 0, nan, nan]
  173. sp_series = SparseSeries(arr, fill_value=0)
  174. assert_equal(sp_series.values, arr)
  175. def test_copy_astype(self):
  176. cop = self.bseries.astype(np.float_)
  177. self.assert_(cop is not self.bseries)
  178. self.assert_(cop.sp_index is self.bseries.sp_index)
  179. self.assert_(cop.dtype == np.float64)
  180. cop2 = self.iseries.copy()
  181. assert_sp_series_equal(cop, self.bseries)
  182. assert_sp_series_equal(cop2, self.iseries)
  183. # test that data is copied
  184. cop.sp_values[:5] = 97
  185. self.assert_(cop.sp_values[0] == 97)
  186. self.assert_(self.bseries.sp_values[0] != 97)
  187. # correct fill value
  188. zbcop = self.zbseries.copy()
  189. zicop = self.ziseries.copy()
  190. assert_sp_series_equal(zbcop, self.zbseries)
  191. assert_sp_series_equal(zicop, self.ziseries)
  192. # no deep copy
  193. view = self.bseries.copy(deep=False)
  194. view.sp_values[:5] = 5
  195. self.assert_((self.bseries.sp_values[:5] == 5).all())
  196. def test_astype(self):
  197. self.assertRaises(Exception, self.bseries.astype, np.int64)
  198. def test_kind(self):
  199. self.assertEquals(self.bseries.kind, 'block')
  200. self.assertEquals(self.iseries.kind, 'integer')
  201. def test_pickle(self):
  202. def _test_roundtrip(series):
  203. pickled = pickle.dumps(series, protocol=pickle.HIGHEST_PROTOCOL)
  204. unpickled = pickle.loads(pickled)
  205. assert_sp_series_equal(series, unpickled)
  206. assert_series_equal(series.to_dense(), unpickled.to_dense())
  207. self._check_all(_test_roundtrip)
  208. def _check_all(self, check_func):
  209. check_func(self.bseries)
  210. check_func(self.iseries)
  211. check_func(self.zbseries)
  212. check_func(self.ziseries)
  213. def test_getitem(self):
  214. def _check_getitem(sp, dense):
  215. for idx, val in dense.iteritems():
  216. assert_almost_equal(val, sp[idx])
  217. for i in xrange(len(dense)):
  218. assert_almost_equal(sp[i], dense[i])
  219. # j = np.float64(i)
  220. # assert_almost_equal(sp[j], dense[j])
  221. # API change 1/6/2012
  222. # negative getitem works
  223. # for i in xrange(len(dense)):
  224. # assert_almost_equal(sp[-i], dense[-i])
  225. _check_getitem(self.bseries, self.bseries.to_dense())
  226. _check_getitem(self.btseries, self.btseries.to_dense())
  227. _check_getitem(self.zbseries, self.zbseries.to_dense())
  228. _check_getitem(self.iseries, self.iseries.to_dense())
  229. _check_getitem(self.ziseries, self.ziseries.to_dense())
  230. # exception handling
  231. self.assertRaises(Exception, self.bseries.__getitem__,
  232. len(self.bseries) + 1)
  233. # index not contained
  234. self.assertRaises(Exception, self.btseries.__getitem__,
  235. self.btseries.index[-1] + BDay())
  236. def test_get_get_value(self):
  237. assert_almost_equal(self.bseries.get(10), self.bseries[10])
  238. self.assert_(self.bseries.get(len(self.bseries) + 1) is None)
  239. dt = self.btseries.index[10]
  240. result = self.btseries.get(dt)
  241. expected = self.btseries.to_dense()[dt]
  242. assert_almost_equal(result, expected)
  243. assert_almost_equal(self.bseries.get_value(10), self.bseries[10])
  244. def test_set_value(self):
  245. idx = self.btseries.index[7]
  246. res = self.btseries.set_value(idx, 0)
  247. self.assert_(res is not self.btseries)
  248. self.assertEqual(res[idx], 0)
  249. res = self.iseries.set_value('foobar', 0)
  250. self.assert_(res is not self.iseries)
  251. self.assert_(res.index[-1] == 'foobar')
  252. self.assertEqual(res['foobar'], 0)
  253. def test_getitem_slice(self):
  254. idx = self.bseries.index
  255. res = self.bseries[::2]
  256. self.assert_(isinstance(res, SparseSeries))
  257. assert_sp_series_equal(res, self.bseries.reindex(idx[::2]))
  258. res = self.bseries[:5]
  259. self.assert_(isinstance(res, SparseSeries))
  260. assert_sp_series_equal(res, self.bseries.reindex(idx[:5]))
  261. res = self.bseries[5:]
  262. assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))
  263. # negative indices
  264. res = self.bseries[:-3]
  265. assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))
  266. def test_take(self):
  267. def _compare_with_dense(sp):
  268. dense = sp.to_dense()
  269. def _compare(idx):
  270. dense_result = dense.take(idx).values
  271. sparse_result = sp.take(idx)
  272. self.assert_(isinstance(sparse_result, SparseSeries))
  273. assert_almost_equal(dense_result, sparse_result.values)
  274. _compare([1., 2., 3., 4., 5., 0.])
  275. _compare([7, 2, 9, 0, 4])
  276. _compare([3, 6, 3, 4, 7])
  277. self._check_all(_compare_with_dense)
  278. self.assertRaises(Exception, self.bseries.take, [-1, 0])
  279. self.assertRaises(Exception, self.bseries.take,
  280. [0, len(self.bseries) + 1])
  281. # Corner case
  282. sp = SparseSeries(np.ones(10.) * nan)
  283. assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))
  284. def test_setitem(self):
  285. self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.)
  286. self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.)
  287. def test_setslice(self):
  288. self.assertRaises(Exception, self.bseries.__setslice__, 5, 10, 7.)
  289. def test_operators(self):
  290. def _check_op(a, b, op):
  291. sp_result = op(a, b)
  292. adense = a.to_dense() if isinstance(a, SparseSeries) else a
  293. bdense = b.to_dense() if isinstance(b, SparseSeries) else b
  294. dense_result = op(adense, bdense)
  295. assert_almost_equal(sp_result.to_dense(), dense_result)
  296. def check(a, b):
  297. _check_op(a, b, operator.add)
  298. _check_op(a, b, operator.sub)
  299. _check_op(a, b, operator.truediv)
  300. _check_op(a, b, operator.floordiv)
  301. _check_op(a, b, operator.mul)
  302. _check_op(a, b, lambda x, y: operator.add(y, x))
  303. _check_op(a, b, lambda x, y: operator.sub(y, x))
  304. _check_op(a, b, lambda x, y: operator.truediv(y, x))
  305. _check_op(a, b, lambda x, y: operator.floordiv(y, x))
  306. _check_op(a, b, lambda x, y: operator.mul(y, x))
  307. # NaN ** 0 = 1 in C?
  308. # _check_op(a, b, operator.pow)
  309. # _check_op(a, b, lambda x, y: operator.pow(y, x))
  310. check(self.bseries, self.bseries)
  311. check(self.iseries, self.iseries)
  312. check(self.bseries, self.iseries)
  313. check(self.bseries, self.bseries2)
  314. check(self.bseries, self.iseries2)
  315. check(self.iseries, self.iseries2)
  316. # scalar value
  317. check(self.bseries, 5)
  318. # zero-based
  319. check(self.zbseries, self.zbseries * 2)
  320. check(self.zbseries, self.zbseries2)
  321. check(self.ziseries, self.ziseries2)
  322. # with dense
  323. result = self.bseries + self.bseries.to_dense()
  324. assert_sp_series_equal(result, self.bseries + self.bseries)
  325. # @dec.knownfailureif(True, 'Known NumPy failer as of 1.5.1')
  326. def test_operators_corner2(self):
  327. raise nose.SkipTest('known failer on numpy 1.5.1')
  328. # NumPy circumvents __r*__ operations
  329. val = np.float64(3.0)
  330. result = val - self.zbseries
  331. assert_sp_series_equal(result, 3 - self.zbseries)
  332. def test_binary_operators(self):
  333. def _check_inplace_op(op):
  334. tmp = self.bseries.copy()
  335. self.assertRaises(NotImplementedError, op, tmp, self.bseries)
  336. inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow']
  337. for op in inplace_ops:
  338. _check_inplace_op(getattr(operator, op))
  339. def test_reindex(self):
  340. def _compare_with_series(sps, new_index):
  341. spsre = sps.reindex(new_index)
  342. series = sps.to_dense()
  343. seriesre = series.reindex(new_index)
  344. seriesre = seriesre.to_sparse(fill_value=sps.fill_value)
  345. assert_sp_series_equal(spsre, seriesre)
  346. assert_series_equal(spsre.to_dense(), seriesre.to_dense())
  347. _compare_with_series(self.bseries, self.bseries.index[::2])
  348. _compare_with_series(self.bseries, list(self.bseries.index[::2]))
  349. _compare_with_series(self.bseries, self.bseries.index[:10])
  350. _compare_with_series(self.bseries, self.bseries.index[5:])
  351. _compare_with_series(self.zbseries, self.zbseries.index[::2])
  352. _compare_with_series(self.zbseries, self.zbseries.index[:10])
  353. _compare_with_series(self.zbseries, self.zbseries.index[5:])
  354. # special cases
  355. same_index = self.bseries.reindex(self.bseries.index)
  356. assert_sp_series_equal(self.bseries, same_index)
  357. self.assert_(same_index is not self.bseries)
  358. # corner cases
  359. sp = SparseSeries([], index=[])
  360. sp_zero = SparseSeries([], index=[], fill_value=0)
  361. _compare_with_series(sp, np.arange(10))
  362. # with copy=False
  363. reindexed = self.bseries.reindex(self.bseries.index, copy=True)
  364. reindexed.sp_values[:] = 1.
  365. self.assert_((self.bseries.sp_values != 1.).all())
  366. reindexed = self.bseries.reindex(self.bseries.index, copy=False)
  367. reindexed.sp_values[:] = 1.
  368. self.assert_((self.bseries.sp_values == 1.).all())
  369. def test_sparse_reindex(self):
  370. length = 10
  371. def _check(values, index1, index2, fill_value):
  372. first_series = SparseSeries(values, sparse_index=index1,
  373. fill_value=fill_value)
  374. reindexed = first_series.sparse_reindex(index2)
  375. self.assert_(reindexed.sp_index is index2)
  376. int_indices1 = index1.to_int_index().indices
  377. int_indices2 = index2.to_int_index().indices
  378. expected = Series(values, index=int_indices1)
  379. expected = expected.reindex(int_indices2).fillna(fill_value)
  380. assert_almost_equal(expected.values, reindexed.sp_values)
  381. # make sure level argument asserts
  382. expected = expected.reindex(int_indices2).fillna(fill_value)
  383. def _check_with_fill_value(values, first, second, fill_value=nan):
  384. i_index1 = IntIndex(length, first)
  385. i_index2 = IntIndex(length, second)
  386. b_index1 = i_index1.to_block_index()
  387. b_index2 = i_index2.to_block_index()
  388. _check(values, i_index1, i_index2, fill_value)
  389. _check(values, b_index1, b_index2, fill_value)
  390. def _check_all(values, first, second):
  391. _check_with_fill_value(values, first, second, fill_value=nan)
  392. _check_with_fill_value(values, first, second, fill_value=0)
  393. index1 = [2, 4, 5, 6, 8, 9]
  394. values1 = np.arange(6.)
  395. _check_all(values1, index1, [2, 4, 5])
  396. _check_all(values1, index1, [2, 3, 4, 5, 6, 7, 8, 9])
  397. _check_all(values1, index1, [0, 1])
  398. _check_all(values1, index1, [0, 1, 7, 8, 9])
  399. _check_all(values1, index1, [])
  400. def test_repr(self):
  401. bsrepr = repr(self.bseries)
  402. isrepr = repr(self.iseries)
  403. def test_iter(self):
  404. pass
  405. def test_truncate(self):
  406. pass
  407. def test_fillna(self):
  408. pass
  409. def test_groupby(self):
  410. pass
  411. def test_reductions(self):
  412. def _compare_with_dense(obj, op):
  413. sparse_result = getattr(obj, op)()
  414. series = obj.to_dense()
  415. dense_result = getattr(series, op)()
  416. self.assertEquals(sparse_result, dense_result)
  417. to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew']
  418. def _compare_all(obj):
  419. for op in to_compare:
  420. _compare_with_dense(obj, op)
  421. _compare_all(self.bseries)
  422. self.bseries.sp_values[5:10] = np.NaN
  423. _compare_all(self.bseries)
  424. _compare_all(self.zbseries)
  425. self.zbseries.sp_values[5:10] = np.NaN
  426. _compare_all(self.zbseries)
  427. series = self.zbseries.copy()
  428. series.fill_value = 2
  429. _compare_all(series)
  430. def test_dropna(self):
  431. sp = SparseSeries([0, 0, 0, nan, nan, 5, 6],
  432. fill_value=0)
  433. sp_valid = sp.valid()
  434. assert_almost_equal(sp_valid.values,
  435. sp.to_dense().valid().values)
  436. self.assert_(sp_valid.index.equals(sp.to_dense().valid().index))
  437. self.assertEquals(len(sp_valid.sp_values), 2)
  438. result = self.bseries.dropna()
  439. expected = self.bseries.to_dense().dropna()
  440. self.assert_(not isinstance(result, SparseSeries))
  441. tm.assert_series_equal(result, expected)
  442. def test_homogenize(self):
  443. def _check_matches(indices, expected):
  444. data = {}
  445. for i, idx in enumerate(indices):
  446. data[i] = SparseSeries(idx.to_int_index().indices,
  447. sparse_index=idx)
  448. homogenized = spf.homogenize(data)
  449. for k, v in homogenized.iteritems():
  450. assert(v.sp_index.equals(expected))
  451. indices1 = [BlockIndex(10, [2], [7]),
  452. BlockIndex(10, [1, 6], [3, 4]),
  453. BlockIndex(10, [0], [10])]
  454. expected1 = BlockIndex(10, [2, 6], [2, 3])
  455. _check_matches(indices1, expected1)
  456. indices2 = [BlockIndex(10, [2], [7]),
  457. BlockIndex(10, [2], [7])]
  458. expected2 = indices2[0]
  459. _check_matches(indices2, expected2)
  460. # must have NaN fill value
  461. data = {'a' : SparseSeries(np.arange(7), sparse_index=expected2,
  462. fill_value=0)}
  463. nose.tools.assert_raises(Exception, spf.homogenize, data)
  464. def test_fill_value_corner(self):
  465. cop = self.zbseries.copy()
  466. cop.fill_value = 0
  467. result = self.bseries / cop
  468. self.assert_(np.isnan(result.fill_value))
  469. cop2 = self.zbseries.copy()
  470. cop2.fill_value = 1
  471. result = cop2 / cop
  472. self.assert_(np.isnan(result.fill_value))
  473. def test_shift(self):
  474. series = SparseSeries([nan, 1., 2., 3., nan, nan],
  475. index=np.arange(6))
  476. shifted = series.shift(0)
  477. self.assert_(shifted is not series)
  478. assert_sp_series_equal(shifted, series)
  479. f = lambda s: s.shift(1)
  480. _dense_series_compare(series, f)
  481. f = lambda s: s.shift(-2)
  482. _dense_series_compare(series, f)
  483. series = SparseSeries([nan, 1., 2., 3., nan, nan],
  484. index=DateRange('1/1/2000', periods=6))
  485. f = lambda s: s.shift(2, timeRule='WEEKDAY')
  486. _dense_series_compare(series, f)
  487. f = lambda s: s.shift(2, offset=datetools.bday)
  488. _dense_series_compare(series, f)
  489. def test_cumsum(self):
  490. result = self.bseries.cumsum()
  491. expected = self.bseries.to_dense().cumsum()
  492. self.assert_(isinstance(result, SparseSeries))
  493. self.assertEquals(result.name, self.bseries.name)
  494. assert_series_equal(result.to_dense(), expected)
  495. result = self.zbseries.cumsum()
  496. expected = self.zbseries.to_dense().cumsum()
  497. self.assert_(isinstance(result, Series))
  498. assert_series_equal(result, expected)
  499. def test_combine_first(self):
  500. s = self.bseries
  501. result = s[::2].combine_first(s)
  502. result2 = s[::2].combine_first(s.to_dense())
  503. expected = s[::2].to_dense().combine_first(s.to_dense())
  504. expected = expected.to_sparse(fill_value=s.fill_value)
  505. assert_sp_series_equal(result, result2)
  506. assert_sp_series_equal(result, expected)
class TestSparseTimeSeries(TestCase):
    # Placeholder: no SparseTimeSeries-specific tests are defined here.
    pass
  509. class TestSparseDataFrame(TestCase, test_frame.SafeForSparse):
  510. klass = SparseDataFrame
  511. def setUp(self):
  512. self.data = {'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
  513. 'B' : [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
  514. 'C' : np.arange(10),
  515. 'D' : [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
  516. self.dates = DateRange('1/1/2011', periods=10)
  517. self.frame = SparseDataFrame(self.data, index=self.dates)
  518. self.iframe = SparseDataFrame(self.data, index=self.dates,
  519. default_kind='integer')
  520. values = self.frame.values.copy()
  521. values[np.isnan(values)] = 0
  522. self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
  523. default_fill_value=0,
  524. index=self.dates)
  525. values = self.frame.values.copy()
  526. values[np.isnan(values)] = 2
  527. self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
  528. default_fill_value=2,
  529. index=self.dates)
  530. self.empty = SparseDataFrame()
  531. def test_as_matrix(self):
  532. empty = self.empty.as_matrix()
  533. self.assert_(empty.shape == (0, 0))
  534. no_cols = SparseDataFrame(index=np.arange(10))
  535. mat = no_cols.as_matrix()
  536. self.assert_(mat.shape == (10, 0))
  537. no_index = SparseDataFrame(columns=np.arange(10))
  538. mat = no_index.as_matrix()
  539. self.assert_(mat.shape == (0, 10))
  540. def test_copy(self):
  541. cp = self.frame.copy()
  542. self.assert_(isinstance(cp, SparseDataFrame))
  543. assert_sp_frame_equal(cp, self.frame)
  544. self.assert_(cp.index is self.frame.index)
  545. def test_constructor(self):
  546. for col, series in self.frame.iteritems():
  547. self.assert_(isinstance(series, SparseSeries))
  548. self.assert_(isinstance(self.iframe['A'].sp_index, IntIndex))
  549. # constructed zframe from matrix above
  550. self.assertEquals(self.zframe['A'].fill_value, 0)
  551. assert_almost_equal([0, 0, 0, 0, 1, 2, 3, 4, 5, 6],
  552. self.zframe['A'].values)
  553. # construct from nested dict
  554. data = {}
  555. for c, s in self.frame.iteritems():
  556. data[c] = s.to_dict()
  557. sdf = SparseDataFrame(data)
  558. assert_sp_frame_equal(sdf, self.frame)
  559. # TODO: test data is copied from inputs
  560. # init dict with different index
  561. idx = self.frame.index[:5]
  562. cons = SparseDataFrame(self.frame._series, index=idx,
  563. columns=self.frame.columns,
  564. default_fill_value=self.frame.default_fill_value,
  565. default_kind=self.frame.default_kind)
  566. reindexed = self.frame.reindex(idx)
  567. assert_sp_frame_equal(cons, reindexed)
  568. # assert level parameter breaks reindex
  569. self.assertRaises(Exception, self.frame.reindex, idx, level=0)
  570. def test_constructor_ndarray(self):
  571. # no index or columns
  572. sp = SparseDataFrame(self.frame.values)
  573. # 1d
  574. sp = SparseDataFrame(self.data['A'], index=self.dates,
  575. columns=['A'])
  576. assert_sp_frame_equal(sp, self.frame.reindex(columns=['A']))
  577. # raise on level argument
  578. self.assertRaises(Exception, self.frame.reindex, columns=['A'],
  579. level=1)
  580. # wrong length index / columns
  581. self.assertRaises(Exception, SparseDataFrame, self.frame.values,
  582. index=self.frame.index[:-1])
  583. self.assertRaises(Exception, SparseDataFrame, self.frame.values,
  584. columns=self.frame.columns[:-1])
  585. def test_constructor_empty(self):
  586. sp = SparseDataFrame()
  587. self.assert_(len(sp.index) == 0)
  588. self.assert_(len(sp.columns) == 0)
  589. def test_constructor_dataframe(self):
  590. dense = self.frame.to_dense()
  591. sp = SparseDataFrame(dense)
  592. assert_sp_frame_equal(sp, self.frame)
  593. def test_array_interface(self):
  594. res = np.sqrt(self.frame)
  595. dres = np.sqrt(self.frame.to_dense())
  596. assert_frame_equal(res.to_dense(), dres)
  597. def test_pickle(self):
  598. def _test_roundtrip(frame):
  599. pickled = pickle.dumps(frame, protocol=pickle.HIGHEST_PROTOCOL)
  600. unpickled = pickle.loads(pickled)
  601. assert_sp_frame_equal(frame, unpickled)
  602. _test_roundtrip(SparseDataFrame())
  603. self._check_all(_test_roundtrip)
  604. def test_dense_to_sparse(self):
  605. df = DataFrame({'A' : [nan, nan, nan, 1, 2],
  606. 'B' : [1, 2, nan, nan, nan]})
  607. sdf = df.to_sparse()
  608. self.assert_(isinstance(sdf, SparseDataFrame))
  609. self.assert_(np.isnan(sdf.default_fill_value))
  610. self.assert_(isinstance(sdf['A'].sp_index, BlockIndex))
  611. tm.assert_frame_equal(sdf.to_dense(), df)
  612. sdf = df.to_sparse(kind='integer')
  613. self.assert_(isinstance(sdf['A'].sp_index, IntIndex))
  614. df = DataFrame({'A' : [0, 0, 0, 1, 2],
  615. 'B' : [1, 2, 0, 0, 0]}, dtype=float)
  616. sdf = df.to_sparse(fill_value=0)
  617. self.assertEquals(sdf.default_fill_value, 0)
  618. tm.assert_frame_equal(sdf.to_dense(), df)
    def test_sparse_to_dense(self):
        # TODO: placeholder -- no assertions implemented yet.
        pass
  621. def test_sparse_series_ops(self):
  622. self._check_all(self._check_frame_ops)
  623. def _check_frame_ops(self, frame):
  624. fill = frame.default_fill_value
  625. def _compare_to_dense(a, b, da, db, op):
  626. sparse_result = op(a, b)
  627. dense_result = op(da, db)
  628. dense_result = dense_result.to_sparse(fill_value=fill)
  629. assert_sp_frame_equal(sparse_result, dense_result,
  630. exact_indices=False)
  631. if isinstance(a, DataFrame) and isinstance(db, DataFrame):
  632. mixed_result = op(a, db)
  633. self.assert_(isinstance(mixed_result, SparseDataFrame))
  634. assert_sp_frame_equal(mixed_result, sparse_result,
  635. exact_indices=False)
  636. opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv']
  637. ops = [getattr(operator, name) for name in opnames]
  638. fidx = frame.index
  639. # time series operations
  640. series = [frame['A'], frame['B'],
  641. frame['C'], frame['D'],
  642. frame['A'].reindex(fidx[:7]),
  643. frame['A'].reindex(fidx[::2]),
  644. SparseSeries([], index=[])]
  645. for op in ops:
  646. _compare_to_dense(frame, frame[::2], frame.to_dense(),
  647. frame[::2].to_dense(), op)
  648. for s in series:
  649. _compare_to_dense(frame, s, frame.to_dense(),
  650. s.to_dense(), op)
  651. _compare_to_dense(s, frame, s.to_dense(),
  652. frame.to_dense(), op)
  653. # cross-sectional operations
  654. series = [frame.xs(fidx[0]),
  655. frame.xs(fidx[3]),
  656. frame.xs(fidx[5]),
  657. frame.xs(fidx[7]),
  658. frame.xs(fidx[5])[:2]]
  659. for op in ops:
  660. for s in series:
  661. _compare_to_dense(frame, s, frame.to_dense(),
  662. s, op)
  663. _compare_to_dense(s, frame, s,
  664. frame.to_dense(), op)
  665. def test_op_corners(self):
  666. empty = self.empty + self.empty
  667. self.assert_(not empty)
  668. foo = self.frame + self.empty
  669. assert_sp_frame_equal(foo, self.frame * np.nan)
  670. foo = self.empty + self.frame
  671. assert_sp_frame_equal(foo, self.frame * np.nan)
    def test_scalar_ops(self):
        # TODO: placeholder -- no assertions implemented yet.
        pass
    def test_getitem(self):
        # TODO: placeholder -- no assertions implemented yet.
        pass
  676. def test_set_value(self):
  677. res = self.frame.set_value('foobar', 'B', 1.5)
  678. self.assert_(res is not self.frame)
  679. self.assert_(res.index[-1] == 'foobar')
  680. self.assertEqual(res.get_value('foobar', 'B'), 1.5)
  681. res2 = res.set_value('foobar', 'qux', 1.5)
  682. self.assert_(res2 is not res)
  683. self.assert_(np.array_equal(res2.columns,
  684. list(self.frame.columns) + ['qux']))
  685. self.assertEqual(res2.get_value('foobar', 'qux'), 1.5)
  686. def test_fancy_index_misc(self):
  687. # axis = 0
  688. sliced = self.frame.ix[-2:, :]
  689. expected = self.frame.reindex(index=self.frame.index[-2:])
  690. assert_sp_frame_equal(sliced, expected)
  691. # axis = 1
  692. sliced = self.frame.ix[:, -2:]
  693. expected = self.frame.reindex(columns=self.frame.columns[-2:])
  694. assert_sp_frame_equal(sliced, expected)
  695. def test_getitem_overload(self):
  696. # slicing
  697. sl = self.frame[:20]
  698. assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20]))
  699. # boolean indexing
  700. d = self.frame.index[5]
  701. indexer = self.frame.index > d
  702. subindex = self.frame.index[indexer]
  703. subframe = self.frame[indexer]
  704. self.assert_(np.array_equal(subindex, subframe.index))
  705. self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1])
  def test_setitem(self):
      def _exercise(sp_frame):
          nrows = len(sp_frame)

          # SparseSeries taken straight from the frame
          sp_frame['E'] = sp_frame['A']
          self.assertTrue(isinstance(sp_frame['E'], SparseSeries))
          assert_sp_series_equal(sp_frame['E'], sp_frame['A'])

          # SparseSeries holding only every other label
          sparse_half = sp_frame['A'][::2]
          sp_frame['E'] = sparse_half
          assert_series_equal(
              sp_frame['E'].to_dense(),
              sparse_half.to_dense().reindex(sp_frame.index))

          # dense Series: the stored column comes back as a SparseSeries
          sp_frame['F'] = sp_frame['A'].to_dense()
          self.assertTrue(isinstance(sp_frame['F'], SparseSeries))
          assert_sp_series_equal(sp_frame['F'], sp_frame['A'])

          # dense Series holding only every other label
          dense_half = sp_frame['A'].to_dense()[::2]
          sp_frame['G'] = dense_half
          assert_series_equal(sp_frame['G'].to_dense(),
                              dense_half.reindex(sp_frame.index))

          # plain ndarray of matching length
          sp_frame['H'] = np.random.randn(nrows)
          self.assertTrue(isinstance(sp_frame['H'], SparseSeries))

          # ndarray whose tail is overwritten with the frame's default
          # fill value: only the leading half is expected in sp_values
          half = nrows // 2
          mostly_fill = np.random.randn(nrows)
          mostly_fill[half:] = sp_frame.default_fill_value
          sp_frame['I'] = mostly_fill
          self.assertEqual(len(sp_frame['I'].sp_values), half)

          # ndarray of the wrong length must raise
          self.assertRaises(Exception, sp_frame.__setitem__, 'foo',
                            np.random.randn(nrows - 1))

          # scalar: every row is stored and equals the scalar
          sp_frame['J'] = 5
          self.assertEqual(len(sp_frame['J'].sp_values), nrows)
          self.assertTrue((sp_frame['J'].sp_values == 5).all())

          # scalar equal to the default fill value: nothing is stored
          sp_frame['K'] = sp_frame.default_fill_value
          self.assertEqual(len(sp_frame['K'].sp_values), 0)

      self._check_all(_exercise)
  def test_setitem_corner(self):
      # store an existing column again under a new, lower-case label
      frame = self.frame
      frame['a'] = frame['B']
      assert_sp_series_equal(frame['a'], frame['B'])
  def test_setitem_array(self):
      frame = self.frame

      # a SparseArray view of a column can be inserted as a new column
      sparse_values = frame['B'].view(SparseArray)
      frame['E'] = sparse_values
      assert_sp_series_equal(frame['E'], frame['B'])

      # an array one element too short must raise
      self.assertRaises(Exception, frame.__setitem__, 'F',
                        sparse_values[:-1])
  def test_delitem(self):
      frame = self.frame
      col_a = frame['A']
      col_c = frame['C']

      # dropping 'B' must leave the other columns untouched
      del frame['B']
      self.assertTrue('B' not in frame)
      assert_sp_series_equal(frame['A'], col_a)
      assert_sp_series_equal(frame['C'], col_c)

      del frame['D']
      self.assertTrue('D' not in frame)

      del frame['A']
      self.assertTrue('A' not in frame)
  def test_set_columns(self):
      frame = self.frame

      # assigning the same labels back is accepted
      frame.columns = frame.columns

      # a label set that is one short must raise
      too_short = frame.columns[:-1]
      self.assertRaises(Exception, setattr, frame, 'columns', too_short)
  def test_set_index(self):
      frame = self.frame

      # assigning the same index back is accepted
      frame.index = frame.index

      # an index that is one label short must raise
      too_short = frame.index[:-1]
      self.assertRaises(Exception, setattr, frame, 'index', too_short)
  def test_append(self):
      # two row slices glued back together reproduce the frame
      top = self.frame[:5]
      bottom = self.frame[5:]
      rebuilt = top.append(bottom)
      assert_sp_frame_equal(rebuilt, self.frame)

      # the upper piece carries only the first three columns; compare
      # just those columns after appending
      top = self.frame.ix[:5, :3]
      bottom = self.frame.ix[5:]
      rebuilt = top.append(bottom)
      assert_sp_frame_equal(rebuilt.ix[:, :3], self.frame.ix[:, :3])
  def test_apply(self):
      # an element-wise function returns a SparseDataFrame
      rooted = self.frame.apply(np.sqrt)
      self.assertTrue(isinstance(rooted, SparseDataFrame))
      assert_almost_equal(rooted.values, np.sqrt(self.frame.values))

      # on fill_frame the resulting fill value is expected to be sqrt(2)
      rooted = self.fill_frame.apply(np.sqrt)
      self.assertTrue(rooted['A'].fill_value == np.sqrt(2))

      # agg / broadcast
      summed = self.frame.apply(np.sum)
      assert_series_equal(summed, self.frame.to_dense().apply(np.sum))

      spread = self.frame.apply(np.sum, broadcast=True)
      self.assertTrue(isinstance(spread, SparseDataFrame))
      assert_frame_equal(
          spread.to_dense(),
          self.frame.to_dense().apply(np.sum, broadcast=True))

      # applying to the empty frame hands back the very same object
      self.assertTrue(self.empty.apply(np.sqrt) is self.empty)
  def test_applymap(self):
      # just test that it works
      def _double(value):
          return value * 2

      doubled = self.frame.applymap(_double)
      self.assertTrue(isinstance(doubled, SparseDataFrame))
  def test_astype(self):
      # casting the sparse frame to int64 is expected to raise
      caster = self.frame.astype
      self.assertRaises(Exception, caster, np.int64)
  def test_fillna(self):
      # fillna on the sparse frame is expected to be unimplemented
      filler = self.frame.fillna
      self.assertRaises(NotImplementedError, filler, 0)
  def test_rename(self):
      # just check this works
      def _with_length(label):
          return '%s%d' % (label, len(label))

      self.frame.rename(index=str)
      self.frame.rename(columns=_with_length)
  def test_corr(self):
      # sparse correlation must agree with the dense computation
      sparse_corr = self.frame.corr()
      dense_corr = self.frame.to_dense().corr()
      assert_frame_equal(sparse_corr, dense_corr)
  def test_describe(self):
      # smoke test: describe() must run with an all-NaN column present
      self.frame['foo'] = np.nan
      self.frame.describe()
  def test_join(self):
      # disjoint column sets join back into the full frame
      lhs = self.frame.ix[:, ['A', 'B']]
      rhs = self.frame.ix[:, ['C', 'D']]
      assert_sp_frame_equal(lhs.join(rhs), self.frame)

      # both sides carry column 'B': the join must raise
      clashing = self.frame.ix[:, ['B', 'D']]
      self.assertRaises(Exception, lhs.join, clashing)
  def test_reindex(self):
      def _exercise(sp_frame):
          full_index = sp_frame.index
          every_other = full_index[::2]
          first_five = full_index[:5]  # not used by any check below

          sparse_sub = sp_frame.reindex(every_other)
          dense_sub = sp_frame.to_dense().reindex(every_other)
          assert_frame_equal(sparse_sub.to_dense(), dense_sub)

          # a plain list of labels gives the same result
          assert_frame_equal(
              sp_frame.reindex(list(every_other)).to_dense(),
              dense_sub)

          # reindexing the subset back onto the full index
          sparse_back = sparse_sub.reindex(full_index)
          dense_back = dense_sub.reindex(full_index)
          assert_frame_equal(sparse_back.to_dense(), dense_back)

          # propagate CORRECT fill value
          assert_almost_equal(sparse_sub.default_fill_value,
                              sp_frame.default_fill_value)
          assert_almost_equal(sparse_sub['A'].fill_value,
                              sp_frame['A'].fill_value)

          # length zero
          no_rows = sp_frame.reindex([])
          self.assertEqual(len(no_rows), 0)
          self.assertEqual(len(no_rows.columns), len(sp_frame.columns))
          self.assertEqual(len(no_rows['A']), 0)

          # frame being reindexed has length zero
          regrown = no_rows.reindex(full_index)
          self.assertEqual(len(regrown), len(sp_frame))
          self.assertEqual(len(regrown.columns), len(sp_frame.columns))
          self.assertEqual(len(regrown['A']), len(sp_frame))

          # reindex columns; 'Z' is a label the frame does not have
          by_column = sp_frame.reindex(columns=['A', 'B', 'Z'])
          self.assertEqual(len(by_column.columns), 3)
          assert_almost_equal(by_column['Z'].fill_value,
                              sp_frame.default_fill_value)
          self.assertTrue(np.isnan(by_column['Z'].sp_values).all())

      for attr in ('frame', 'iframe', 'zframe', 'fill_frame'):
          _exercise(getattr(self, attr))

      # with copy=False a column added to the result shows up in the
      # source frame as well
      same_data = self.frame.reindex(self.frame.index, copy=False)
      same_data['F'] = same_data['A']
      self.assertTrue('F' in self.frame)

      # without copy=False the source frame is left alone
      separate = self.frame.reindex(self.frame.index)
      separate['G'] = separate['A']
      self.assertTrue('G' not in self.frame)
  def test_density(self):
      # 10 of the 40 cells are NaN, and the expected density is 0.75
      columns = {
          'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
          'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
          'C': np.arange(10),
          'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
      }
      sp_frame = SparseDataFrame(columns)
      self.assertEqual(sp_frame.density, 0.75)
  def test_to_dense(self):
      def _same_as_dense(sp_frame):
          # the sparse frame is compared directly with its dense form
          as_dense = sp_frame.to_dense()
          assert_frame_equal(sp_frame, as_dense)

      self._check_all(_same_as_dense)
  def test_stack_sparse_frame(self):
      def _stacks_like_dense(sp_frame):
          # the dense conversion is run but its result is not compared
          as_dense = sp_frame.to_dense()

          # stacking through a one-item Panel vs. the sparse stacker
          one_item = Panel.from_dict({'foo': sp_frame})
          via_panel = one_item.to_frame()
          via_sparse = spf.stack_sparse_frame(sp_frame)

          self.assertTrue(np.array_equal(via_panel.values,
                                         via_sparse.values))

      _stacks_like_dense(self.frame)
      _stacks_like_dense(self.iframe)

      # for now
      self.assertRaises(Exception, _stacks_like_dense, self.zframe)
      self.assertRaises(Exception, _stacks_like_dense, self.fill_frame)
  def test_transpose(self):
      def _round_trip(sp_frame):
          # transposing twice must give back an equal frame
          flipped = sp_frame.T
          restored = flipped.T
          assert_sp_frame_equal(sp_frame, restored)

      self._check_all(_round_trip)
  def test_shift(self):
      def _exercise(sp_frame):
          # a zero shift returns an equal frame that is a new object
          unmoved = sp_frame.shift(0)
          self.assertTrue(unmoved is not sp_frame)
          assert_sp_frame_equal(unmoved, sp_frame)

          # each variant is checked against the same call on the dense
          # frame
          shifters = [
              lambda s: s.shift(1),
              lambda s: s.shift(-2),
              lambda s: s.shift(2, timeRule='WEEKDAY'),
              lambda s: s.shift(2, offset=datetools.bday),
          ]
          for shifter in shifters:
              _dense_frame_compare(sp_frame, shifter)

      self._check_all(_exercise)
  def test_count(self):
      as_dense = self.frame.to_dense

      # default axis
      sparse_counts = self.frame.count()
      dense_counts = as_dense().count()
      assert_series_equal(sparse_counts, dense_counts)

      # axis 1
      sparse_counts = self.frame.count(1)
      dense_counts = as_dense().count(1)

      # win32 don't check dtype
      assert_series_equal(sparse_counts, dense_counts, check_dtype=False)
  def test_cumsum(self):
      # the running sum stays sparse and matches the dense computation
      sparse_total = self.frame.cumsum()
      dense_total = self.frame.to_dense().cumsum()

      self.assertTrue(isinstance(sparse_total, SparseDataFrame))
      assert_frame_equal(sparse_total.to_dense(), dense_total)
  def _check_all(self, check_func):
      """Call ``check_func`` once on each frame fixture, in order.

      The fixtures are ``self.frame``, ``self.iframe``, ``self.zframe``
      and ``self.fill_frame``; each is passed as the only argument.
      """
      for attr in ('frame', 'iframe', 'zframe', 'fill_frame'):
          check_func(getattr(self, attr))
  def test_combine_first(self):
      full = self.frame

      # every-other-row slice patched from a sparse and a dense frame
      from_sparse = full[::2].combine_first(full)
      from_dense = full[::2].combine_first(full.to_dense())

      # reference: the same operation on dense frames, made sparse again
      reference = full[::2].to_dense().combine_first(full.to_dense())
      reference = reference.to_sparse(fill_value=full.default_fill_value)

      assert_sp_frame_equal(from_sparse, from_dense)
      assert_sp_frame_equal(from_sparse, reference)
  def _dense_series_compare(s, f):
      """Check that ``f`` gives matching results on sparse and dense data.

      ``f`` is called on ``s`` and on ``s.to_dense()``. The first result
      must be a SparseSeries whose dense form equals the second result.
      """
      sparse_out = f(s)
      assert isinstance(sparse_out, SparseSeries)

      dense_out = f(s.to_dense())
      assert_series_equal(sparse_out.to_dense(), dense_out)
  def _dense_frame_compare(frame, f):
      """Check that ``f`` gives matching results on sparse and dense data.

      ``f`` is called on ``frame`` and on ``frame.to_dense()``. The first
      result must be a SparseDataFrame whose dense form equals the second
      result.
      """
      result = f(frame)
      # The type check belongs on the output of ``f``, as in
      # _dense_series_compare; checking the input says nothing about
      # whether the operation preserved sparseness.
      assert isinstance(result, SparseDataFrame)

      dense_result = f(frame.to_dense())
      assert_frame_equal(result.to_dense(), dense_result)
  def panel_data1():
      """Build an 8-row, 4-column frame with NaN gaps, dated from 1/1/2011."""
      dates = DateRange('1/1/2011', periods=8)
      columns = {
          'A': [nan, nan, nan, 0, 1, 2, 3, 4],
          'B': [0, 1, 2, 3, 4, nan, nan, nan],
          'C': [0, 1, 2, nan, nan, nan, 3, 4],
          'D': [nan, 0, 1, nan, 2, 3, 4, nan],
      }
      return DataFrame(columns, index=dates)
  def panel_data2():
      """Build a 9-row, 4-column frame with NaN gaps, dated from 1/1/2011."""
      dates = DateRange('1/1/2011', periods=9)
      columns = {
          'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
          'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
          'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
          'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan],
      }
      return DataFrame(columns, index=dates)
  def panel_data3():
      """Build a 10-row, 4-column frame with NaN gaps.

      The index is a 10-period DateRange from 1/1/2011 with ``shift(-2)``
      applied.
      """
      dates = DateRange('1/1/2011', periods=10).shift(-2)
      columns = {
          'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
          'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
          'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
          'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan],
      }
      return DataFrame(columns, index=dates)
  969. class TestSparsePanel(TestCase,
  970. test_panel.SafeForLongAndSparse,
  971. test_panel.SafeForSparse):
  @classmethod
  def assert_panel_equal(cls, x, y):
      """Compare two panels with the sparse-aware ``assert_sp_panel_equal``.

      NOTE(review): presumably this is the hook the ``test_panel`` mixin
      tests call for their comparisons; confirm against those mixins.
      """
      assert_sp_panel_equal(x, y)
  def setUp(self):
      # four dense frames keyed by item; ItemA and ItemD come from the
      # same builder
      self.data_dict = dict(
          ItemA=panel_data1(),
          ItemB=panel_data2(),
          ItemC=panel_data3(),
          ItemD=panel_data1(),
      )
      self.panel = SparsePanel(self.data_dict)
  @staticmethod
  def _test_op(panel, op):
      """Check ``op(panel, 1)`` against ``op`` applied to 'ItemA' alone."""
      # arithmetic tests
      whole = op(panel, 1)
      from_panel = whole['ItemA']
      from_item = op(panel['ItemA'], 1)
      assert_sp_frame_equal(from_panel, from_item)
  def test_constructor(self):
      # 'Item0' is not a key of data_dict: construction must raise
      wanted_items = ['Item0', 'ItemA', 'ItemB']
      self.assertRaises(Exception, SparsePanel, self.data_dict,
                        items=wanted_items)
  def test_from_dict(self):
      # from_dict must build the same panel as the plain constructor
      built = SparsePanel.from_dict(self.data_dict)
      assert_sp_panel_equal(built, self.panel)
  def test_pickle(self):
      def _round_trip(sp_panel):
          # dump with the highest protocol, load, compare with the source
          payload = pickle.dumps(sp_panel,
                                 protocol=pickle.HIGHEST_PROTOCOL)
          restored = pickle.loads(payload)
          assert_sp_panel_equal(sp_panel, restored)

      _round_trip(self.panel)
  def test_dense_to_sparse(self):
      # a dense Panel converted with to_sparse() holds SparseSeries
      dense_panel = Panel.from_dict(self.data_dict)
      converted = dense_panel.to_sparse()
      column = converted['ItemA']['A']
      self.assertTrue(isinstance(column, SparseSeries))
  def test_to_dense(self):
      # densifying the sparse panel must match a Panel built directly
      # from the same dict
      from_sparse = self.panel.to_dense()
      from_dict = Panel.from_dict(self.data_dict)
      assert_panel_equal(from_sparse, from_dict)
  def test_to_frame(self):
      def _matches_dense(sp_panel):
          # values and index must agree with the dense panel's to_frame
          sparse_long = sp_panel.to_frame()
          dense_long = sp_panel.to_dense().to_frame()
          self.assertTrue(np.array_equal(sparse_long.values,
                                         dense_long.values))
          self.assertTrue(sparse_long.index.equals(dense_long.index))

      _matches_dense(self.panel)
      _matches_dense(self.panel.reindex(items=['ItemA']))

      # a panel with default_fill_value=0 is expected to raise
      zero_filled = SparsePanel(self.data_dict, default_fill_value=0)
      self.assertRaises(Exception, zero_filled.to_frame)

      # filter_observations=False is expected to raise
      self.assertRaises(Exception, self.panel.to_frame,
                        filter_observations=False)
  def test_long_to_wide_sparse(self):
      # Deliberately empty.
      # NOTE(review): presumably shadows a same-named test inherited
      # from the test_panel mixins; confirm against those classes.
      pass
  def test_values(self):
      # Deliberately empty.
      # NOTE(review): presumably shadows a same-named test inherited
      # from the test_panel mixins; confirm against those classes.
      pass
  def test_setitem(self):
      panel = self.panel

      # store ItemC again, once as a sparse frame and once as dense
      panel['ItemE'] = panel['ItemC']
      panel['ItemF'] = panel['ItemC'].to_dense()

      assert_sp_frame_equal(panel['ItemE'], panel['ItemC'])
      assert_sp_frame_equal(panel['ItemF'], panel['ItemC'])

      expected_items = ['ItemA', 'ItemB', 'ItemC',
                        'ItemD', 'ItemE', 'ItemF']
      assert_almost_equal(panel.items, expected_items)

      # a scalar cannot be stored as an item
      self.assertRaises(Exception, panel.__setitem__, 'item6', 1)
  def test_set_value(self):
      def _set_and_read(item, major, minor, val=1.5):
          # set_value must return a new object holding the value
          updated = self.panel.set_value(item, major, minor, val)
          self.assertTrue(updated is not self.panel)
          self.assertEqual(updated.get_value(item, major, minor), val)

      # existing item with existing labels
      _set_and_read('ItemA', self.panel.major_axis[4],
                    self.panel.minor_axis[3])
      # item label that setUp does not create
      _set_and_read('ItemF', self.panel.major_axis[4],
                    self.panel.minor_axis[3])
      # string labels 'foo' / 'bar' on the major / minor axes
      _set_and_read('ItemF', 'foo', self.panel.minor_axis[3])
      _set_and_read('ItemE', 'foo', 'bar')
  def test_delitem_pop(self):
      panel = self.panel

      del panel['ItemB']
      assert_almost_equal(panel.items, ['ItemA', 'ItemC', 'ItemD'])

      # pop must return the very object that item access returns
      fetched = panel['ItemC']
      popped = panel.pop('ItemC')
      self.assertTrue(popped is fetched)
      assert_almost_equal(panel.items, ['ItemA', 'ItemD'])

      # deleting an item that is already gone raises KeyError
      self.assertRaises(KeyError, panel.__delitem__, 'ItemC')
  def test_copy(self):
      # a copy must compare equal to its source
      duplicate = self.panel.copy()
      assert_sp_panel_equal(duplicate, self.panel)
  def test_reindex(self):
      def _matches_dense(sp_panel, items, major, minor):
          # reindex sparse and dense the same way, then compare densely
          axes = dict(items=items, major=major, minor=minor)
          sparse_re = sp_panel.reindex(**axes)
          dense_re = sp_panel.to_dense().reindex(**axes)
          assert_panel_equal(sparse_re.to_dense(), dense_re)

      # first two items, every other label on both axes
      _matches_dense(self.panel, self.panel.items[:2],
                     self.panel.major_axis[::2],
                     self.panel.minor_axis[::2])
      # items left as None
      _matches_dense(self.panel, None,
                     self.panel.major_axis[::2],
                     self.panel.minor_axis[::2])

      # calling reindex with no arguments raises ValueError
      self.assertRaises(ValueError, self.panel.reindex)

      # TODO: do something about this later...
      self.assertRaises(Exception, self.panel.reindex,
                        items=['item0', 'ItemA', 'ItemB'])

      # test copying: a column added to the copy must not appear in
      # the source panel
      copied = self.panel.reindex(self.panel.major_axis, copy=True)
      copied['ItemA']['E'] = copied['ItemA']['A']
      self.assertTrue('E' not in self.panel['ItemA'])
  def test_operators(self):
      def _check_ops(sp_panel):
          def _matches_dense(op):
              # run op on the sparse panel and its dense form, compare
              as_dense = sp_panel.to_dense()
              from_sparse = op(sp_panel)
              from_dense = op(as_dense)
              assert_panel_equal(from_sparse.to_dense(), from_dense)

          operations = [
              # scalar addition
              lambda x: x + 2,
              # add a panel reindexed to every other major label
              lambda x: x.add(x.reindex(major=x.major_axis[::2])),
              # subtract the mean along axis 0, 1 and 2 in turn
              lambda x: x.subtract(x.mean(0), axis=0),
              lambda x: x.subtract(x.mean(1), axis=1),
              lambda x: x.subtract(x.mean(2), axis=2),
          ]
          for operation in operations:
              _matches_dense(operation)

          # TODO: this case not yet supported!
          # _matches_dense(lambda x: x.add(x.to_frame()))

      _check_ops(self.panel)
  1094. def test_major_xs(self):
  1095. def _dense_comp(sparse):
  1096. dense = sparse.to_dense()

Large files are truncated, but you can click here to view the full file