PageRenderTime 91ms CodeModel.GetById 40ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/io/tests/test_pytables.py

https://github.com/bergtholdt/pandas
Python | 400 lines | 313 code | 73 blank | 14 comment | 20 complexity | 6a8fa734a89f34b644a91899525d7d4c MD5 | raw file
  1. import nose
  2. import unittest
  3. import os
  4. import sys
  5. import numpy as np
  6. from pandas import (Series, DataFrame, Panel, LongPanel, DateRange,
  7. MultiIndex)
  8. from pandas.io.pytables import HDFStore
  9. import pandas.util.testing as tm
  10. try:
  11. import tables
  12. except ImportError:
  13. raise nose.SkipTest('no pytables')
  14. class TesttHDFStore(unittest.TestCase):
  15. path = '__test__.h5'
  16. scratchpath = '__scratch__.h5'
  17. def setUp(self):
  18. self.store = HDFStore(self.path)
  19. def tearDown(self):
  20. self.store.close()
  21. os.remove(self.path)
  22. def test_len(self):
  23. self.store['a'] = tm.makeTimeSeries()
  24. self.store['b'] = tm.makeStringSeries()
  25. self.store['c'] = tm.makeDataFrame()
  26. self.store['d'] = tm.makePanel()
  27. self.assertEquals(len(self.store), 4)
  28. def test_repr(self):
  29. repr(self.store)
  30. self.store['a'] = tm.makeTimeSeries()
  31. self.store['b'] = tm.makeStringSeries()
  32. self.store['c'] = tm.makeDataFrame()
  33. self.store['d'] = tm.makePanel()
  34. repr(self.store)
  35. def test_reopen_handle(self):
  36. self.store['a'] = tm.makeTimeSeries()
  37. self.store.open('w', warn=False)
  38. self.assert_(self.store.handle.isopen)
  39. self.assertEquals(len(self.store), 0)
  40. def test_flush(self):
  41. self.store['a'] = tm.makeTimeSeries()
  42. self.store.flush()
  43. def test_get(self):
  44. self.store['a'] = tm.makeTimeSeries()
  45. left = self.store.get('a')
  46. right = self.store['a']
  47. tm.assert_series_equal(left, right)
  48. self.assertRaises(AttributeError, self.store.get, 'b')
  49. def test_put(self):
  50. ts = tm.makeTimeSeries()
  51. df = tm.makeTimeDataFrame()
  52. self.store['a'] = ts
  53. self.store['b'] = df[:10]
  54. self.store.put('c', df[:10], table=True)
  55. # not OK, not a table
  56. self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True)
  57. # node does not currently exist, test _is_table_type returns False in
  58. # this case
  59. self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True)
  60. # OK
  61. self.store.put('c', df[10:], append=True)
  62. # overwrite table
  63. self.store.put('c', df[:10], table=True, append=False)
  64. tm.assert_frame_equal(df[:10], self.store['c'])
  65. def test_put_compression(self):
  66. df = tm.makeTimeDataFrame()
  67. self.store.put('c', df, table=True, compression='blosc')
  68. tm.assert_frame_equal(self.store['c'], df)
  69. self.store.put('c', df, table=True, compression='zlib')
  70. tm.assert_frame_equal(self.store['c'], df)
  71. # can't compress if table=False
  72. self.assertRaises(ValueError, self.store.put, 'b', df,
  73. table=False, compression='blosc')
  74. def test_put_integer(self):
  75. # non-date, non-string index
  76. df = DataFrame(np.random.randn(50, 100))
  77. self._check_roundtrip(df, tm.assert_frame_equal)
  78. def test_append(self):
  79. df = tm.makeTimeDataFrame()
  80. self.store.put('c', df[:10], table=True)
  81. self.store.append('c', df[10:])
  82. tm.assert_frame_equal(self.store['c'], df)
  83. def test_remove(self):
  84. ts = tm.makeTimeSeries()
  85. df = tm.makeDataFrame()
  86. self.store['a'] = ts
  87. self.store['b'] = df
  88. self.store.remove('a')
  89. self.assertEquals(len(self.store), 1)
  90. tm.assert_frame_equal(df, self.store['b'])
  91. self.store.remove('b')
  92. self.assertEquals(len(self.store), 0)
  93. def test_remove_crit(self):
  94. wp = tm.makePanel()
  95. self.store.put('wp', wp, table=True)
  96. date = wp.major_axis[len(wp.major_axis) // 2]
  97. crit1 = {
  98. 'field' : 'index',
  99. 'op' : '>',
  100. 'value' : date
  101. }
  102. crit2 = {
  103. 'field' : 'column',
  104. 'value' : ['A', 'D']
  105. }
  106. self.store.remove('wp', where=[crit1])
  107. self.store.remove('wp', where=[crit2])
  108. result = self.store['wp']
  109. expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
  110. tm.assert_panel_equal(result, expected)
  111. def test_series(self):
  112. s = tm.makeStringSeries()
  113. self._check_roundtrip(s, tm.assert_series_equal)
  114. ts = tm.makeTimeSeries()
  115. self._check_roundtrip(ts, tm.assert_series_equal)
  116. def test_timeseries_preepoch(self):
  117. if sys.version_info[0] == 2 and sys.version_info[1] < 7:
  118. raise nose.SkipTest
  119. dr = DateRange('1/1/1940', '1/1/1960')
  120. ts = Series(np.random.randn(len(dr)), index=dr)
  121. self._check_roundtrip(ts, tm.assert_series_equal)
  122. def test_frame(self):
  123. df = tm.makeDataFrame()
  124. # put in some random NAs
  125. df.values[0, 0] = np.nan
  126. df.values[5, 3] = np.nan
  127. self._check_roundtrip_table(df, tm.assert_frame_equal)
  128. self._check_roundtrip(df, tm.assert_frame_equal)
  129. self._check_roundtrip_table(df, tm.assert_frame_equal,
  130. compression=True)
  131. self._check_roundtrip(df, tm.assert_frame_equal,
  132. compression=True)
  133. tdf = tm.makeTimeDataFrame()
  134. self._check_roundtrip(tdf, tm.assert_frame_equal)
  135. self._check_roundtrip(tdf, tm.assert_frame_equal,
  136. compression=True)
  137. # not consolidated
  138. df['foo'] = np.random.randn(len(df))
  139. self.store['df'] = df
  140. recons = self.store['df']
  141. self.assert_(recons._data.is_consolidated())
  142. def test_store_hierarchical(self):
  143. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  144. ['one', 'two', 'three']],
  145. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  146. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  147. names=['foo', 'bar'])
  148. frame = DataFrame(np.random.randn(10, 3), index=index,
  149. columns=['A', 'B', 'C'])
  150. self._check_roundtrip(frame, tm.assert_frame_equal)
  151. self._check_roundtrip(frame.T, tm.assert_frame_equal)
  152. self._check_roundtrip(frame['A'], tm.assert_series_equal)
  153. # check that the
  154. try:
  155. store = HDFStore(self.scratchpath)
  156. store['frame'] = frame
  157. recons = store['frame']
  158. assert(recons.index.names == ['foo', 'bar'])
  159. finally:
  160. store.close()
  161. os.remove(self.scratchpath)
  162. def test_store_mixed(self):
  163. def _make_one():
  164. df = tm.makeDataFrame()
  165. df['obj1'] = 'foo'
  166. df['obj2'] = 'bar'
  167. df['bool1'] = df['A'] > 0
  168. df['bool2'] = df['B'] > 0
  169. df['int1'] = 1
  170. df['int2'] = 2
  171. return df.consolidate()
  172. df1 = _make_one()
  173. df2 = _make_one()
  174. self._check_roundtrip(df1, tm.assert_frame_equal)
  175. self._check_roundtrip(df2, tm.assert_frame_equal)
  176. self.store['obj'] = df1
  177. tm.assert_frame_equal(self.store['obj'], df1)
  178. self.store['obj'] = df2
  179. tm.assert_frame_equal(self.store['obj'], df2)
  180. # storing in Table not yet supported
  181. self.assertRaises(Exception, self.store.put, 'foo',
  182. df1, table=True)
  183. # check that can store Series of all of these types
  184. self._check_roundtrip(df1['obj1'], tm.assert_series_equal)
  185. self._check_roundtrip(df1['bool1'], tm.assert_series_equal)
  186. self._check_roundtrip(df1['int1'], tm.assert_series_equal)
  187. # try with compression
  188. self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
  189. compression=True)
  190. self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
  191. compression=True)
  192. self._check_roundtrip(df1['int1'], tm.assert_series_equal,
  193. compression=True)
  194. self._check_roundtrip(df1, tm.assert_frame_equal,
  195. compression=True)
  196. def test_wide(self):
  197. wp = tm.makePanel()
  198. self._check_roundtrip(wp, tm.assert_panel_equal)
  199. def test_wide_table(self):
  200. wp = tm.makePanel()
  201. self._check_roundtrip_table(wp, tm.assert_panel_equal)
  202. def test_wide_table_dups(self):
  203. wp = tm.makePanel()
  204. try:
  205. store = HDFStore(self.scratchpath)
  206. store._quiet = True
  207. store.put('panel', wp, table=True)
  208. store.put('panel', wp, table=True, append=True)
  209. recons = store['panel']
  210. tm.assert_panel_equal(recons, wp)
  211. finally:
  212. store.close()
  213. os.remove(self.scratchpath)
  214. def test_long(self):
  215. def _check(left, right):
  216. tm.assert_panel_equal(left.to_wide(),
  217. right.to_wide())
  218. wp = tm.makePanel()
  219. self._check_roundtrip(wp.to_long(), _check)
  220. def test_longpanel(self):
  221. pass
  222. def test_overwrite_node(self):
  223. self.store['a'] = tm.makeTimeDataFrame()
  224. ts = tm.makeTimeSeries()
  225. self.store['a'] = ts
  226. tm.assert_series_equal(self.store['a'], ts)
  227. def test_panel_select(self):
  228. wp = tm.makePanel()
  229. self.store.put('wp', wp, table=True)
  230. date = wp.major_axis[len(wp.major_axis) // 2]
  231. crit1 = {
  232. 'field' : 'index',
  233. 'op' : '>=',
  234. 'value' : date
  235. }
  236. crit2 = {
  237. 'field' : 'column',
  238. 'value' : ['A', 'D']
  239. }
  240. result = self.store.select('wp', [crit1, crit2])
  241. expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
  242. tm.assert_panel_equal(result, expected)
  243. def test_frame_select(self):
  244. df = tm.makeTimeDataFrame()
  245. self.store.put('frame', df, table=True)
  246. date = df.index[len(df) // 2]
  247. crit1 = {
  248. 'field' : 'index',
  249. 'op' : '>=',
  250. 'value' : date
  251. }
  252. crit2 = {
  253. 'field' : 'column',
  254. 'value' : ['A', 'D']
  255. }
  256. crit3 = {
  257. 'field' : 'column',
  258. 'value' : 'A'
  259. }
  260. result = self.store.select('frame', [crit1, crit2])
  261. expected = df.ix[date:, ['A', 'D']]
  262. tm.assert_frame_equal(result, expected)
  263. result = self.store.select('frame', [crit3])
  264. expected = df.ix[:, ['A']]
  265. tm.assert_frame_equal(result, expected)
  266. # can't select if not written as table
  267. self.store['frame'] = df
  268. self.assertRaises(Exception, self.store.select,
  269. 'frame', [crit1, crit2])
  270. def test_select_filter_corner(self):
  271. df = DataFrame(np.random.randn(50, 100))
  272. df.index = ['%.3d' % c for c in df.index]
  273. df.columns = ['%.3d' % c for c in df.columns]
  274. self.store.put('frame', df, table=True)
  275. crit = {
  276. 'field' : 'column',
  277. 'value' : df.columns[:75]
  278. }
  279. result = self.store.select('frame', [crit])
  280. tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
  281. def _check_roundtrip(self, obj, comparator, compression=False):
  282. options = {}
  283. if compression:
  284. options['complib'] = 'blosc'
  285. store = HDFStore(self.scratchpath, 'w', **options)
  286. try:
  287. store['obj'] = obj
  288. retrieved = store['obj']
  289. comparator(retrieved, obj)
  290. finally:
  291. store.close()
  292. os.remove(self.scratchpath)
  293. def _check_roundtrip_table(self, obj, comparator, compression=False):
  294. options = {}
  295. if compression:
  296. options['complib'] = 'blosc'
  297. store = HDFStore(self.scratchpath, 'w', **options)
  298. try:
  299. store.put('obj', obj, table=True)
  300. retrieved = store['obj']
  301. sorted_obj = _test_sort(obj)
  302. comparator(retrieved, sorted_obj)
  303. finally:
  304. store.close()
  305. os.remove(self.scratchpath)
  306. def test_legacy_read(self):
  307. pth = curpath()
  308. store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
  309. store['a']
  310. store['b']
  311. store['c']
  312. store['d']
  313. store.close()
  314. def curpath():
  315. pth, _ = os.path.split(os.path.abspath(__file__))
  316. return pth
  317. def _test_sort(obj):
  318. if isinstance(obj, DataFrame):
  319. return obj.reindex(sorted(obj.index))
  320. elif isinstance(obj, Panel):
  321. return obj.reindex(major=sorted(obj.major_axis))
  322. else:
  323. raise ValueError('type not supported here')
  324. if __name__ == '__main__':
  325. import nose
  326. nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
  327. exit=False)