PageRenderTime 37ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/io/tests/test_pytables.py

https://github.com/benracine/pandas
Python | 472 lines | 368 code | 87 blank | 17 comment | 29 complexity | 517823e33303f39e495407ce183b64b0 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import nose
  2. import unittest
  3. import os
  4. import sys
  5. import numpy as np
  6. from pandas import (Series, DataFrame, Panel, LongPanel, DateRange,
  7. MultiIndex)
  8. from pandas.io.pytables import HDFStore
  9. import pandas.util.testing as tm
  10. try:
  11. import tables
  12. except ImportError:
  13. raise nose.SkipTest('no pytables')
  14. from distutils.version import LooseVersion
  15. _default_compressor = LooseVersion(tables.__version__) >= '2.2' \
  16. and 'blosc' or 'zlib'
  17. class TesttHDFStore(unittest.TestCase):
  18. path = '__test__.h5'
  19. scratchpath = '__scratch__.h5'
  20. def setUp(self):
  21. self.store = HDFStore(self.path)
  22. def tearDown(self):
  23. self.store.close()
  24. os.remove(self.path)
  25. def test_len(self):
  26. self.store['a'] = tm.makeTimeSeries()
  27. self.store['b'] = tm.makeStringSeries()
  28. self.store['c'] = tm.makeDataFrame()
  29. self.store['d'] = tm.makePanel()
  30. self.assertEquals(len(self.store), 4)
  31. def test_repr(self):
  32. repr(self.store)
  33. self.store['a'] = tm.makeTimeSeries()
  34. self.store['b'] = tm.makeStringSeries()
  35. self.store['c'] = tm.makeDataFrame()
  36. self.store['d'] = tm.makePanel()
  37. repr(self.store)
  38. def test_reopen_handle(self):
  39. self.store['a'] = tm.makeTimeSeries()
  40. self.store.open('w', warn=False)
  41. self.assert_(self.store.handle.isopen)
  42. self.assertEquals(len(self.store), 0)
  43. def test_flush(self):
  44. self.store['a'] = tm.makeTimeSeries()
  45. self.store.flush()
  46. def test_get(self):
  47. self.store['a'] = tm.makeTimeSeries()
  48. left = self.store.get('a')
  49. right = self.store['a']
  50. tm.assert_series_equal(left, right)
  51. self.assertRaises(AttributeError, self.store.get, 'b')
  52. def test_put(self):
  53. ts = tm.makeTimeSeries()
  54. df = tm.makeTimeDataFrame()
  55. self.store['a'] = ts
  56. self.store['b'] = df[:10]
  57. self.store.put('c', df[:10], table=True)
  58. # not OK, not a table
  59. self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True)
  60. # node does not currently exist, test _is_table_type returns False in
  61. # this case
  62. self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True)
  63. # OK
  64. self.store.put('c', df[10:], append=True)
  65. # overwrite table
  66. self.store.put('c', df[:10], table=True, append=False)
  67. tm.assert_frame_equal(df[:10], self.store['c'])
  68. def test_put_compression(self):
  69. df = tm.makeTimeDataFrame()
  70. self.store.put('c', df, table=True, compression='zlib')
  71. tm.assert_frame_equal(self.store['c'], df)
  72. # can't compress if table=False
  73. self.assertRaises(ValueError, self.store.put, 'b', df,
  74. table=False, compression='zlib')
  75. def test_put_compression_blosc(self):
  76. tm.skip_if_no_package('tables', '2.2', app='blosc support')
  77. df = tm.makeTimeDataFrame()
  78. # can't compress if table=False
  79. self.assertRaises(ValueError, self.store.put, 'b', df,
  80. table=False, compression='blosc')
  81. self.store.put('c', df, table=True, compression='blosc')
  82. tm.assert_frame_equal(self.store['c'], df)
  83. def test_put_integer(self):
  84. # non-date, non-string index
  85. df = DataFrame(np.random.randn(50, 100))
  86. self._check_roundtrip(df, tm.assert_frame_equal)
  87. def test_append(self):
  88. df = tm.makeTimeDataFrame()
  89. self.store.put('c', df[:10], table=True)
  90. self.store.append('c', df[10:])
  91. tm.assert_frame_equal(self.store['c'], df)
  92. def test_append_diff_item_order(self):
  93. wp = tm.makePanel()
  94. wp1 = wp.ix[:, :10, :]
  95. wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]
  96. self.store.put('panel', wp1, table=True)
  97. self.assertRaises(Exception, self.store.put, 'panel', wp2,
  98. append=True)
  99. def test_remove(self):
  100. ts = tm.makeTimeSeries()
  101. df = tm.makeDataFrame()
  102. self.store['a'] = ts
  103. self.store['b'] = df
  104. self.store.remove('a')
  105. self.assertEquals(len(self.store), 1)
  106. tm.assert_frame_equal(df, self.store['b'])
  107. self.store.remove('b')
  108. self.assertEquals(len(self.store), 0)
  109. def test_remove_where_not_exist(self):
  110. crit1 = {
  111. 'field' : 'index',
  112. 'op' : '>',
  113. 'value' : 'foo'
  114. }
  115. self.store.remove('a', where=[crit1])
  116. def test_remove_crit(self):
  117. wp = tm.makePanel()
  118. self.store.put('wp', wp, table=True)
  119. date = wp.major_axis[len(wp.major_axis) // 2]
  120. crit1 = {
  121. 'field' : 'index',
  122. 'op' : '>',
  123. 'value' : date
  124. }
  125. crit2 = {
  126. 'field' : 'column',
  127. 'value' : ['A', 'D']
  128. }
  129. self.store.remove('wp', where=[crit1])
  130. self.store.remove('wp', where=[crit2])
  131. result = self.store['wp']
  132. expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
  133. tm.assert_panel_equal(result, expected)
  134. def test_series(self):
  135. s = tm.makeStringSeries()
  136. self._check_roundtrip(s, tm.assert_series_equal)
  137. ts = tm.makeTimeSeries()
  138. self._check_roundtrip(ts, tm.assert_series_equal)
  139. def test_timeseries_preepoch(self):
  140. if sys.version_info[0] == 2 and sys.version_info[1] < 7:
  141. raise nose.SkipTest
  142. dr = DateRange('1/1/1940', '1/1/1960')
  143. ts = Series(np.random.randn(len(dr)), index=dr)
  144. try:
  145. self._check_roundtrip(ts, tm.assert_series_equal)
  146. except OverflowError:
  147. raise nose.SkipTest('known failer on some windows platforms')
  148. def test_frame(self):
  149. df = tm.makeDataFrame()
  150. # put in some random NAs
  151. df.values[0, 0] = np.nan
  152. df.values[5, 3] = np.nan
  153. self._check_roundtrip_table(df, tm.assert_frame_equal)
  154. self._check_roundtrip(df, tm.assert_frame_equal)
  155. self._check_roundtrip_table(df, tm.assert_frame_equal,
  156. compression=True)
  157. self._check_roundtrip(df, tm.assert_frame_equal,
  158. compression=True)
  159. tdf = tm.makeTimeDataFrame()
  160. self._check_roundtrip(tdf, tm.assert_frame_equal)
  161. self._check_roundtrip(tdf, tm.assert_frame_equal,
  162. compression=True)
  163. # not consolidated
  164. df['foo'] = np.random.randn(len(df))
  165. self.store['df'] = df
  166. recons = self.store['df']
  167. self.assert_(recons._data.is_consolidated())
  168. # empty
  169. self.assertRaises(ValueError, self._check_roundtrip, df[:0],
  170. tm.assert_frame_equal)
  171. def test_can_serialize_dates(self):
  172. rng = [x.date() for x in DateRange('1/1/2000', '1/30/2000')]
  173. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  174. self._check_roundtrip(frame, tm.assert_frame_equal)
  175. def test_store_hierarchical(self):
  176. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  177. ['one', 'two', 'three']],
  178. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  179. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  180. names=['foo', 'bar'])
  181. frame = DataFrame(np.random.randn(10, 3), index=index,
  182. columns=['A', 'B', 'C'])
  183. self._check_roundtrip(frame, tm.assert_frame_equal)
  184. self._check_roundtrip(frame.T, tm.assert_frame_equal)
  185. self._check_roundtrip(frame['A'], tm.assert_series_equal)
  186. # check that the names are stored
  187. try:
  188. store = HDFStore(self.scratchpath)
  189. store['frame'] = frame
  190. recons = store['frame']
  191. assert(recons.index.names == ['foo', 'bar'])
  192. finally:
  193. store.close()
  194. os.remove(self.scratchpath)
  195. def test_store_index_name(self):
  196. df = tm.makeDataFrame()
  197. df.index.name = 'foo'
  198. try:
  199. store = HDFStore(self.scratchpath)
  200. store['frame'] = df
  201. recons = store['frame']
  202. assert(recons.index.name == 'foo')
  203. finally:
  204. store.close()
  205. os.remove(self.scratchpath)
  206. def test_store_series_name(self):
  207. df = tm.makeDataFrame()
  208. series = df['A']
  209. try:
  210. store = HDFStore(self.scratchpath)
  211. store['series'] = series
  212. recons = store['series']
  213. assert(recons.name == 'A')
  214. finally:
  215. store.close()
  216. os.remove(self.scratchpath)
  217. def test_store_mixed(self):
  218. def _make_one():
  219. df = tm.makeDataFrame()
  220. df['obj1'] = 'foo'
  221. df['obj2'] = 'bar'
  222. df['bool1'] = df['A'] > 0
  223. df['bool2'] = df['B'] > 0
  224. df['int1'] = 1
  225. df['int2'] = 2
  226. return df.consolidate()
  227. df1 = _make_one()
  228. df2 = _make_one()
  229. self._check_roundtrip(df1, tm.assert_frame_equal)
  230. self._check_roundtrip(df2, tm.assert_frame_equal)
  231. self.store['obj'] = df1
  232. tm.assert_frame_equal(self.store['obj'], df1)
  233. self.store['obj'] = df2
  234. tm.assert_frame_equal(self.store['obj'], df2)
  235. # storing in Table not yet supported
  236. self.assertRaises(Exception, self.store.put, 'foo',
  237. df1, table=True)
  238. # check that can store Series of all of these types
  239. self._check_roundtrip(df1['obj1'], tm.assert_series_equal)
  240. self._check_roundtrip(df1['bool1'], tm.assert_series_equal)
  241. self._check_roundtrip(df1['int1'], tm.assert_series_equal)
  242. # try with compression
  243. self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
  244. compression=True)
  245. self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
  246. compression=True)
  247. self._check_roundtrip(df1['int1'], tm.assert_series_equal,
  248. compression=True)
  249. self._check_roundtrip(df1, tm.assert_frame_equal,
  250. compression=True)
  251. def test_wide(self):
  252. wp = tm.makePanel()
  253. self._check_roundtrip(wp, tm.assert_panel_equal)
  254. def test_wide_table(self):
  255. wp = tm.makePanel()
  256. self._check_roundtrip_table(wp, tm.assert_panel_equal)
  257. def test_wide_table_dups(self):
  258. wp = tm.makePanel()
  259. try:
  260. store = HDFStore(self.scratchpath)
  261. store._quiet = True
  262. store.put('panel', wp, table=True)
  263. store.put('panel', wp, table=True, append=True)
  264. recons = store['panel']
  265. tm.assert_panel_equal(recons, wp)
  266. finally:
  267. store.close()
  268. os.remove(self.scratchpath)
  269. def test_long(self):
  270. def _check(left, right):
  271. tm.assert_panel_equal(left.to_wide(),
  272. right.to_wide())
  273. wp = tm.makePanel()
  274. self._check_roundtrip(wp.to_long(), _check)
  275. # empty
  276. self.assertRaises(ValueError, self._check_roundtrip, wp.to_long()[:0],
  277. _check)
  278. def test_longpanel(self):
  279. pass
  280. def test_overwrite_node(self):
  281. self.store['a'] = tm.makeTimeDataFrame()
  282. ts = tm.makeTimeSeries()
  283. self.store['a'] = ts
  284. tm.assert_series_equal(self.store['a'], ts)
  285. def test_panel_select(self):
  286. wp = tm.makePanel()
  287. self.store.put('wp', wp, table=True)
  288. date = wp.major_axis[len(wp.major_axis) // 2]
  289. crit1 = {
  290. 'field' : 'index',
  291. 'op' : '>=',
  292. 'value' : date
  293. }
  294. crit2 = {
  295. 'field' : 'column',
  296. 'value' : ['A', 'D']
  297. }
  298. result = self.store.select('wp', [crit1, crit2])
  299. expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
  300. tm.assert_panel_equal(result, expected)
  301. def test_frame_select(self):
  302. df = tm.makeTimeDataFrame()
  303. self.store.put('frame', df, table=True)
  304. date = df.index[len(df) // 2]
  305. crit1 = {
  306. 'field' : 'index',
  307. 'op' : '>=',
  308. 'value' : date
  309. }
  310. crit2 = {
  311. 'field' : 'column',
  312. 'value' : ['A', 'D']
  313. }
  314. crit3 = {
  315. 'field' : 'column',
  316. 'value' : 'A'
  317. }
  318. result = self.store.select('frame', [crit1, crit2])
  319. expected = df.ix[date:, ['A', 'D']]
  320. tm.assert_frame_equal(result, expected)
  321. result = self.store.select('frame', [crit3])
  322. expected = df.ix[:, ['A']]
  323. tm.assert_frame_equal(result, expected)
  324. # can't select if not written as table
  325. self.store['frame'] = df
  326. self.assertRaises(Exception, self.store.select,
  327. 'frame', [crit1, crit2])
  328. def test_select_filter_corner(self):
  329. df = DataFrame(np.random.randn(50, 100))
  330. df.index = ['%.3d' % c for c in df.index]
  331. df.columns = ['%.3d' % c for c in df.columns]
  332. self.store.put('frame', df, table=True)
  333. crit = {
  334. 'field' : 'column',
  335. 'value' : df.columns[:75]
  336. }
  337. result = self.store.select('frame', [crit])
  338. tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
  339. def _check_roundtrip(self, obj, comparator, compression=False):
  340. options = {}
  341. if compression:
  342. options['complib'] = _default_compressor
  343. store = HDFStore(self.scratchpath, 'w', **options)
  344. try:
  345. store['obj'] = obj
  346. retrieved = store['obj']
  347. comparator(retrieved, obj)
  348. finally:
  349. store.close()
  350. os.remove(self.scratchpath)
  351. def _check_roundtrip_table(self, obj, comparator, compression=False):
  352. options = {}
  353. if compression:
  354. options['complib'] = _default_compressor
  355. store = HDFStore(self.scratchpath, 'w', **options)
  356. try:
  357. store.put('obj', obj, table=True)
  358. retrieved = store['obj']
  359. sorted_obj = _test_sort(obj)
  360. comparator(retrieved, sorted_obj)
  361. finally:
  362. store.close()
  363. os.remove(self.scratchpath)
  364. def test_legacy_read(self):
  365. pth = curpath()
  366. store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
  367. store['a']
  368. store['b']
  369. store['c']
  370. store['d']
  371. store.close()
  372. def curpath():
  373. pth, _ = os.path.split(os.path.abspath(__file__))
  374. return pth
  375. def _test_sort(obj):
  376. if isinstance(obj, DataFrame):
  377. return obj.reindex(sorted(obj.index))
  378. elif isinstance(obj, Panel):
  379. return obj.reindex(major=sorted(obj.major_axis))
  380. else:
  381. raise ValueError('type not supported here')
  382. if __name__ == '__main__':
  383. import nose
  384. nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
  385. exit=False)