PageRenderTime 90ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/io/tests/test_pytables.py

http://github.com/pydata/pandas
Python | 4364 lines | 4124 code | 177 blank | 63 comment | 86 complexity | 59b462ea4b46ff347b15ad7d8b8f3338 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import nose
  2. import sys
  3. import os
  4. import warnings
  5. import tempfile
  6. from contextlib import contextmanager
  7. import datetime
  8. import numpy as np
  9. import pandas
  10. from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
  11. date_range, Index, DatetimeIndex, isnull)
  12. from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf,
  13. IncompatibilityWarning, PerformanceWarning,
  14. AttributeConflictWarning, DuplicateWarning,
  15. PossibleDataLossError, ClosedFileError)
  16. from pandas.io import pytables as pytables
  17. import pandas.util.testing as tm
  18. from pandas.util.testing import (assert_panel4d_equal,
  19. assert_panel_equal,
  20. assert_frame_equal,
  21. assert_series_equal)
  22. from pandas import concat, Timestamp
  23. from pandas import compat, _np_version_under1p7
  24. from pandas.compat import range, lrange, u
  25. from pandas.util.testing import assert_produces_warning
  26. try:
  27. import tables
  28. except ImportError:
  29. raise nose.SkipTest('no pytables')
  30. from distutils.version import LooseVersion
  31. _default_compressor = LooseVersion(tables.__version__) >= '2.2' \
  32. and 'blosc' or 'zlib'
  33. _multiprocess_can_split_ = False
# helpers and context managers to ensure files created by tests are cleaned up
  35. def safe_remove(path):
  36. if path is not None:
  37. try:
  38. os.remove(path)
  39. except:
  40. pass
  41. def safe_close(store):
  42. try:
  43. if store is not None:
  44. store.close()
  45. except:
  46. pass
  47. def create_tempfile(path):
  48. """ create an unopened named temporary file """
  49. return os.path.join(tempfile.gettempdir(),path)
  50. @contextmanager
  51. def ensure_clean_store(path, mode='a', complevel=None, complib=None,
  52. fletcher32=False):
  53. try:
  54. # put in the temporary path if we don't have one already
  55. if not len(os.path.dirname(path)):
  56. path = create_tempfile(path)
  57. store = HDFStore(path, mode=mode, complevel=complevel,
  58. complib=complib, fletcher32=False)
  59. yield store
  60. finally:
  61. safe_close(store)
  62. if mode == 'w' or mode == 'a':
  63. safe_remove(path)
  64. @contextmanager
  65. def ensure_clean_path(path):
  66. """
  67. return essentially a named temporary file that is not opened
  68. and deleted on existing; if path is a list, then create and
  69. return list of filenames
  70. """
  71. try:
  72. if isinstance(path, list):
  73. filenames = [ create_tempfile(p) for p in path ]
  74. yield filenames
  75. else:
  76. filenames = [ create_tempfile(path) ]
  77. yield filenames[0]
  78. finally:
  79. for f in filenames:
  80. safe_remove(f)
# set these parameters so we don't have file sharing:
# cap every PyTables thread pool at a single thread, since concurrent
# access to the same HDF5 file from multiple threads is unsafe here
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1
  85. def _maybe_remove(store, key):
  86. """For tests using tables, try removing the table to be sure there is
  87. no content from previous tests using the same table name."""
  88. try:
  89. store.remove(key)
  90. except:
  91. pass
  92. def compat_assert_produces_warning(w,f):
  93. """ don't produce a warning under PY3 """
  94. if compat.PY3:
  95. f()
  96. else:
  97. with tm.assert_produces_warning(expected_warning=w):
  98. f()
  99. class TestHDFStore(tm.TestCase):
    @classmethod
    def setUpClass(cls):
        """Relax testing mode for the whole class run."""
        super(TestHDFStore, cls).setUpClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.reset_testing_mode()
    @classmethod
    def tearDownClass(cls):
        """Restore strict testing mode after the class run."""
        super(TestHDFStore, cls).tearDownClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.set_testing_mode()
    def setUp(self):
        # FutureWarnings from pandas internals are noise for these tests
        warnings.filterwarnings(action='ignore', category=FutureWarning)
        # a unique file name per test so repeated runs can't collide
        self.path = 'tmp.__%s__.h5' % tm.rands(10)
    def tearDown(self):
        # nothing to do: per-test files are removed by the
        # ensure_clean_* context managers / safe_remove calls
        pass
    def test_factory_fun(self):
        """get_store must close the file on error exit and support reopening."""
        try:
            with get_store(self.path) as tbl:
                raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(self.path)

        try:
            with get_store(self.path) as tbl:
                tbl['a'] = tm.makeDataFrame()

            # reopen and confirm the write above persisted
            with get_store(self.path) as tbl:
                self.assertEqual(len(tbl), 1)
                self.assertEqual(type(tbl['a']), DataFrame)
        finally:
            safe_remove(self.path)
    def test_conv_read_write(self):
        """Round-trip series/frames/panels through to_hdf / read_hdf."""
        try:
            def roundtrip(key, obj, **kwargs):
                # write then immediately read back under the same key
                obj.to_hdf(self.path, key, **kwargs)
                return read_hdf(self.path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            o = tm.makePanel()
            assert_panel_equal(o, roundtrip('panel', o))

            # table format supports append and where-based selection
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(self.path, 'table', append=True)
            result = read_hdf(self.path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)
        finally:
            safe_remove(self.path)
  151. def test_long_strings(self):
  152. # GH6166
  153. # unconversion of long strings was being chopped in earlier
  154. # versions of numpy < 1.7.2
  155. df = DataFrame({'a': [tm.rands(100) for _ in range(10)]},
  156. index=[tm.rands(100) for _ in range(10)])
  157. with ensure_clean_store(self.path) as store:
  158. store.append('df', df, data_columns=['a'])
  159. result = store.select('df')
  160. assert_frame_equal(df, result)
    def test_api(self):
        """to_hdf/append must honor all ``append``/``format`` combinations
        (GH4584)."""
        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(self.path) as path:
            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False: first write truncates, second appends
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:
            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:
            # 'fixed' (and its 'f' abbreviation) is also the default format
            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_store(self.path) as store:
            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # format=None defaults to the existing (table) format
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:
            # invalid: append is incompatible with fixed format, and
            # unknown format strings raise TypeError
            df = tm.makeDataFrame()
            self.assertRaises(ValueError, df.to_hdf, path, 'df',
                              append=True, format='f')
            self.assertRaises(ValueError, df.to_hdf, path, 'df',
                              append=True, format='fixed')
            self.assertRaises(TypeError, df.to_hdf, path, 'df',
                              append=True, format='foo')
            self.assertRaises(TypeError, df.to_hdf, path, 'df',
                              append=False, format='bar')
    def test_api_default_format(self):
        """The io.hdf.default_format option controls put/to_hdf storage."""
        # default_format option
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            pandas.set_option('io.hdf.default_format', 'fixed')
            _maybe_remove(store, 'df')
            store.put('df', df)
            self.assertFalse(store.get_storer('df').is_table)
            # append requires a table; the fixed default makes it invalid
            self.assertRaises(ValueError, store.append, 'df2', df)

            pandas.set_option('io.hdf.default_format', 'table')
            _maybe_remove(store, 'df')
            store.put('df', df)
            self.assertTrue(store.get_storer('df').is_table)
            _maybe_remove(store, 'df2')
            store.append('df2', df)
            self.assertTrue(store.get_storer('df').is_table)

            # restore the default so later tests are unaffected
            pandas.set_option('io.hdf.default_format', None)

        with ensure_clean_path(self.path) as path:
            df = tm.makeDataFrame()

            pandas.set_option('io.hdf.default_format', 'fixed')
            df.to_hdf(path, 'df')
            with get_store(path) as store:
                self.assertFalse(store.get_storer('df').is_table)
            self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True)

            pandas.set_option('io.hdf.default_format', 'table')
            df.to_hdf(path, 'df3')
            with get_store(path) as store:
                self.assertTrue(store.get_storer('df3').is_table)
            df.to_hdf(path, 'df4', append=True)
            with get_store(path) as store:
                self.assertTrue(store.get_storer('df4').is_table)

            pandas.set_option('io.hdf.default_format', None)
  252. def test_keys(self):
  253. with ensure_clean_store(self.path) as store:
  254. store['a'] = tm.makeTimeSeries()
  255. store['b'] = tm.makeStringSeries()
  256. store['c'] = tm.makeDataFrame()
  257. store['d'] = tm.makePanel()
  258. store['foo/bar'] = tm.makePanel()
  259. self.assertEqual(len(store), 5)
  260. self.assertTrue(set(
  261. store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar']))
    def test_repr(self):
        """repr/str must work for empty stores, full stores, and storers."""
        with ensure_clean_store(self.path) as store:
            repr(store)   # empty store
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            store['d'] = tm.makePanel()
            store['foo/bar'] = tm.makePanel()
            store.append('e', tm.makePanel())

            # a heterogeneous frame (object/bool/int/datetime columns)
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            df['timestamp1'] = Timestamp('20010102')
            df['timestamp2'] = Timestamp('20010103')
            df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
            df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
            df.ix[3:6, ['obj1']] = np.nan
            df = df.consolidate().convert_objects()

            # storing a mixed frame fixed-format emits a PerformanceWarning
            warnings.filterwarnings('ignore', category=PerformanceWarning)
            store['df'] = df
            warnings.filterwarnings('always', category=PerformanceWarning)

            # make a random group in hdf space
            store._handle.createGroup(store._handle.root, 'bah')

            repr(store)
            str(store)

        # storers
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()
            store.append('df', df)

            s = store.get_storer('df')
            repr(s)
            str(s)
  299. def test_contains(self):
  300. with ensure_clean_store(self.path) as store:
  301. store['a'] = tm.makeTimeSeries()
  302. store['b'] = tm.makeDataFrame()
  303. store['foo/bar'] = tm.makeDataFrame()
  304. self.assertIn('a', store)
  305. self.assertIn('b', store)
  306. self.assertNotIn('c', store)
  307. self.assertIn('foo/bar', store)
  308. self.assertIn('/foo/bar', store)
  309. self.assertNotIn('/foo/b', store)
  310. self.assertNotIn('bar', store)
  311. # GH 2694
  312. warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
  313. store['node())'] = tm.makeDataFrame()
  314. self.assertIn('node())', store)
    def test_versioning(self):
        """Written nodes carry a pandas_version attribute; a wiped version
        on an appendable table makes selection fail."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            self.assertEqual(store.root.a._v_attrs.pandas_version, '0.10.1')
            self.assertEqual(store.root.b._v_attrs.pandas_version, '0.10.1')
            self.assertEqual(store.root.df1._v_attrs.pandas_version, '0.10.1')

            # write a file and wipe its versioning
            _maybe_remove(store, 'df2')
            store.append('df2', df)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node('df2')._v_attrs.pandas_version = None
            self.assertRaises(Exception, store.select, 'df2')
    def test_mode(self):
        """Open-mode semantics: 'r'/'r+' need an existing file; 'w'
        truncates; all modes propagate to the pytables handle."""
        df = tm.makeTimeDataFrame()

        def check(mode):
            with ensure_clean_path(self.path) as path:
                # constructor
                if mode in ['r', 'r+']:
                    # read modes fail when the file doesn't exist yet
                    self.assertRaises(IOError, HDFStore, path, mode=mode)
                else:
                    store = HDFStore(path, mode=mode)
                    self.assertEqual(store._handle.mode, mode)
                    store.close()

            with ensure_clean_path(self.path) as path:
                # context
                if mode in ['r', 'r+']:
                    def f():
                        with get_store(path, mode=mode) as store:
                            pass
                    self.assertRaises(IOError, f)
                else:
                    with get_store(path, mode=mode) as store:
                        self.assertEqual(store._handle.mode, mode)

            with ensure_clean_path(self.path) as path:
                # conv write
                if mode in ['r', 'r+']:
                    self.assertRaises(IOError, df.to_hdf, path, 'df',
                                      mode=mode)
                    df.to_hdf(path, 'df', mode='w')
                else:
                    df.to_hdf(path, 'df', mode=mode)

                # conv read: 'w' truncated the file, so the key is gone
                if mode in ['w']:
                    self.assertRaises(KeyError, read_hdf, path, 'df',
                                      mode=mode)
                else:
                    result = read_hdf(path, 'df', mode=mode)
                    assert_frame_equal(result, df)

        check('r')
        check('r+')
        check('a')
        check('w')
    def test_reopen_handle(self):
        """open() with a new mode: switching a live handle to 'w' is a data
        loss error; after close(), any mode may be reopened."""
        with ensure_clean_path(self.path) as path:

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # invalid mode change
            self.assertRaises(PossibleDataLossError, store.open, 'w')
            store.close()
            self.assertFalse(store.is_open)

            # truncation ok here
            store.open('w')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 0)
            store.close()
            self.assertFalse(store.is_open)

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # reopen as read
            store.open('r')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'r')
            store.close()
            self.assertFalse(store.is_open)

            # reopen as append
            store.open('a')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'a')
            store.close()
            self.assertFalse(store.is_open)

            # reopen as append (again)
            store.open('a')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'a')
            store.close()
            self.assertFalse(store.is_open)
    def test_open_args(self):
        """Extra keyword args (e.g. the in-memory CORE driver) are passed
        through to pytables."""
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(path, mode='a', driver='H5FD_CORE',
                             driver_core_backing_store=0)
            store['df'] = df
            store.append('df2', df)

            tm.assert_frame_equal(store['df'], df)
            tm.assert_frame_equal(store['df2'], df)

            store.close()

            # only supported on pytable >= 3.0.0
            if LooseVersion(tables.__version__) >= '3.0.0':

                # the file should not have actually been written
                self.assertFalse(os.path.exists(path))
  422. def test_flush(self):
  423. with ensure_clean_store(self.path) as store:
  424. store['a'] = tm.makeTimeSeries()
  425. store.flush()
  426. store.flush(fsync=True)
  427. def test_get(self):
  428. with ensure_clean_store(self.path) as store:
  429. store['a'] = tm.makeTimeSeries()
  430. left = store.get('a')
  431. right = store['a']
  432. tm.assert_series_equal(left, right)
  433. left = store.get('/a')
  434. right = store['/a']
  435. tm.assert_series_equal(left, right)
  436. self.assertRaises(KeyError, store.get, 'b')
    def test_getattr(self):
        """Stored keys are exposed as attributes; unknown names raise."""
        with ensure_clean_store(self.path) as store:

            s = tm.makeTimeSeries()
            store['a'] = s

            # test attribute access
            result = store.a
            tm.assert_series_equal(result, s)
            result = getattr(store, 'a')
            tm.assert_series_equal(result, s)

            df = tm.makeTimeDataFrame()
            store['df'] = df
            result = store.df
            tm.assert_frame_equal(result, df)

            # errors
            self.assertRaises(AttributeError, getattr, store, 'd')

            for x in ['mode', 'path', 'handle', 'complib']:
                self.assertRaises(AttributeError, getattr, store, x)

            # not stores: the underscored internals do exist
            for x in ['mode', 'path', 'handle', 'complib']:
                getattr(store, "_%s" % x)
    def test_put(self):
        """put() semantics: fixed vs table format and the append flag."""
        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store['a'] = ts
            store['b'] = df[:10]
            store['foo/bar/bah'] = df[:10]
            store['foo'] = df[:10]
            store['/foo'] = df[:10]
            store.put('c', df[:10], format='table')

            # not OK, not a table
            self.assertRaises(
                ValueError, store.put, 'b', df[10:], append=True)

            # node does not currently exist, test _is_table_type returns
            # False in this case
            # _maybe_remove(store, 'f')
            # self.assertRaises(ValueError, store.put, 'f', df[10:],
            #                   append=True)

            # can't put to a table (use append instead)
            self.assertRaises(ValueError, store.put, 'c', df[10:],
                              append=True)

            # overwrite table
            store.put('c', df[:10], format='table', append=False)
            tm.assert_frame_equal(df[:10], store['c'])
    def test_put_string_index(self):
        """Long and mixed-length string indexes survive fixed storage."""
        with ensure_clean_store(self.path) as store:

            index = Index(
                ["I am a very long string index: %s" % i for i in range(20)])
            s = Series(np.arange(20), index=index)
            df = DataFrame({'A': s, 'B': s})

            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

            # mixed length
            index = Index(['abcdefghijklmnopqrstuvwxyz1234567890']
                          + ["I am a very long string index: %s" % i
                             for i in range(20)])
            s = Series(np.arange(21), index=index)
            df = DataFrame({'A': s, 'B': s})
            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)
  497. def test_put_compression(self):
  498. with ensure_clean_store(self.path) as store:
  499. df = tm.makeTimeDataFrame()
  500. store.put('c', df, format='table', complib='zlib')
  501. tm.assert_frame_equal(store['c'], df)
  502. # can't compress if format='fixed'
  503. self.assertRaises(ValueError, store.put, 'b', df,
  504. format='fixed', complib='zlib')
    def test_put_compression_blosc(self):
        """blosc compression works for table format only (pytables >= 2.2)."""
        tm.skip_if_no_package('tables', '2.2', app='blosc support')
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            # can't compress if format='fixed'
            self.assertRaises(ValueError, store.put, 'b', df,
                              format='fixed', complib='blosc')

            store.put('c', df, format='table', complib='blosc')
            tm.assert_frame_equal(store['c'], df)
  514. def test_put_integer(self):
  515. # non-date, non-string index
  516. df = DataFrame(np.random.randn(50, 100))
  517. self._check_roundtrip(df, tm.assert_frame_equal)
    def test_put_mixed_type(self):
        """A frame mixing object/bool/int/datetime columns can be put
        (object columns trigger a PerformanceWarning)."""
        df = tm.makeTimeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.ix[3:6, ['obj1']] = np.nan
        df = df.consolidate().convert_objects()

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # cannot use assert_produces_warning here for some reason
            # a PendingDeprecationWarning is also raised?
            warnings.filterwarnings('ignore', category=PerformanceWarning)
            store.put('df', df)
            warnings.filterwarnings('always', category=PerformanceWarning)

            expected = store.get('df')
            tm.assert_frame_equal(expected, df)
    def test_append(self):
        """append() across frames, panels, 4d panels, uints and odd names."""
        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            _maybe_remove(store, 'df2')
            store.put('df2', df[:10], format='table')
            store.append('df2', df[10:])
            tm.assert_frame_equal(store['df2'], df)

            _maybe_remove(store, 'df3')
            store.append('/df3', df[:10])
            store.append('/df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            # this is allowed but almost always don't want to do it
            with tm.assert_produces_warning(
                    expected_warning=tables.NaturalNameWarning):
                _maybe_remove(store, '/df3 foo')
                store.append('/df3 foo', df[:10])
                store.append('/df3 foo', df[10:])
                tm.assert_frame_equal(store['df3 foo'], df)

            # panel
            wp = tm.makePanel()
            _maybe_remove(store, 'wp1')
            store.append('wp1', wp.ix[:, :10, :])
            store.append('wp1', wp.ix[:, 10:, :])
            assert_panel_equal(store['wp1'], wp)

            # ndim
            p4d = tm.makePanel4D()
            _maybe_remove(store, 'p4d')
            store.append('p4d', p4d.ix[:, :, :10, :])
            store.append('p4d', p4d.ix[:, :, 10:, :])
            assert_panel4d_equal(store['p4d'], p4d)

            # test using axis labels
            _maybe_remove(store, 'p4d')
            store.append('p4d', p4d.ix[:, :, :10, :], axes=[
                'items', 'major_axis', 'minor_axis'])
            store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
                'items', 'major_axis', 'minor_axis'])
            assert_panel4d_equal(store['p4d'], p4d)

            # test using different number of items on each axis
            p4d2 = p4d.copy()
            p4d2['l4'] = p4d['l1']
            p4d2['l5'] = p4d['l1']
            _maybe_remove(store, 'p4d2')
            store.append(
                'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis'])
            assert_panel4d_equal(store['p4d2'], p4d2)

            # test using different order of items on the non-index axes
            _maybe_remove(store, 'wp1')
            wp_append1 = wp.ix[:, :10, :]
            store.append('wp1', wp_append1)
            wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1])
            store.append('wp1', wp_append2)
            assert_panel_equal(store['wp1'], wp)

            # dtype issues - mixed type in a single object column
            df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
            df['mixed_column'] = 'testing'
            df.ix[2, 'mixed_column'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df)
            tm.assert_frame_equal(store['df'], df)

            # uints - test storage of uints
            uint_data = DataFrame({
                'u08': Series(np.random.random_integers(0, high=255, size=5),
                              dtype=np.uint8),
                'u16': Series(np.random.random_integers(0, high=65535,
                                                        size=5),
                              dtype=np.uint16),
                'u32': Series(np.random.random_integers(0, high=2**30,
                                                        size=5),
                              dtype=np.uint32),
                'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                              dtype=np.uint64)},
                index=np.arange(5))
            _maybe_remove(store, 'uints')
            store.append('uints', uint_data)
            tm.assert_frame_equal(store['uints'], uint_data)

            # uints - test storage of uints in indexable columns
            _maybe_remove(store, 'uints')
            # 64-bit indices not yet supported
            store.append('uints', uint_data,
                         data_columns=['u08', 'u16', 'u32'])
            tm.assert_frame_equal(store['uints'], uint_data)
    def test_append_series(self):
        """Series (plain, named, multi-indexed) append plus value/index
        selection via Term."""
        with ensure_clean_store(self.path) as store:

            # basic
            ss = tm.makeStringSeries()
            ts = tm.makeTimeSeries()
            ns = Series(np.arange(100))

            store.append('ss', ss)
            result = store['ss']
            tm.assert_series_equal(result, ss)
            self.assertIsNone(result.name)

            store.append('ts', ts)
            result = store['ts']
            tm.assert_series_equal(result, ts)
            self.assertIsNone(result.name)

            ns.name = 'foo'
            store.append('ns', ns)
            result = store['ns']
            tm.assert_series_equal(result, ns)
            self.assertEqual(result.name, ns.name)

            # select on the values
            expected = ns[ns > 60]
            result = store.select('ns', Term('foo>60'))
            tm.assert_series_equal(result, expected)

            # select on the index and values
            expected = ns[(ns > 70) & (ns.index < 90)]
            result = store.select('ns', [Term('foo>70'), Term('index<90')])
            tm.assert_series_equal(result, expected)

            # multi-index
            mi = DataFrame(np.random.randn(5, 1), columns=['A'])
            mi['B'] = np.arange(len(mi))
            mi['C'] = 'foo'
            mi.loc[3:5, 'C'] = 'bar'
            mi.set_index(['C', 'B'], inplace=True)
            s = mi.stack()
            s.index = s.index.droplevel(2)
            store.append('mi', s)
            tm.assert_series_equal(store['mi'], s)
    def test_store_index_types(self):
        # GH5386
        # test storing various index types
        with ensure_clean_store(self.path) as store:

            def check(format, index):
                # build a frame with the given index type and round-trip it
                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df.index = index(len(df))
                _maybe_remove(store, 'df')
                store.put('df', df, format=format)
                assert_frame_equal(df, store['df'])

            for index in [tm.makeFloatIndex, tm.makeStringIndex,
                          tm.makeIntIndex, tm.makeDateIndex,
                          tm.makePeriodIndex]:
                check('table', index)
                check('fixed', index)

            # unicode
            index = tm.makeUnicodeIndex
            if compat.PY3:
                check('table', index)
                check('fixed', index)
            else:
                # only support for fixed types (and they have a perf warning)
                self.assertRaises(TypeError, check, 'table', index)
                with tm.assert_produces_warning(
                        expected_warning=PerformanceWarning):
                    check('fixed', index)
    def test_encoding(self):
        """Round-trip and Term selection with an explicit ascii encoding."""
        if LooseVersion(tables.__version__) < '3.0.0':
            raise nose.SkipTest(
                'tables version does not support proper encoding')
        if sys.byteorder != 'little':
            raise nose.SkipTest('system byteorder is not little')

        with ensure_clean_store(self.path) as store:
            df = DataFrame(dict(A='foo', B='bar'), index=range(5))
            df.loc[2, 'A'] = np.nan
            df.loc[3, 'B'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df, encoding='ascii')
            tm.assert_frame_equal(store['df'], df)

            expected = df.reindex(columns=['A'])
            result = store.select('df', Term('columns=A', encoding='ascii'))
            tm.assert_frame_equal(result, expected)
    def test_append_some_nans(self):
        """Rows with nans in only some columns must round-trip intact."""
        with ensure_clean_store(self.path) as store:
            df = DataFrame({'A': Series(np.random.randn(20)).astype('int32'),
                            'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))
            # some nans
            _maybe_remove(store, 'df1')
            df.ix[0:15, ['A1', 'B', 'D', 'E']] = np.nan
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            # first column
            df1 = df.copy()
            df1.ix[:, 'A1'] = np.nan
            _maybe_remove(store, 'df1')
            store.append('df1', df1[:10])
            store.append('df1', df1[10:])
            tm.assert_frame_equal(store['df1'], df1)

            # 2nd column
            df2 = df.copy()
            df2.ix[:, 'A2'] = np.nan
            _maybe_remove(store, 'df2')
            store.append('df2', df2[:10])
            store.append('df2', df2[10:])
            tm.assert_frame_equal(store['df2'], df2)

            # datetimes
            df3 = df.copy()
            df3.ix[:, 'E'] = np.nan
            _maybe_remove(store, 'df3')
            store.append('df3', df3[:10])
            store.append('df3', df3[10:])
            tm.assert_frame_equal(store['df3'], df3)
    def test_append_all_nans(self):
        """dropna behavior when appending all-nan rows: numeric-only rows
        can be dropped; rows with strings or dates are always written."""
        with ensure_clean_store(self.path) as store:

            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20)},
                           index=np.arange(20))
            df.ix[0:15, :] = np.nan

            # nan some entire rows (dropna=True)
            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df[-4:])

            # nan some entire rows (dropna=False)
            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # tests the option io.hdf.dropna_table
            pandas.set_option('io.hdf.dropna_table', False)
            _maybe_remove(store, 'df3')
            store.append('df3', df[:10])
            store.append('df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            pandas.set_option('io.hdf.dropna_table', True)
            _maybe_remove(store, 'df4')
            store.append('df4', df[:10])
            store.append('df4', df[10:])
            tm.assert_frame_equal(store['df4'], df[-4:])

            # nan some entire rows (string are still written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar'},
                           index=np.arange(20))
            df.ix[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # nan some entire rows (but since we have dates they are still
            # written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))
            df.ix[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)
    def test_append_frame_column_oriented(self):
        """Appending along the columns axis and selecting on it."""
        with ensure_clean_store(self.path) as store:

            # column oriented
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df.ix[:, :2], axes=['columns'])
            store.append('df1', df.ix[:, 2:])
            tm.assert_frame_equal(store['df1'], df)

            result = store.select('df1', 'columns=A')
            expected = df.reindex(columns=['A'])
            tm.assert_frame_equal(expected, result)

            # selection on the non-indexable
            result = store.select(
                'df1', ('columns=A', Term('index=df.index[0:4]')))
            expected = df.reindex(columns=['A'], index=df.index[0:4])
            tm.assert_frame_equal(expected, result)

            # this isn't supported: range queries on a non-indexable axis
            self.assertRaises(TypeError, store.select, 'df1', (
                'columns=A', Term('index>df.index[4]')))
  801. def test_append_with_different_block_ordering(self):
  802. #GH 4096; using same frames, but different block orderings
  803. with ensure_clean_store(self.path) as store:
  804. for i in range(10):
  805. df = DataFrame(np.random.randn(10,2),columns=list('AB'))
  806. df['index'] = range(10)
  807. df['index'] += i*10
  808. df['int64'] = Series([1]*len(df),dtype='int64')
  809. df['int16'] = Series([1]*len(df),dtype='int16')
  810. if i % 2 == 0:
  811. del df['int64']
  812. df['int64'] = Series([1]*len(df),dtype='int64')
  813. if i % 3 == 0:
  814. a = df.pop('A')
  815. df['A'] = a
  816. df.set_index('index',inplace=True)
  817. store.append('df',df)
  818. # test a different ordering but with more fields (like invalid combinate)
  819. with ensure_clean_store(self.path) as store:
  820. df = DataFrame(np.random.randn(10,2),columns=list('AB'), dtype='float64')
  821. df['int64'] = Series([1]*len(df),dtype='int64')
  822. df['int16'] = Series([1]*len(df),dtype='int16')
  823. store.append('df',df)
  824. # store additonal fields in different blocks
  825. df['int16_2'] = Series([1]*len(df),dtype='int16')
  826. self.assertRaises(ValueError, store.append, 'df', df)
  827. # store multile additonal fields in different blocks
  828. df['float_3'] = Series([1.]*len(df),dtype='float64')
  829. self.assertRaises(ValueError, store.append, 'df', df)
def test_ndim_indexables(self):
    """Test using ndim (Panel4D) tables in new ways: custom axis orderings
    as indexables, schema reuse on append, and partial selections."""
    with ensure_clean_store(self.path) as store:

        p4d = tm.makePanel4D()

        def check_indexers(key, indexers):
            # each named indexer must occupy the matching column position
            # in the underlying PyTables description
            for i, idx in enumerate(indexers):
                self.assertTrue(getattr(getattr(
                    store.root, key).table.description, idx)._v_pos == i)

        # append then change (will take existing schema)
        indexers = ['items', 'major_axis', 'minor_axis']

        _maybe_remove(store, 'p4d')
        store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
        store.append('p4d', p4d.ix[:, :, 10:, :])
        assert_panel4d_equal(store.select('p4d'), p4d)
        check_indexers('p4d', indexers)

        # same as above, but try to append with different axes;
        # the axes of the second append are ignored in favor of the
        # schema established by the first
        _maybe_remove(store, 'p4d')
        store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
        store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
            'labels', 'items', 'major_axis'])
        assert_panel4d_equal(store.select('p4d'), p4d)
        check_indexers('p4d', indexers)

        # pass incorrect number of axes
        _maybe_remove(store, 'p4d')
        self.assertRaises(ValueError, store.append, 'p4d', p4d.ix[
            :, :, :10, :], axes=['major_axis', 'minor_axis'])

        # different than default indexables #1
        indexers = ['labels', 'major_axis', 'minor_axis']
        _maybe_remove(store, 'p4d')
        store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
        store.append('p4d', p4d.ix[:, :, 10:, :])
        assert_panel4d_equal(store['p4d'], p4d)
        check_indexers('p4d', indexers)

        # different than default indexables #2
        indexers = ['major_axis', 'labels', 'minor_axis']
        _maybe_remove(store, 'p4d')
        store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
        store.append('p4d', p4d.ix[:, :, 10:, :])
        assert_panel4d_equal(store['p4d'], p4d)
        check_indexers('p4d', indexers)

        # partial selection
        result = store.select('p4d', ['labels=l1'])
        expected = p4d.reindex(labels=['l1'])
        assert_panel4d_equal(result, expected)

        # partial selection2
        result = store.select('p4d', [Term(
            'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
        expected = p4d.reindex(
            labels=['l1'], items=['ItemA'], minor_axis=['B'])
        assert_panel4d_equal(result, expected)

        # non-existent partial selection: unknown item yields empty axis
        result = store.select('p4d', [Term(
            'labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
        expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B'])
        assert_panel4d_equal(result, expected)
def test_append_with_strings(self):
    """Test string-column storage: min_itemsize (scalar and dict forms),
    truncation errors on over-long strings, and NaN handling."""
    with ensure_clean_store(self.path) as store:
        wp = tm.makePanel()
        # wp2 has longer minor_axis labels ("%s_extra") than wp
        wp2 = wp.rename_axis(
            dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2)

        def check_col(key, name, size):
            # itemsize of the named column in the stored table description
            self.assertEqual(getattr(store.get_storer(
                key).table.description, name).itemsize, size)

        # scalar min_itemsize leaves room for the longer labels of wp2
        store.append('s1', wp, min_itemsize=20)
        store.append('s1', wp2)
        expected = concat([wp, wp2], axis=2)
        expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
        assert_panel_equal(store['s1'], expected)
        check_col('s1', 'minor_axis', 20)

        # test dict format
        store.append('s2', wp, min_itemsize={'minor_axis': 20})
        store.append('s2', wp2)
        expected = concat([wp, wp2], axis=2)
        expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
        assert_panel_equal(store['s2'], expected)
        check_col('s2', 'minor_axis', 20)

        # apply the wrong field (similar to #1): widened major_axis does
        # not help the too-long minor_axis labels
        store.append('s3', wp, min_itemsize={'major_axis': 20})
        self.assertRaises(ValueError, store.append, 's3', wp2)

        # test truncation of bigger strings
        store.append('s4', wp)
        self.assertRaises(ValueError, store.append, 's4', wp2)

        # avoid truncation on elements
        df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
        store.append('df_big', df)
        tm.assert_frame_equal(store.select('df_big'), df)
        check_col('df_big', 'values_block_1', 15)

        # appending smaller string ok
        df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
        store.append('df_big', df2)
        expected = concat([df, df2])
        tm.assert_frame_equal(store.select('df_big'), expected)
        check_col('df_big', 'values_block_1', 15)

        # avoid truncation on elements
        df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
        store.append('df_big2', df, min_itemsize={'values': 50})
        tm.assert_frame_equal(store.select('df_big2'), df)
        check_col('df_big2', 'values_block_1', 50)

        # bigger string on next append
        store.append('df_new', df)
        df_new = DataFrame(
            [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
        self.assertRaises(ValueError, store.append, 'df_new', df_new)

        # with nans
        _maybe_remove(store, 'df')
        df = tm.makeTimeDataFrame()
        df['string'] = 'foo'
        df.ix[1:4, 'string'] = np.nan
        df['string2'] = 'bar'
        df.ix[4:8, 'string2'] = np.nan
        df['string3'] = 'bah'
        df.ix[1:, 'string3'] = np.nan
        store.append('df', df)
        result = store.select('df')
        tm.assert_frame_equal(result, df)

    with ensure_clean_store(self.path) as store:

        def check_col(key, name, size):
            self.assertEqual(getattr(store.get_storer(
                key).table.description, name).itemsize, size)

        df = DataFrame(dict(A='foo', B='bar'), index=range(10))

        # a min_itemsize that creates a data_column
        _maybe_remove(store, 'df')
        store.append('df', df, min_itemsize={'A': 200})
        check_col('df', 'A', 200)
        self.assertEqual(store.get_storer('df').data_columns, ['A'])

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, 'df')
        store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
        check_col('df', 'A', 200)
        self.assertEqual(store.get_storer('df').data_columns, ['B', 'A'])

        # a min_itemsize keyed on 'values' widens the value block, not
        # the declared data_columns
        _maybe_remove(store, 'df')
        store.append('df', df, data_columns=['B'],
                     min_itemsize={'values': 200})
        check_col('df', 'B', 200)
        check_col('df', 'values_block_0', 200)
        self.assertEqual(store.get_storer('df').data_columns, ['B'])

        # infer the .typ on subsequent appends
        _maybe_remove(store, 'df')
        store.append('df', df[:5], min_itemsize=200)
        store.append('df', df[5:], min_itemsize=200)
        tm.assert_frame_equal(store['df'], df)

        # invalid min_itemsize keys
        df = DataFrame(['foo', 'foo', 'foo', 'barh', 'barh', 'barh'],
                       columns=['A'])
        _maybe_remove(store, 'df')
        self.assertRaises(ValueError, store.append, 'df', df,
                          min_itemsize={'foo': 20, 'foobar': 20})
def test_append_with_data_columns(self):
    """Test ``data_columns``: per-column indexing, searching/selecting on
    data columns (numeric, string, datetime), and min_itemsize interaction.

    NOTE(review): several Term strings below (e.g. 'index>df.index[3]')
    resolve the local name ``df`` from this scope — renaming locals here
    would change behavior.
    """
    with ensure_clean_store(self.path) as store:
        df = tm.makeTimeDataFrame()
        df.loc[:, 'B'].iloc[0] = 1.
        _maybe_remove(store, 'df')
        store.append('df', df[:2], data_columns=['B'])
        store.append('df', df[2:])
        tm.assert_frame_equal(store['df'], df)

        # check that we have indices created
        assert(store._handle.root.df.table.cols.index.is_indexed is True)
        assert(store._handle.root.df.table.cols.B.is_indexed is True)

        # data column searching
        result = store.select('df', [Term('B>0')])
        expected = df[df.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column searching (with an indexable and a data_columns)
        result = store.select(
            'df', [Term('B>0'), Term('index>df.index[3]')])
        df_new = df.reindex(index=df.index[4:])
        expected = df_new[df_new.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column selection with a string data_column
        df_new = df.copy()
        df_new['string'] = 'foo'
        df_new['string'][1:4] = np.nan
        df_new['string'][5:6] = 'bar'
        _maybe_remove(store, 'df')
        store.append('df', df_new, data_columns=['string'])
        result = store.select('df', [Term('string=foo')])
        expected = df_new[df_new.string == 'foo']
        tm.assert_frame_equal(result, expected)

    # using min_itemsize and a data column
    def check_col(key, name, size):
        # itemsize of the named column in the stored table description;
        # closes over ``store``, rebound by each ``with`` block below
        self.assertEqual(getattr(store.get_storer(
            key).table.description, name).itemsize, size)

    with ensure_clean_store(self.path) as store:
        _maybe_remove(store, 'df')
        store.append('df', df_new, data_columns=['string'],
                     min_itemsize={'string': 30})
        check_col('df', 'string', 30)
        _maybe_remove(store, 'df')
        store.append(
            'df', df_new, data_columns=['string'], min_itemsize=30)
        check_col('df', 'string', 30)
        _maybe_remove(store, 'df')
        store.append('df', df_new, data_columns=['string'],
                     min_itemsize={'values': 30})
        check_col('df', 'string', 30)

    with ensure_clean_store(self.path) as store:
        df_new['string2'] = 'foobarbah'
        df_new['string_block1'] = 'foobarbah1'
        df_new['string_block2'] = 'foobarbah2'
        _maybe_remove(store, 'df')
        store.append('df', df_new, data_columns=['string', 'string2'],
                     min_itemsize={'string': 30, 'string2': 40,
                                   'values': 50})
        check_col('df', 'string', 30)
        check_col('df', 'string2', 40)
        check_col('df', 'values_block_1', 50)

    with ensure_clean_store(self.path) as store:
        # multiple data columns
        df_new = df.copy()
        df_new.loc[:, 'A'].iloc[0] = 1.
        df_new.loc[:, 'B'].iloc[0] = -1.
        df_new['string'] = 'foo'
        df_new['string'][1:4] = np.nan
        df_new['string'][5:6] = 'bar'
        df_new['string2'] = 'foo'
        df_new['string2'][2:5] = np.nan
        df_new['string2'][7:8] = 'bar'
        _maybe_remove(store, 'df')
        store.append(
            'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
        result = store.select('df', [Term('string=foo'), Term(
            'string2=foo'), Term('A>0'), Term('B<0')])
        expected = df_new[(df_new.string == 'foo') & (
            df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # yield an empty frame
        result = store.select('df', [Term('string=foo'), Term(
            'string2=cool')])
        expected = df_new[(df_new.string == 'foo') & (
            df_new.string2 == 'cool')]
        tm.assert_frame_equal(result, expected, check_index_type=False)

    with ensure_clean_store(self.path) as store:
        # doc example
        df_dc = df.copy()
        df_dc['string'] = 'foo'
        df_dc.ix[4:6, 'string'] = np.nan
        df_dc.ix[7:9, 'string'] = 'bar'
        df_dc['string2'] = 'cool'
        df_dc['datetime'] = Timestamp('20010102')
        df_dc = df_dc.convert_objects()
        df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan

        _maybe_remove(store, 'df_dc')
        store.append('df_dc', df_dc, data_columns=['B', 'C',
                                                   'string', 'string2',
                                                   'datetime'])
        result = store.select('df_dc', [Term('B>0')])

        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected, check_index_type=False)

        result = store.select(
            'df_dc', ['B > 0', 'C > 0', 'string == foo'])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
            df_dc.string == 'foo')]
        tm.assert_frame_equal(result, expected, check_index_type=False)

    with ensure_clean_store(self.path) as store:
        # doc example part 2
        np.random.seed(1234)
        index = date_range('1/1/2000', periods=8)
        df_dc = DataFrame(np.random.randn(8, 3), index=index,
                          columns=['A', 'B', 'C'])
        df_dc['string'] = 'foo'
        df_dc.ix[4:6, 'string'] = np.nan
        df_dc.ix[7:9, 'string'] = 'bar'
        df_dc.ix[:, ['B', 'C']] = df_dc.ix[:, ['B', 'C']].abs()
        df_dc['string2'] = 'cool'

        # on-disk operations
        store.append('df_dc', df_dc, data_columns=['B', 'C',
                                                   'string', 'string2'])

        result = store.select('df_dc', [Term('B>0')])
        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select(
            'df_dc', ['B > 0', 'C > 0', 'string == "foo"'])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
            df_dc.string == 'foo')]
        tm.assert_frame_equal(result, expected)

    with ensure_clean_store(self.path) as store:
        # panel
        # GH5717 not handling data_columns
        np.random.seed(1234)
        p = tm.makePanel()

        store.append('p1', p)
        tm.assert_panel_equal(store.select('p1'), p)

        store.append('p2', p, data_columns=True)
        tm.assert_panel_equal(store.select('p2'), p)

        result = store.select('p2', where='ItemA>0')
        expected = p.to_frame()
        expected = expected[expected['ItemA'] > 0]
        tm.assert_frame_equal(result.to_frame(), expected)

        result = store.select('p2', where='ItemA>0 & minor_axis=["A","B"]')
        expected = p.to_frame()
        expected = expected[expected['ItemA'] > 0]
        expected = expected[expected.reset_index(
            level=['major']).index.isin(['A', 'B'])]
        tm.assert_frame_equal(result.to_frame(), expected)
  1112. def test_create_table_index(self):
  1113. with ensure_clean_store(self.path) as store:
  1114. def col(t,column):
  1115. return getattr(store.get_storer(t).table.cols,column)
  1116. # index=False
  1117. wp = tm.makePanel()
  1118. store.append('p5', wp, index=False)
  1119. store.create_table_index('p5', columns=['major_axis'])
  1120. assert(col('p5', 'major_axis').is_indexed is True)
  1121. assert(col('p5', 'minor_axis').is_indexed is False)
  1122. # index=True
  1123. store.append('p5i', wp, index=True)
  1124. assert(col('p5i', 'major_axis').is_indexed is True)
  1125. assert(col('p5i', 'minor_axis').is_indexed is True)
  1126. # default optlevels
  1127. store.get_storer('p5').create_index()
  1128. assert(col('p5', 'major_axis').index.optlevel == 6)
  1129. assert(col('p5', 'minor_axis').index.kind == 'medium')
  1130. # let's change the indexing scheme
  1131. store.create_table_index('p5')
  1132. assert(col('p5', 'major_axis').index.optlevel == 6)
  1133. assert(col('p5', 'minor_axis').index.kind == 'medium')
  1134. store.create_table_index('p5', optlevel=9)
  1135. assert(col('p5', 'major_axis').index.optlevel == 9)
  1136. assert(col('p5', 'minor_axis').index.kind == 'medium')
  1137. store.create_table_index('p5', kind='full')
  1138. assert(col('p5', 'major_axis').index.optlevel == 9)
  1139. assert(col('p5', 'minor_axis').index.kind == 'full')
  1140. store.create_table_index('p5', optlevel=1, kind='light')
  1141. assert(col('p5', 'major_axis').index.optlevel == 1)
  1142. assert(col('p5', 'minor_axis').index.kind == 'light')
  1143. # data columns
  1144. df = tm.makeTimeDataFrame()
  1145. df['string'] = 'foo'
  1146. df['string2'] = 'bar'
  1147. store.append('f', df, data_columns=['string', 'string2'])
  1148. assert(col('f', 'index').is_indexed is True)
  1149. assert(col('f', 'string').is_indexed is True)
  1150. assert(col('f', 'string2').is_indexed is True)
  1151. # specify index=columns
  1152. store.append(
  1153. 'f2', df, index=['string'], data_columns=['string', 'string2'])
  1154. assert(col('f2', 'index').is_indexed is False)
  1155. assert(col('f2', 'string').is_indexed is True)
  1156. assert(col('f2', 'string2').is_indexed is False)
  1157. # try to index a non-table
  1158. _maybe_remove(store, 'f2')
  1159. store.put('f2', df)
  1160. self.assertRaises(TypeError, store.create_table_index, 'f2')
  1161. # try to change the version supports flag
  1162. from pandas.io import pytables
  1163. pytables._table_supports_index = False
  1164. self.assertRaises(Exception, store.create_table_index, 'f')
  1165. # test out some versions
  1166. original = tables.__version__
  1167. for v in ['2.2', '2.2b']:
  1168. pytables._table_mod = None
  1169. pytables._table_supports_index = False
  1170. tables.__version__ = v
  1171. self.assertRaises(Exception, store.create_table_index, 'f')
  1172. for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', '3.0.0', '3.1.0', original]:
  1173. pytables._table_mod = None
  1174. pytables._table_supports_index = False
  1175. tables.__version__ = v
  1176. store.create_table_index('f')
  1177. pytables._table_mod = None
  1178. pytables._table_supports_index = False
  1179. tables.__version__ = original
  1180. def test_big_table_frame(self):
  1181. raise nose.SkipTest('no big table frame')
  1182. # create and write a big table
  1183. df = DataFrame(np.random.randn(2000 * 100, 100), index=range(
  1184. 2000 * 100), columns=['E%03d' % i for i in range(100)])
  1185. for x in range(20):
  1186. df['String%03d' % x] = 'string%03d' % x
  1187. import time
  1188. x = time.time()
  1189. with ensure_clean_store(self.path,mode='w') as store:
  1190. store.append('df', df)
  1191. rows = store.root.df.table.nrows
  1192. recons = store.select('df')
  1193. assert isinstance(recons, DataFrame)
  1194. com.pprint_thing("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x))
def test_big_table2_frame(self):
    # this is a really big table: 1m rows x 60 float columns, 20 string, 20 datetime
    # columns
    # manual performance check — permanently skipped in the normal run;
    # NOTE(review): the dead code below uses ``com`` (pandas.core.common),
    # which must be imported elsewhere in this file — verify before re-enabling
    raise nose.SkipTest('no big table2 frame')

    # create and write a big table
    com.pprint_thing("\nbig_table2 start")
    import time
    start_time = time.time()
    df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int(
        1000 * 1000)), columns=['E%03d' % i for i in range(60)])
    for x in range(20):
        df['String%03d' % x] = 'string%03d' % x
    for x in range(20):
        df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)

    com.pprint_thing("\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f"
                     % (len(df.index), time.time() - start_time))

    def f(chunksize):
        # write the frame with the given chunksize; return rows written
        with ensure_clean_store(self.path, mode='w') as store:
            store.append('df', df, chunksize=chunksize)
            r = store.root.df.table.nrows
            return r

    # time the append at several chunk sizes
    for c in [10000, 50000, 250000]:
        start_time = time.time()
        com.pprint_thing("big_table2 frame [chunk->%s]" % c)
        rows = f(c)
        com.pprint_thing("big_table2 frame [rows->%s,chunk->%s] -> %5.2f"
                         % (rows, c, time.time() - start_time))
  1222. def test_big_put_frame(self):
  1223. raise nose.SkipTest('no big put frame')
  1224. com.pprint_thing("\nbig_put start")
  1225. import time
  1226. start_time = time.time()
  1227. df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int(
  1228. 1000 * 1000)), columns=['E%03d' % i for i in range(60)])
  1229. for x in range(20):
  1230. df['String%03d' % x] = 'string%03d' % x
  1231. for x in range(20):
  1232. df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
  1233. com.pprint_thing("\nbig_put frame (creation of df) [rows->%s] -> %5.2f"
  1234. % (len(df.index), time.time() - start_time))
  1235. with ensure_clean_store(self.path, mode='w') as store:
  1236. start_time = time.time()
  1237. store = HDFStore(self.path, mode='w')
  1238. store.put('df', df)
  1239. com.pprint_thing(df.get_dtype_counts())
  1240. com.pprint_thing("big_put frame [shape->%s] -> %5.2f"
  1241. % (df.shape, time.time() - start_time))
  1242. def test_big_table_panel(self):
  1243. raise nose.SkipTest('no big table panel')
  1244. # create and write a big table
  1245. wp = Panel(
  1246. np.random.randn(20, 1000, 1000), items=['Item%03d' % i for i in range(20)],
  1247. major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in range(1000)])
  1248. wp.ix[:, 100:200, 300:400] = np.nan
  1249. for x in range(100):
  1250. wp['String%03d'] = 'string%03d' % x
  1251. import time
  1252. x = time.time()
  1253. with ensure_clean_store(self.path, mode='w') as store:
  1254. store.append('wp', wp)
  1255. rows = store.root.wp.table.nrows
  1256. recons = store.select('wp')
  1257. assert isinstance(recons, Panel)
  1258. com.pprint_thing("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x))
  1259. def test_append_diff_item_order(self):
  1260. wp = tm.makePanel()
  1261. wp1 = wp.ix[:, :10, :]
  1262. wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]
  1263. with ensure_clean_store(self.path) as store:
  1264. store.put('panel', wp1, format='table')
  1265. self.assertRaises(ValueError, store.put, 'panel', wp2,
  1266. append=True)
  1267. def test_append_hierarchical(self):
  1268. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  1269. ['one', 'two', 'three']],
  1270. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  1271. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  1272. names=['foo', 'bar'])
  1273. df = DataFrame(np.random.randn(10, 3), index=index,
  1274. columns=['A', 'B', 'C'])
  1275. with ensure_clean_store(self.path) as store:
  1276. store.append('mi', df)
  1277. result = store.select('mi')
  1278. tm.assert_frame_equal(result, df)
  1279. # GH 3748
  1280. result = store.select('mi',columns=['A','B'])
  1281. expected = df.reindex(columns=['A','B'])
  1282. tm.assert_frame_equal(result,expected)
  1283. with ensure_clean_path('test.hdf') as path:
  1284. df.to_hdf(path,'df',format='table')
  1285. result = read_hdf(path,'df',columns=['A','B'])
  1286. expected = df.reindex(columns=['A','B'])
  1287. tm.assert_frame_equal(result,expected)
def test_column_multiindex(self):
    # GH 4710
    # recreate multi-indexes properly
    index = MultiIndex.from_tuples([('A', 'a'), ('A', 'b'),
                                    ('B', 'a'), ('B', 'b')],
                                   names=['first', 'second'])
    df = DataFrame(np.arange(12).reshape(3, 4), columns=index)

    with ensure_clean_store(self.path) as store:

        # both fixed and table formats preserve the column MultiIndex
        store.put('df', df)
        tm.assert_frame_equal(store['df'], df,
                              check_index_type=True,
                              check_column_type=True)

        store.put('df1', df, format='table')
        tm.assert_frame_equal(store['df1'], df,
                              check_index_type=True,
                              check_column_type=True)

        # data_columns are rejected with a column MultiIndex
        self.assertRaises(ValueError, store.put, 'df2', df,
                          format='table', data_columns=['A'])
        self.assertRaises(ValueError, store.put, 'df3', df,
                          format='table', data_columns=True)

    # appending multi-column on existing table (see GH 6167)
    with ensure_clean_store(self.path) as store:
        store.append('df2', df)
        store.append('df2', df)

        tm.assert_frame_equal(store['df2'], concat((df, df)))

    # non_index_axes name
    df = DataFrame(np.arange(12).reshape(3, 4),
                   columns=Index(list('ABCD'), name='foo'))

    with ensure_clean_store(self.path) as store:

        store.put('df1', df, format='table')
        tm.assert_frame_equal(store['df1'], df,
                              check_index_type=True,
                              check_column_type=True)
def test_store_multiindex(self):
    # validate multi-index names
    # GH 5527
    with ensure_clean_store(self.path) as store:

        def make_index(names=None):
            # 12-row MultiIndex: (datetime, s, t) with the given level names
            return MultiIndex.from_tuples(
                [(datetime.datetime(2013, 12, d), s, t)
                 for d in range(1, 3)
                 for s in range(2)
                 for t in range(3)],
                names=names)

        # no names
        _maybe_remove(store, 'df')
        df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                       index=make_index())
        store.append('df', df)
        tm.assert_frame_equal(store.select('df'), df)

        # partial names
        _maybe_remove(store, 'df')
        df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                       index=make_index(['date', None, None]))
        store.append('df', df)
        tm.assert_frame_equal(store.select('df'), df)

        # series
        _maybe_remove(store, 's')
        s = Series(np.zeros(12), index=make_index(['date', None, None]))
        store.append('s', s)
        tm.assert_series_equal(store.select('s'), s)

        # dup with column: an index level named like a column must raise
        _maybe_remove(store, 'df')
        df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                       index=make_index(['date', 'a', 't']))
        self.assertRaises(ValueError, store.append, 'df', df)

        # dup within level
        _maybe_remove(store, 'df')
        df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                       index=make_index(['date', 'date', 'date']))
        self.assertRaises(ValueError, store.append, 'df', df)

        # fully names
        _maybe_remove(store, 'df')
        df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                       index=make_index(['date', 's', 't']))
        store.append('df', df)
        tm.assert_frame_equal(store.select('df'), df)
  1345. def test_select_columns_in_where(self):
  1346. # GH 6169
  1347. # recreate multi-indexes when columns is passed
  1348. # in the `where` argument
  1349. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  1350. ['one', 'two', 'three']],
  1351. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  1352. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  1353. names=['foo_name', 'bar_name'])
  1354. # With a DataFrame
  1355. df = DataFrame(np.random.randn(10, 3), index=index,
  1356. columns=['A', 'B', 'C'])
  1357. with ensure_clean_store(self.path) as store:
  1358. store.put('df', df, format='table')
  1359. expected = df[['A']]
  1360. tm.assert_frame_equal(store.select('df', columns=['A']), expected)
  1361. tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected)
  1362. # With a Series
  1363. s = Series(np.random.randn(10), index=index,
  1364. name='A')
  1365. with ensure_clean_store(self.path) as store:
  1366. store.put('s', s, format='table')
  1367. tm.assert_series_equal(store.select('s', where="columns=['A']"),s)
  1368. def test_pass_spec_to_storer(self):
  1369. df = tm.makeDataFrame()
  1370. with ensure_clean_store(self.path) as store:
  1371. store.put('df',df)
  1372. self.assertRaises(TypeError, store.select, 'df', columns=['A'])
  1373. self.assertRaises(TypeError, store.select, 'df',where=[('columns=A')])
def test_append_misc(self):
    """Miscellaneous append behavior: unsupported types, chunksize /
    expectedrows options, and zero-length frames/panels (GH 4273)."""
    with ensure_clean_store(self.path) as store:

        # unsupported data types for non-tables
        p4d = tm.makePanel4D()
        self.assertRaises(TypeError, store.put, 'p4d', p4d)

        # unsupported data types
        self.assertRaises(TypeError, store.put, 'abc', None)
        self.assertRaises(TypeError, store.put, 'abc', '123')
        self.assertRaises(TypeError, store.put, 'abc', 123)
        self.assertRaises(TypeError, store.put, 'abc', np.arange(5))

        df = tm.makeDataFrame()
        store.append('df', df, chunksize=1)
        result = store.select('df')
        tm.assert_frame_equal(result, df)

        store.append('df1', df, expectedrows=10)
        result = store.select('df1')
        tm.assert_frame_equal(result, df)

    # more chunksize in append tests
    def check(obj, comparator):
        # round-trip ``obj`` at several chunksizes and compare
        for c in [10, 200, 1000]:
            with ensure_clean_store(self.path, mode='w') as store:
                store.append('obj', obj, chunksize=c)
                result = store.select('obj')
                comparator(result, obj)

    df = tm.makeDataFrame()
    df['string'] = 'foo'
    df['float322'] = 1.
    df['float322'] = df['float322'].astype('float32')
    df['bool'] = df['float322'] > 0
    df['time1'] = Timestamp('20130101')
    df['time2'] = Timestamp('20130102')
    check(df, tm.assert_frame_equal)

    p = tm.makePanel()
    check(p, assert_panel_equal)

    p4d = tm.makePanel4D()
    check(p4d, assert_panel4d_equal)

    # empty frame, GH4273
    with ensure_clean_store(self.path) as store:

        # 0 len: appending an empty frame writes nothing
        df_empty = DataFrame(columns=list('ABC'))
        store.append('df', df_empty)
        self.assertRaises(KeyError, store.select, 'df')

        # repeated append of 0/non-zero frames
        df = DataFrame(np.random.rand(10, 3), columns=list('ABC'))
        store.append('df', df)
        assert_frame_equal(store.select('df'), df)
        store.append('df', df_empty)
        assert_frame_equal(store.select('df'), df)

        # store: put of an empty frame works
        df = DataFrame(columns=list('ABC'))
        store.put('df2', df)
        assert_frame_equal(store.select('df2'), df)

        # 0 len
        p_empty = Panel(items=list('ABC'))
        store.append('p', p_empty)
        self.assertRaises(KeyError, store.select, 'p')

        # repeated append of 0/non-zero frames
        p = Panel(np.random.randn(3, 4, 5), items=list('ABC'))
        store.append('p', p)
        assert_panel_equal(store.select('p'), p)
        store.append('p', p_empty)
        assert_panel_equal(store.select('p'), p)

        # store
        store.put('p2', p_empty)
        assert_panel_equal(store.select('p2'), p_empty)
def test_append_raise(self):
    """Append with invalid input should raise informative errors."""
    with ensure_clean_store(self.path) as store:

        # test append with invalid input to get good error messages

        # list in column
        df = tm.makeDataFrame()
        df['invalid'] = [['a']] * len(df)
        self.assertEqual(df.dtypes['invalid'], np.object_)
        self.assertRaises(TypeError, store.append, 'df', df)

        # multiple invalid columns
        df['invalid2'] = [['a']] * len(df)
        df['invalid3'] = [['a']] * len(df)
        self.assertRaises(TypeError, store.append, 'df', df)

        # datetime with embedded nans as object
        df = tm.makeDataFrame()
        s = Series(datetime.datetime(2001, 1, 2), index=df.index)
        s = s.astype(object)
        s[0:5] = np.nan
        df['invalid'] = s
        self.assertEqual(df.dtypes['invalid'], np.object_)
        self.assertRaises(TypeError, store.append, 'df', df)

        # directly ndarray
        self.assertRaises(TypeError, store.append, 'df', np.arange(10))

        # series directly
        self.assertRaises(TypeError, store.append,
                          'df', Series(np.arange(10)))

        # appending an incompatible table (new column on an existing
        # schema) raises ValueError rather than TypeError
        df = tm.makeDataFrame()
        store.append('df', df)
        df['foo'] = 'foo'
        self.assertRaises(ValueError, store.append, 'df', df)
  1468. def test_table_index_incompatible_dtypes(self):
  1469. df1 = DataFrame({'a': [1, 2, 3]})
  1470. df2 = DataFrame({'a': [4, 5, 6]},
  1471. index=date_range('1/1/2000', periods=3))
  1472. with ensure_clean_store(self.path) as store:
  1473. store.put('frame', df1, format='table')
  1474. self.assertRaises(TypeError, store.put, 'frame', df2,
  1475. format='table', append=True)
    def test_table_values_dtypes_roundtrip(self):
        """Column dtypes survive an append/select round-trip through a table."""
        with ensure_clean_store(self.path) as store:
            df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
            store.append('df_f8', df1)
            assert_series_equal(df1.dtypes,store['df_f8'].dtypes)

            df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
            store.append('df_i8', df2)
            assert_series_equal(df2.dtypes,store['df_i8'].dtypes)

            # incompatible dtype: cannot append f8 data to an i8 table
            self.assertRaises(ValueError, store.append, 'df_i8', df1)

            # check creation/storage/retrieval of float32 (a bit hacky to actually create them though)
            df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
            store.append('df_f4', df1)
            assert_series_equal(df1.dtypes,store['df_f4'].dtypes)
            assert df1.dtypes[0] == 'float32'

            # check with mixed dtypes: one column per numeric dtype plus
            # string/bool/datetime columns
            df1 = DataFrame(dict([ (c,Series(np.random.randn(5),dtype=c)) for c in
                                   ['float32','float64','int32','int64','int16','int8'] ]))
            df1['string'] = 'foo'
            df1['float322'] = 1.
            df1['float322'] = df1['float322'].astype('float32')
            df1['bool'] = df1['float32'] > 0
            df1['time1'] = Timestamp('20130101')
            df1['time2'] = Timestamp('20130102')
            store.append('df_mixed_dtypes1', df1)
            result = store.select('df_mixed_dtypes1').get_dtype_counts()
            expected = Series({ 'float32' : 2, 'float64' : 1,'int32' : 1, 'bool' : 1,
                                'int16' : 1, 'int8' : 1, 'int64' : 1, 'object' : 1,
                                'datetime64[ns]' : 2})
            # sort both so the comparison is order-independent
            result.sort()
            expected.sort()
            tm.assert_series_equal(result,expected)
    def test_table_mixed_dtypes(self):
        """Mixed-dtype frames, panels and 4D panels round-trip via append/select."""
        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        # introduce some NaNs in an object column
        df.ix[3:6, ['obj1']] = np.nan
        df = df.consolidate().convert_objects()

        with ensure_clean_store(self.path) as store:
            store.append('df1_mixed', df)
            tm.assert_frame_equal(store.select('df1_mixed'), df)

        # panel
        wp = tm.makePanel()
        wp['obj1'] = 'foo'
        wp['obj2'] = 'bar'
        wp['bool1'] = wp['ItemA'] > 0
        wp['bool2'] = wp['ItemB'] > 0
        wp['int1'] = 1
        wp['int2'] = 2
        wp = wp.consolidate()

        with ensure_clean_store(self.path) as store:
            store.append('p1_mixed', wp)
            assert_panel_equal(store.select('p1_mixed'), wp)

        # ndim (4D panel)
        wp = tm.makePanel4D()
        wp['obj1'] = 'foo'
        wp['obj2'] = 'bar'
        wp['bool1'] = wp['l1'] > 0
        wp['bool2'] = wp['l2'] > 0
        wp['int1'] = 1
        wp['int2'] = 2
        wp = wp.consolidate()

        with ensure_clean_store(self.path) as store:
            store.append('p4d_mixed', wp)
            assert_panel4d_equal(store.select('p4d_mixed'), wp)
    def test_unimplemented_dtypes_table_columns(self):
        """dtypes the table format cannot serialize must raise TypeError."""
        with ensure_clean_store(self.path) as store:

            l = [('date', datetime.date(2001, 1, 2))]

            # py3 ok for unicode
            if not compat.PY3:
                l.append(('unicode', u('\\u03c3')))

            ### currently not supported dtypes ####
            for n, f in l:
                df = tm.makeDataFrame()
                df[n] = f
                self.assertRaises(
                    TypeError, store.append, 'df1_%s' % n, df)

        # frame containing a datetime.date mixed into an object block
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['datetime1'] = datetime.date(2001, 1, 2)
        df = df.consolidate().convert_objects()

        with ensure_clean_store(self.path) as store:
            # this fails because we have a date in the object block......
            self.assertRaises(TypeError, store.append, 'df_unimplemented', df)
    def test_append_with_timezones_pytz(self):
        """tz-aware (pytz) columns and indexes round-trip; mixed zones require
        data_columns, and appending a different zone raises."""
        from datetime import timedelta

        def compare(a,b):
            # frame equality plus an element-wise check that each value's
            # tz attribute survived the round-trip
            tm.assert_frame_equal(a,b)

            # compare the zones on each element
            for c in a.columns:
                for i in a.index:
                    a_e = a[c][i]
                    b_e = b[c][i]
                    if not (a_e == b_e and a_e.tz == b_e.tz):
                        raise AssertionError("invalid tz comparsion [%s] [%s]" % (a_e,b_e))

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A = [ Timestamp('20130102 2:00:00',tz='US/Eastern') + timedelta(hours=1)*i for i in range(5) ]))
            store.append('df_tz',df,data_columns=['A'])
            result = store['df_tz']
            compare(result,df)
            assert_frame_equal(result,df)

            # select with tz aware comparison value
            compare(store.select('df_tz',where=Term('A>=df.A[3]')),df[df.A>=df.A[3]])

            _maybe_remove(store, 'df_tz')
            # ensure we include dates in DST and STD time here.
            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130603',tz='US/Eastern')),index=range(5))
            store.append('df_tz',df)
            result = store['df_tz']
            compare(result,df)
            assert_frame_equal(result,df)

            # mixed zones without data_columns are rejected
            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=range(5))
            self.assertRaises(TypeError, store.append, 'df_tz', df)

            # this is ok (each tz column is its own data column)
            _maybe_remove(store, 'df_tz')
            store.append('df_tz',df,data_columns=['A','B'])
            result = store['df_tz']
            compare(result,df)
            assert_frame_equal(result,df)

            # can't append with diff timezone
            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=range(5))
            self.assertRaises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern'))))

            _maybe_remove(store, 'df')
            store.put('df',df)
            result = store.select('df')
            assert_frame_equal(result,df)

            _maybe_remove(store, 'df')
            store.append('df',df)
            result = store.select('df')
            assert_frame_equal(result,df)
    def test_append_with_timezones_dateutil(self):
        """Same coverage as the pytz variant, but with dateutil timezones."""
        from datetime import timedelta
        tm._skip_if_no_dateutil()

        # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows filename issues.
        from pandas.tslib import maybe_get_tz
        gettz = lambda x: maybe_get_tz('dateutil/' + x)

        def compare(a, b):
            # frame equality plus an element-wise check of each value's tz
            tm.assert_frame_equal(a, b)

            # compare the zones on each element
            for c in a.columns:
                for i in a.index:
                    a_e = a[c][i]
                    b_e = b[c][i]
                    if not (a_e == b_e and a_e.tz == b_e.tz):
                        raise AssertionError("invalid tz comparsion [%s] [%s]" % (a_e, b_e))

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=[ Timestamp('20130102 2:00:00', tz=gettz('US/Eastern')) + timedelta(hours=1) * i for i in range(5) ]))
            store.append('df_tz', df, data_columns=['A'])
            result = store['df_tz']
            compare(result, df)
            assert_frame_equal(result, df)

            # select with tz aware comparison value
            compare(store.select('df_tz', where=Term('A>=df.A[3]')), df[df.A >= df.A[3]])

            _maybe_remove(store, 'df_tz')
            # ensure we include dates in DST and STD time here.
            df = DataFrame(dict(A=Timestamp('20130102', tz=gettz('US/Eastern')), B=Timestamp('20130603', tz=gettz('US/Eastern'))), index=range(5))
            store.append('df_tz', df)
            result = store['df_tz']
            compare(result, df)
            assert_frame_equal(result, df)

            # mixed zones without data_columns are rejected
            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=Timestamp('20130102', tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('EET'))), index=range(5))
            self.assertRaises(TypeError, store.append, 'df_tz', df)

            # this is ok (each tz column is its own data column)
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            result = store['df_tz']
            compare(result, df)
            assert_frame_equal(result, df)

            # can't append with diff timezone
            df = DataFrame(dict(A=Timestamp('20130102', tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('CET'))), index=range(5))
            self.assertRaises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A=Series(lrange(3), index=date_range('2000-1-1', periods=3, freq='H', tz=gettz('US/Eastern')))))

            _maybe_remove(store, 'df')
            store.put('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

            _maybe_remove(store, 'df')
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)
  1680. def test_store_timezone(self):
  1681. # GH2852
  1682. # issue storing datetime.date with a timezone as it resets when read back in a new timezone
  1683. import platform
  1684. if platform.system() == "Windows":
  1685. raise nose.SkipTest("timezone setting not supported on windows")
  1686. import datetime
  1687. import time
  1688. import os
  1689. # original method
  1690. with ensure_clean_store(self.path) as store:
  1691. today = datetime.date(2013,9,10)
  1692. df = DataFrame([1,2,3], index = [today, today, today])
  1693. store['obj1'] = df
  1694. result = store['obj1']
  1695. assert_frame_equal(result, df)
  1696. # with tz setting
  1697. orig_tz = os.environ.get('TZ')
  1698. def setTZ(tz):
  1699. if tz is None:
  1700. try:
  1701. del os.environ['TZ']
  1702. except:
  1703. pass
  1704. else:
  1705. os.environ['TZ']=tz
  1706. time.tzset()
  1707. try:
  1708. with ensure_clean_store(self.path) as store:
  1709. setTZ('EST5EDT')
  1710. today = datetime.date(2013,9,10)
  1711. df = DataFrame([1,2,3], index = [today, today, today])
  1712. store['obj1'] = df
  1713. setTZ('CST6CDT')
  1714. result = store['obj1']
  1715. assert_frame_equal(result, df)
  1716. finally:
  1717. setTZ(orig_tz)
    def test_append_with_timedelta(self):
        """GH 3577: timedelta64 columns append and are queryable via where terms
        (numeric seconds, '-3D'/'-500000s' style strings)."""
        tm._skip_if_not_numpy17_friendly()

        # GH 3577
        # append timedelta
        from datetime import timedelta
        df = DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ]))
        df['C'] = df['A']-df['B']
        # inject NaT values into the timedelta column
        df.ix[3:5,'C'] = np.nan

        with ensure_clean_store(self.path) as store:

            # table
            _maybe_remove(store, 'df')
            store.append('df',df,data_columns=True)
            result = store.select('df')
            assert_frame_equal(result,df)

            result = store.select('df',Term("C<100000"))
            assert_frame_equal(result,df)

            # numeric comparison is in seconds
            result = store.select('df',Term("C","<",-3*86400))
            assert_frame_equal(result,df.iloc[3:])

            result = store.select('df',"C<'-3D'")
            assert_frame_equal(result,df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly
            result = store.select('df',"C<'-500000s'")
            result = result.dropna(subset=['C'])
            assert_frame_equal(result,df.iloc[6:])

            result = store.select('df',"C<'-3.5D'")
            result = result.iloc[1:]
            assert_frame_equal(result,df.iloc[4:])

            # fixed format
            _maybe_remove(store, 'df2')
            store.put('df2',df)
            result = store.select('df2')
            assert_frame_equal(result,df)
    def test_remove(self):
        """Removing keys: direct remove, missing key, pathed keys, __delitem__."""
        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store['a'] = ts
            store['b'] = df
            _maybe_remove(store, 'a')
            self.assertEqual(len(store), 1)
            tm.assert_frame_equal(df, store['b'])

            _maybe_remove(store, 'b')
            self.assertEqual(len(store), 0)

            # nonexistence
            self.assertRaises(KeyError, store.remove, 'a_nonexistent_store')

            # pathing: removing 'foo' alone must not touch 'b/foo'
            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'foo')
            _maybe_remove(store, 'b/foo')
            self.assertEqual(len(store), 1)

            # removing the parent node removes the child as well
            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'b')
            self.assertEqual(len(store), 1)

            # __delitem__
            store['a'] = ts
            store['b'] = df
            del store['a']
            del store['b']
            self.assertEqual(len(store), 0)
    def test_remove_where(self):
        """remove() with where criteria: missing keys, axis selection, empty where."""
        with ensure_clean_store(self.path) as store:

            # non-existance
            crit1 = Term('index>foo')
            self.assertRaises(KeyError, store.remove, 'a', [crit1])

            # try to remove non-table (with crit)
            # non-table ok (where = None)
            wp = tm.makePanel()
            store.put('wp', wp, format='table')
            store.remove('wp', ["minor_axis=['A', 'D']"])
            rs = store.select('wp')
            expected = wp.reindex(minor_axis=['B', 'C'])
            assert_panel_equal(rs, expected)

            # empty where clause removes everything
            _maybe_remove(store, 'wp')
            store.put('wp', wp, format='table')

            # deleted number (entire table)
            n = store.remove('wp', [])
            assert(n == 120)

            # non - empty where with an invalid term
            _maybe_remove(store, 'wp')
            store.put('wp', wp, format='table')
            self.assertRaises(ValueError, store.remove,
                              'wp', ['foo'])

            # selectin non-table with a where
            # store.put('wp2', wp, format='f')
            # self.assertRaises(ValueError, store.remove,
            #                  'wp2', [('column', ['A', 'D'])])
    def test_remove_startstop(self):
        """GH #4835 and #6177: remove() honours start/stop row bounds.

        The panel is stored 4 rows per major_axis entry, hence the //4
        arithmetic when reindexing the expected result.
        """
        # GH #4835 and #6177
        with ensure_clean_store(self.path) as store:

            wp = tm.makePanel()

            # start only: rows from `start` onward are removed
            store.put('wp1', wp, format='t')
            n = store.remove('wp1', start=32)
            #assert(n == 120-32)
            result = store.select('wp1')
            expected = wp.reindex(major_axis=wp.major_axis[:32//4])
            assert_panel_equal(result, expected)

            # negative start counts from the end
            store.put('wp2', wp, format='t')
            n = store.remove('wp2', start=-32)
            #assert(n == 32)
            result = store.select('wp2')
            expected = wp.reindex(major_axis=wp.major_axis[:-32//4])
            assert_panel_equal(result, expected)

            # stop only: rows before `stop` are removed
            store.put('wp3', wp, format='t')
            n = store.remove('wp3', stop=32)
            #assert(n == 32)
            result = store.select('wp3')
            expected = wp.reindex(major_axis=wp.major_axis[32//4:])
            assert_panel_equal(result, expected)

            store.put('wp4', wp, format='t')
            n = store.remove('wp4', stop=-32)
            #assert(n == 120-32)
            result = store.select('wp4')
            expected = wp.reindex(major_axis=wp.major_axis[-32//4:])
            assert_panel_equal(result, expected)

            # start n stop: remove the middle slice
            store.put('wp5', wp, format='t')
            n = store.remove('wp5', start=16, stop=-16)
            #assert(n == 120-32)
            result = store.select('wp5')
            expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
            assert_panel_equal(result, expected)

            # start == stop removes nothing
            store.put('wp6', wp, format='t')
            n = store.remove('wp6', start=16, stop=16)
            #assert(n == 0)
            result = store.select('wp6')
            expected = wp.reindex(major_axis=wp.major_axis)
            assert_panel_equal(result, expected)

            # with where: criteria apply only inside the [start, stop) window
            date = wp.major_axis.take(np.arange(0,30,3))
            crit = Term('major_axis=date')
            store.put('wp7', wp, format='t')
            n = store.remove('wp7', where=[crit], stop=80)
            #assert(n == 28)
            result = store.select('wp7')
            expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
            assert_panel_equal(result, expected)
    def test_remove_crit(self):
        """remove() with where criteria: group rows, halves, individual rows,
        and out-of-range corner cases. Removals on each key are cumulative."""
        with ensure_clean_store(self.path) as store:

            wp = tm.makePanel()

            # group row removal
            date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
            crit4 = Term('major_axis=date4')
            store.put('wp3', wp, format='t')
            n = store.remove('wp3', where=[crit4])
            assert(n == 36)

            result = store.select('wp3')
            expected = wp.reindex(major_axis=wp.major_axis - date4)
            assert_panel_equal(result, expected)

            # upper half
            store.put('wp', wp, format='table')

            date = wp.major_axis[len(wp.major_axis) // 2]

            crit1 = Term('major_axis>date')
            crit2 = Term("minor_axis=['A', 'D']")
            n = store.remove('wp', where=[crit1])

            assert(n == 56)

            n = store.remove('wp', where=[crit2])
            assert(n == 32)

            result = store['wp']
            expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
            assert_panel_equal(result, expected)

            # individual row elements
            store.put('wp2', wp, format='table')

            date1 = wp.major_axis[1:3]
            crit1 = Term('major_axis=date1')
            store.remove('wp2', where=[crit1])
            result = store.select('wp2')
            expected = wp.reindex(major_axis=wp.major_axis - date1)
            assert_panel_equal(result, expected)

            date2 = wp.major_axis[5]
            crit2 = Term('major_axis=date2')
            store.remove('wp2', where=[crit2])
            result = store['wp2']
            expected = wp.reindex(
                major_axis=wp.major_axis - date1 - Index([date2]))
            assert_panel_equal(result, expected)

            date3 = [wp.major_axis[7], wp.major_axis[9]]
            crit3 = Term('major_axis=date3')
            store.remove('wp2', where=[crit3])
            result = store['wp2']
            expected = wp.reindex(
                major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
            assert_panel_equal(result, expected)

            # corners: a criterion matching nothing removes nothing
            store.put('wp4', wp, format='table')
            n = store.remove(
                'wp4', where=[Term('major_axis>wp.major_axis[-1]')])
            result = store.select('wp4')
            assert_panel_equal(result, wp)
    def test_invalid_terms(self):
        """Malformed where terms raise; valid docs examples still work."""
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame()
            df['string'] = 'foo'
            df.ix[0:4,'string'] = 'bar'
            wp = tm.makePanel()
            p4d = tm.makePanel4D()
            store.put('df', df, format='table')
            store.put('wp', wp, format='table')
            store.put('p4d', p4d, format='table')

            # some invalid terms (unknown axis names / non-axis fields)
            self.assertRaises(ValueError, store.select, 'wp', "minor=['A', 'B']")
            self.assertRaises(ValueError, store.select, 'wp', ["index=['20121114']"])
            self.assertRaises(ValueError, store.select, 'wp', ["index=['20121114', '20121114']"])
            self.assertRaises(TypeError, Term)

            # more invalid expressions
            self.assertRaises(ValueError, store.select, 'df','df.index[3]')
            self.assertRaises(SyntaxError, store.select, 'df','index>')
            self.assertRaises(ValueError, store.select, 'wp', "major_axis<'20000108' & minor_axis['A', 'B']")

        # from the docs
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10,4),columns=list('ABCD'),index=date_range('20130101',periods=10))
            dfq.to_hdf(path,'dfq',format='table',data_columns=True)

            # check ok
            read_hdf(path,'dfq',where="index>Timestamp('20130104') & columns=['A', 'B']")
            read_hdf(path,'dfq',where="A>0 or C>0")

        # catch the invalid reference: A/C are not data columns here
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10,4),columns=list('ABCD'),index=date_range('20130101',periods=10))
            dfq.to_hdf(path,'dfq',format='table')

            self.assertRaises(ValueError, read_hdf, path,'dfq',where="A>0 or C>0")
    def test_terms(self):
        """Term syntax coverage: string expressions, deprecated tuple/dict
        forms, per-object axis names, lambdas and unary operators."""
        with ensure_clean_store(self.path) as store:

            wp = tm.makePanel()
            p4d = tm.makePanel4D()
            wpneg = Panel.fromDict({-1: tm.makeDataFrame(), 0: tm.makeDataFrame(),
                                    1: tm.makeDataFrame()})
            store.put('wp', wp, table=True)
            store.put('p4d', p4d, table=True)
            store.put('wpneg', wpneg, table=True)

            # panel
            result = store.select('wp', [Term(
                'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")])
            expected = wp.truncate(after='20000108').reindex(minor=['A', 'B'])
            assert_panel_equal(result, expected)

            # with deprecation: three-argument Term form
            result = store.select('wp', [Term(
                'major_axis','<',"20000108"), Term("minor_axis=['A', 'B']")])
            expected = wp.truncate(after='20000108').reindex(minor=['A', 'B'])
            tm.assert_panel_equal(result, expected)

            # p4d
            result = store.select('p4d', [Term('major_axis<"20000108"'),
                                          Term("minor_axis=['A', 'B']"),
                                          Term("items=['ItemA', 'ItemB']")])
            expected = p4d.truncate(after='20000108').reindex(
                minor=['A', 'B'], items=['ItemA', 'ItemB'])
            assert_panel4d_equal(result, expected)

            # back compat invalid terms: dict/list forms warn before failing
            terms = [
                dict(field='major_axis', op='>', value='20121114'),
                [ dict(field='major_axis', op='>', value='20121114') ],
                [ "minor_axis=['A','B']", dict(field='major_axis', op='>', value='20121114') ]
            ]
            for t in terms:
                with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                    Term(t)

            # valid terms
            terms = [
                ('major_axis=20121114'),
                ('major_axis>20121114'),
                (("major_axis=['20121114', '20121114']"),),
                ('major_axis=datetime.datetime(2012, 11, 14)'),
                'major_axis> 20121114',
                'major_axis >20121114',
                'major_axis > 20121114',
                (("minor_axis=['A', 'B']"),),
                (("minor_axis=['A', 'B']"),),
                ((("minor_axis==['A', 'B']"),),),
                (("items=['ItemA', 'ItemB']"),),
                ('items=ItemA'),
            ]

            for t in terms:
                store.select('wp', t)
                store.select('p4d', t)

            # valid for p4d only (the 'labels' axis does not exist on panels)
            terms = [
                (("labels=['l1', 'l2']"),),
                Term("labels=['l1', 'l2']"),
            ]

            for t in terms:
                store.select('p4d', t)

            with tm.assertRaisesRegexp(TypeError, 'Only named functions are supported'):
                store.select('wp', Term('major_axis == (lambda x: x)("20130101")'))

            # check USub node parsing
            res = store.select('wpneg', Term('items == -1'))
            expected = Panel({-1: wpneg[-1]})
            tm.assert_panel_equal(res, expected)

            with tm.assertRaisesRegexp(NotImplementedError,
                                       'Unary addition not supported'):
                store.select('wpneg', Term('items == +1'))
    def test_term_compat(self):
        """Back-compat Term forms (three-argument) select and remove correctly."""
        with ensure_clean_store(self.path) as store:

            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                       major_axis=date_range('1/1/2000', periods=5),
                       minor_axis=['A', 'B', 'C', 'D'])
            store.append('wp',wp)

            result = store.select('wp', [Term('major_axis>20000102'),
                                         Term('minor_axis', '=', ['A','B']) ])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102'),['A','B']]
            assert_panel_equal(result, expected)

            store.remove('wp', Term('major_axis>20000103'))
            result = store.select('wp')
            expected = wp.loc[:,wp.major_axis<=Timestamp('20000103'),:]
            assert_panel_equal(result, expected)

        with ensure_clean_store(self.path) as store:

            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                       major_axis=date_range('1/1/2000', periods=5),
                       minor_axis=['A', 'B', 'C', 'D'])
            store.append('wp',wp)

            # stringified datetimes
            result = store.select('wp', [Term('major_axis','>',datetime.datetime(2000,1,2))])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102')]
            assert_panel_equal(result, expected)

            result = store.select('wp', [Term('major_axis','>',datetime.datetime(2000,1,2,0,0))])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102')]
            assert_panel_equal(result, expected)

            result = store.select('wp', [Term('major_axis','=',[datetime.datetime(2000,1,2,0,0),datetime.datetime(2000,1,3,0,0)])])
            expected = wp.loc[:,[Timestamp('20000102'),Timestamp('20000103')]]
            assert_panel_equal(result, expected)

            result = store.select('wp', [Term('minor_axis','=',['A','B'])])
            expected = wp.loc[:,:,['A','B']]
            assert_panel_equal(result, expected)
    def test_backwards_compat_without_term_object(self):
        """Plain-tuple where clauses still work but emit DeprecationWarning."""
        with ensure_clean_store(self.path) as store:

            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                       major_axis=date_range('1/1/2000', periods=5),
                       minor_axis=['A', 'B', 'C', 'D'])
            store.append('wp',wp)
            with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                result = store.select('wp', [('major_axis>20000102'),
                                             ('minor_axis', '=', ['A','B']) ])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102'),['A','B']]
            assert_panel_equal(result, expected)

            store.remove('wp', ('major_axis>20000103'))
            result = store.select('wp')
            expected = wp.loc[:,wp.major_axis<=Timestamp('20000103'),:]
            assert_panel_equal(result, expected)

        with ensure_clean_store(self.path) as store:

            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                       major_axis=date_range('1/1/2000', periods=5),
                       minor_axis=['A', 'B', 'C', 'D'])
            store.append('wp',wp)

            # stringified datetimes in tuple form also warn
            with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                result = store.select('wp', [('major_axis','>',datetime.datetime(2000,1,2))])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102')]
            assert_panel_equal(result, expected)
            with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                result = store.select('wp', [('major_axis','>',datetime.datetime(2000,1,2,0,0))])
            expected = wp.loc[:,wp.major_axis>Timestamp('20000102')]
            assert_panel_equal(result, expected)
            with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                result = store.select('wp', [('major_axis','=',[datetime.datetime(2000,1,2,0,0),
                                                                datetime.datetime(2000,1,3,0,0)])])
            expected = wp.loc[:,[Timestamp('20000102'),Timestamp('20000103')]]
            assert_panel_equal(result, expected)
            with tm.assert_produces_warning(expected_warning=DeprecationWarning):
                result = store.select('wp', [('minor_axis','=',['A','B'])])
            expected = wp.loc[:,:,['A','B']]
            assert_panel_equal(result, expected)
    def test_same_name_scoping(self):
        """Names inside where-expressions resolve against the caller's scope,
        even when a local import shadows an earlier module name."""
        with ensure_clean_store(self.path) as store:

            import pandas as pd
            df = DataFrame(np.random.randn(20, 2),index=pd.date_range('20130101',periods=20))
            store.put('df', df, table=True)
            expected = df[df.index>pd.Timestamp('20130105')]

            import datetime
            result = store.select('df','index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result,expected)

            # NOTE: this import shadows the module with the class, on purpose
            from datetime import datetime

            # technically an error, but allow it
            result = store.select('df','index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result,expected)

            result = store.select('df','index>datetime(2013,1,5)')
            assert_frame_equal(result,expected)
  2096. def test_series(self):
  2097. s = tm.makeStringSeries()
  2098. self._check_roundtrip(s, tm.assert_series_equal)
  2099. ts = tm.makeTimeSeries()
  2100. self._check_roundtrip(ts, tm.assert_series_equal)
  2101. ts2 = Series(ts.index, Index(ts.index, dtype=object))
  2102. self._check_roundtrip(ts2, tm.assert_series_equal)
  2103. ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
  2104. dtype=object))
  2105. self._check_roundtrip(ts3, tm.assert_series_equal)
  2106. def test_sparse_series(self):
  2107. s = tm.makeStringSeries()
  2108. s[3:5] = np.nan
  2109. ss = s.to_sparse()
  2110. self._check_roundtrip(ss, tm.assert_series_equal,
  2111. check_series_type=True)
  2112. ss2 = s.to_sparse(kind='integer')
  2113. self._check_roundtrip(ss2, tm.assert_series_equal,
  2114. check_series_type=True)
  2115. ss3 = s.to_sparse(fill_value=0)
  2116. self._check_roundtrip(ss3, tm.assert_series_equal,
  2117. check_series_type=True)
  2118. def test_sparse_frame(self):
  2119. s = tm.makeDataFrame()
  2120. s.ix[3:5, 1:3] = np.nan
  2121. s.ix[8:10, -2] = np.nan
  2122. ss = s.to_sparse()
  2123. self._check_double_roundtrip(ss, tm.assert_frame_equal,
  2124. check_frame_type=True)
  2125. ss2 = s.to_sparse(kind='integer')
  2126. self._check_double_roundtrip(ss2, tm.assert_frame_equal,
  2127. check_frame_type=True)
  2128. ss3 = s.to_sparse(fill_value=0)
  2129. self._check_double_roundtrip(ss3, tm.assert_frame_equal,
  2130. check_frame_type=True)
  2131. def test_sparse_panel(self):
  2132. items = ['x', 'y', 'z']
  2133. p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items))
  2134. sp = p.to_sparse()
  2135. self._check_double_roundtrip(sp, assert_panel_equal,
  2136. check_panel_type=True)
  2137. sp2 = p.to_sparse(kind='integer')
  2138. self._check_double_roundtrip(sp2, assert_panel_equal,
  2139. check_panel_type=True)
  2140. sp3 = p.to_sparse(fill_value=0)
  2141. self._check_double_roundtrip(sp3, assert_panel_equal,
  2142. check_panel_type=True)
  2143. def test_float_index(self):
  2144. # GH #454
  2145. index = np.random.randn(10)
  2146. s = Series(np.random.randn(10), index=index)
  2147. self._check_roundtrip(s, tm.assert_series_equal)
  2148. def test_tuple_index(self):
  2149. # GH #492
  2150. col = np.arange(10)
  2151. idx = [(0., 1.), (2., 3.), (4., 5.)]
  2152. data = np.random.randn(30).reshape((3, 10))
  2153. DF = DataFrame(data, index=idx, columns=col)
  2154. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2155. self._check_roundtrip(DF, tm.assert_frame_equal)
  2156. def test_index_types(self):
  2157. values = np.random.randn(2)
  2158. func = lambda l, r: tm.assert_series_equal(l, r,
  2159. check_dtype=True,
  2160. check_index_type=True,
  2161. check_series_type=True)
  2162. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2163. ser = Series(values, [0, 'y'])
  2164. self._check_roundtrip(ser, func)
  2165. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2166. ser = Series(values, [datetime.datetime.today(), 0])
  2167. self._check_roundtrip(ser, func)
  2168. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2169. ser = Series(values, ['y', 0])
  2170. self._check_roundtrip(ser, func)
  2171. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2172. ser = Series(values, [datetime.date.today(), 'a'])
  2173. self._check_roundtrip(ser, func)
  2174. with tm.assert_produces_warning(expected_warning=PerformanceWarning):
  2175. ser = Series(values, [1.23, 'b'])
  2176. self._check_roundtrip(ser, func)
  2177. ser = Series(values, [1, 1.53])
  2178. self._check_roundtrip(ser, func)
  2179. ser = Series(values, [1, 5])
  2180. self._check_roundtrip(ser, func)
  2181. ser = Series(values, [datetime.datetime(
  2182. 2012, 1, 1), datetime.datetime(2012, 1, 2)])
  2183. self._check_roundtrip(ser, func)
  2184. def test_timeseries_preepoch(self):
  2185. if sys.version_info[0] == 2 and sys.version_info[1] < 7:
  2186. raise nose.SkipTest("won't work on Python < 2.7")
  2187. dr = bdate_range('1/1/1940', '1/1/1960')
  2188. ts = Series(np.random.randn(len(dr)), index=dr)
  2189. try:
  2190. self._check_roundtrip(ts, tm.assert_series_equal)
  2191. except OverflowError:
  2192. raise nose.SkipTest('known failer on some windows platforms')
  2193. def test_frame(self):
  2194. df = tm.makeDataFrame()
  2195. # put in some random NAs
  2196. df.values[0, 0] = np.nan
  2197. df.values[5, 3] = np.nan
  2198. self._check_roundtrip_table(df, tm.assert_frame_equal)
  2199. self._check_roundtrip(df, tm.assert_frame_equal)
  2200. self._check_roundtrip_table(df, tm.assert_frame_equal,
  2201. compression=True)
  2202. self._check_roundtrip(df, tm.assert_frame_equal,
  2203. compression=True)
  2204. tdf = tm.makeTimeDataFrame()
  2205. self._check_roundtrip(tdf, tm.assert_frame_equal)
  2206. self._check_roundtrip(tdf, tm.assert_frame_equal,
  2207. compression=True)
  2208. with ensure_clean_store(self.path) as store:
  2209. # not consolidated
  2210. df['foo'] = np.random.randn(len(df))
  2211. store['df'] = df
  2212. recons = store['df']
  2213. self.assertTrue(recons._data.is_consolidated())
  2214. # empty
  2215. self._check_roundtrip(df[:0], tm.assert_frame_equal)
  2216. def test_empty_series_frame(self):
  2217. s0 = Series()
  2218. s1 = Series(name='myseries')
  2219. df0 = DataFrame()
  2220. df1 = DataFrame(index=['a', 'b', 'c'])
  2221. df2 = DataFrame(columns=['d', 'e', 'f'])
  2222. self._check_roundtrip(s0, tm.assert_series_equal)
  2223. self._check_roundtrip(s1, tm.assert_series_equal)
  2224. self._check_roundtrip(df0, tm.assert_frame_equal)
  2225. self._check_roundtrip(df1, tm.assert_frame_equal)
  2226. self._check_roundtrip(df2, tm.assert_frame_equal)
  2227. def test_empty_series(self):
  2228. for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]']:
  2229. s = Series(dtype=dtype)
  2230. self._check_roundtrip(s, tm.assert_series_equal)
  2231. def test_can_serialize_dates(self):
  2232. rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
  2233. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  2234. self._check_roundtrip(frame, tm.assert_frame_equal)
  2235. def test_timezones(self):
  2236. rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
  2237. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  2238. with ensure_clean_store(self.path) as store:
  2239. store['frame'] = frame
  2240. recons = store['frame']
  2241. self.assertTrue(recons.index.equals(rng))
  2242. self.assertEqual(rng.tz, recons.index.tz)
  2243. def test_fixed_offset_tz(self):
  2244. rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
  2245. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  2246. with ensure_clean_store(self.path) as store:
  2247. store['frame'] = frame
  2248. recons = store['frame']
  2249. self.assertTrue(recons.index.equals(rng))
  2250. self.assertEqual(rng.tz, recons.index.tz)
  2251. def test_store_hierarchical(self):
  2252. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  2253. ['one', 'two', 'three']],
  2254. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  2255. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  2256. names=['foo', 'bar'])
  2257. frame = DataFrame(np.random.randn(10, 3), index=index,
  2258. columns=['A', 'B', 'C'])
  2259. self._check_roundtrip(frame, tm.assert_frame_equal)
  2260. self._check_roundtrip(frame.T, tm.assert_frame_equal)
  2261. self._check_roundtrip(frame['A'], tm.assert_series_equal)
  2262. # check that the names are stored
  2263. with ensure_clean_store(self.path) as store:
  2264. store['frame'] = frame
  2265. recons = store['frame']
  2266. assert(recons.index.names == ('foo', 'bar'))
  2267. def test_store_index_name(self):
  2268. df = tm.makeDataFrame()
  2269. df.index.name = 'foo'
  2270. with ensure_clean_store(self.path) as store:
  2271. store['frame'] = df
  2272. recons = store['frame']
  2273. assert(recons.index.name == 'foo')
  2274. def test_store_series_name(self):
  2275. df = tm.makeDataFrame()
  2276. series = df['A']
  2277. with ensure_clean_store(self.path) as store:
  2278. store['series'] = series
  2279. recons = store['series']
  2280. assert(recons.name == 'A')
  2281. def test_store_mixed(self):
  2282. def _make_one():
  2283. df = tm.makeDataFrame()
  2284. df['obj1'] = 'foo'
  2285. df['obj2'] = 'bar'
  2286. df['bool1'] = df['A'] > 0
  2287. df['bool2'] = df['B'] > 0
  2288. df['int1'] = 1
  2289. df['int2'] = 2
  2290. return df.consolidate()
  2291. df1 = _make_one()
  2292. df2 = _make_one()
  2293. self._check_roundtrip(df1, tm.assert_frame_equal)
  2294. self._check_roundtrip(df2, tm.assert_frame_equal)
  2295. with ensure_clean_store(self.path) as store:
  2296. store['obj'] = df1
  2297. tm.assert_frame_equal(store['obj'], df1)
  2298. store['obj'] = df2
  2299. tm.assert_frame_equal(store['obj'], df2)
  2300. # check that can store Series of all of these types
  2301. self._check_roundtrip(df1['obj1'], tm.assert_series_equal)
  2302. self._check_roundtrip(df1['bool1'], tm.assert_series_equal)
  2303. self._check_roundtrip(df1['int1'], tm.assert_series_equal)
  2304. # try with compression
  2305. self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
  2306. compression=True)
  2307. self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
  2308. compression=True)
  2309. self._check_roundtrip(df1['int1'], tm.assert_series_equal,
  2310. compression=True)
  2311. self._check_roundtrip(df1, tm.assert_frame_equal,
  2312. compression=True)
  2313. def test_wide(self):
  2314. wp = tm.makePanel()
  2315. self._check_roundtrip(wp, assert_panel_equal)
  2316. def test_wide_table(self):
  2317. wp = tm.makePanel()
  2318. self._check_roundtrip_table(wp, assert_panel_equal)
    def test_select_with_dups(self):
        """Selection from tables whose columns contain duplicate labels."""

        # single dtype, duplicated column names
        df = DataFrame(np.random.randn(10,4),columns=['A','A','B','B'])
        df.index = date_range('20130101 9:30',periods=10,freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df',df)

            # full selection preserves the duplicate columns
            result = store.select('df')
            expected = df
            assert_frame_equal(result,expected,by_blocks=True)

            result = store.select('df',columns=df.columns)
            expected = df
            assert_frame_equal(result,expected,by_blocks=True)

            # selecting a duplicated label returns every matching column
            result = store.select('df',columns=['A'])
            expected = df.loc[:,['A']]
            assert_frame_equal(result,expected)

        # dups across dtypes (float and int blocks sharing label 'A')
        df = concat([DataFrame(np.random.randn(10,4),columns=['A','A','B','B']),
                     DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C'])],
                    axis=1)
        df.index = date_range('20130101 9:30',periods=10,freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df',df)

            result = store.select('df')
            expected = df
            assert_frame_equal(result,expected,by_blocks=True)

            result = store.select('df',columns=df.columns)
            expected = df
            assert_frame_equal(result,expected,by_blocks=True)

            expected = df.loc[:,['A']]
            result = store.select('df',columns=['A'])
            assert_frame_equal(result,expected,by_blocks=True)

            # column order in columns= is respected
            expected = df.loc[:,['B','A']]
            result = store.select('df',columns=['B','A'])
            assert_frame_equal(result,expected,by_blocks=True)

        # duplicates on both index and columns (double append)
        with ensure_clean_store(self.path) as store:
            store.append('df',df)
            store.append('df',df)

            expected = df.loc[:,['B','A']]
            expected = concat([expected, expected])
            result = store.select('df',columns=['B','A'])
            assert_frame_equal(result,expected,by_blocks=True)
  2361. def test_wide_table_dups(self):
  2362. wp = tm.makePanel()
  2363. with ensure_clean_store(self.path) as store:
  2364. store.put('panel', wp, format='table')
  2365. store.put('panel', wp, format='table', append=True)
  2366. with tm.assert_produces_warning(expected_warning=DuplicateWarning):
  2367. recons = store['panel']
  2368. assert_panel_equal(recons, wp)
  2369. def test_long(self):
  2370. def _check(left, right):
  2371. assert_panel_equal(left.to_panel(), right.to_panel())
  2372. wp = tm.makePanel()
  2373. self._check_roundtrip(wp.to_frame(), _check)
  2374. # empty
  2375. # self._check_roundtrip(wp.to_frame()[:0], _check)
    def test_longpanel(self):
        # placeholder: long-panel storage is not currently exercised here
        pass
  2378. def test_overwrite_node(self):
  2379. with ensure_clean_store(self.path) as store:
  2380. store['a'] = tm.makeTimeDataFrame()
  2381. ts = tm.makeTimeSeries()
  2382. store['a'] = ts
  2383. tm.assert_series_equal(store['a'], ts)
  2384. def test_sparse_with_compression(self):
  2385. # GH 2931
  2386. # make sparse dataframe
  2387. df = DataFrame(np.random.binomial(n=1, p=.01, size=(1e3, 10))).to_sparse(fill_value=0)
  2388. # case 1: store uncompressed
  2389. self._check_double_roundtrip(df, tm.assert_frame_equal,
  2390. compression = False,
  2391. check_frame_type=True)
  2392. # case 2: store compressed (works)
  2393. self._check_double_roundtrip(df, tm.assert_frame_equal,
  2394. compression = 'zlib',
  2395. check_frame_type=True)
  2396. # set one series to be completely sparse
  2397. df[0] = np.zeros(1e3)
  2398. # case 3: store df with completely sparse series uncompressed
  2399. self._check_double_roundtrip(df, tm.assert_frame_equal,
  2400. compression = False,
  2401. check_frame_type=True)
  2402. # case 4: try storing df with completely sparse series compressed (fails)
  2403. self._check_double_roundtrip(df, tm.assert_frame_equal,
  2404. compression = 'zlib',
  2405. check_frame_type=True)
  2406. def test_select(self):
  2407. wp = tm.makePanel()
  2408. with ensure_clean_store(self.path) as store:
  2409. # put/select ok
  2410. _maybe_remove(store, 'wp')
  2411. store.put('wp', wp, format='table')
  2412. store.select('wp')
  2413. # non-table ok (where = None)
  2414. _maybe_remove(store, 'wp')
  2415. store.put('wp2', wp)
  2416. store.select('wp2')
  2417. # selection on the non-indexable with a large number of columns
  2418. wp = Panel(
  2419. np.random.randn(100, 100, 100), items=['Item%03d' % i for i in range(100)],
  2420. major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in range(100)])
  2421. _maybe_remove(store, 'wp')
  2422. store.append('wp', wp)
  2423. items = ['Item%03d' % i for i in range(80)]
  2424. result = store.select('wp', Term('items=items'))
  2425. expected = wp.reindex(items=items)
  2426. assert_panel_equal(expected, result)
  2427. # selectin non-table with a where
  2428. # self.assertRaises(ValueError, store.select,
  2429. # 'wp2', ('column', ['A', 'D']))
  2430. # select with columns=
  2431. df = tm.makeTimeDataFrame()
  2432. _maybe_remove(store, 'df')
  2433. store.append('df', df)
  2434. result = store.select('df', columns=['A', 'B'])
  2435. expected = df.reindex(columns=['A', 'B'])
  2436. tm.assert_frame_equal(expected, result)
  2437. # equivalentsly
  2438. result = store.select('df', [("columns=['A', 'B']")])
  2439. expected = df.reindex(columns=['A', 'B'])
  2440. tm.assert_frame_equal(expected, result)
  2441. # with a data column
  2442. _maybe_remove(store, 'df')
  2443. store.append('df', df, data_columns=['A'])
  2444. result = store.select('df', ['A > 0'], columns=['A', 'B'])
  2445. expected = df[df.A > 0].reindex(columns=['A', 'B'])
  2446. tm.assert_frame_equal(expected, result)
  2447. # all a data columns
  2448. _maybe_remove(store, 'df')
  2449. store.append('df', df, data_columns=True)
  2450. result = store.select('df', ['A > 0'], columns=['A', 'B'])
  2451. expected = df[df.A > 0].reindex(columns=['A', 'B'])
  2452. tm.assert_frame_equal(expected, result)
  2453. # with a data column, but different columns
  2454. _maybe_remove(store, 'df')
  2455. store.append('df', df, data_columns=['A'])
  2456. result = store.select('df', ['A > 0'], columns=['C', 'D'])
  2457. expected = df[df.A > 0].reindex(columns=['C', 'D'])
  2458. tm.assert_frame_equal(expected, result)
    def test_select_dtypes(self):
        """``where`` selection against timestamp, bool, int and float columns."""

        with ensure_clean_store(self.path) as store:

            # with a Timestamp data column (GH #2637)
            df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A'])
            result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")])
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5,2), columns =['A','B'])
            df['object'] = 'foo'
            df.ix[4:5,'object'] = 'bar'
            df['boolv'] = df['A'] > 0
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns = True)

            # True may be spelled as a bool, a string, or an int in the term
            expected = df[df.boolv == True].reindex(columns=['A','boolv'])
            for v in [True,'true',1]:
                result = store.select('df', Term('boolv == %s' % str(v)), columns = ['A','boolv'])
                tm.assert_frame_equal(expected, result)

            # and likewise for False
            expected = df[df.boolv == False ].reindex(columns=['A','boolv'])
            for v in [False,'false',0]:
                result = store.select('df', Term('boolv == %s' % str(v)), columns = ['A','boolv'])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            _maybe_remove(store, 'df_int')
            store.append('df_int', df)
            result = store.select(
                'df_int', [Term("index<10"), Term("columns=['A']")])
            expected = df.reindex(index=list(df.index)[0:10],columns=['A'])
            tm.assert_frame_equal(expected, result)

            # float index
            df = DataFrame(dict(A=np.random.rand(
                20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
            _maybe_remove(store, 'df_float')
            store.append('df_float', df)
            result = store.select(
                'df_float', [Term("index<10.0"), Term("columns=['A']")])
            expected = df.reindex(index=list(df.index)[0:10],columns=['A'])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(self.path) as store:

            # floats w/o NaN
            df = DataFrame(dict(cols = range(11), values = range(11)),dtype='float64')
            df['cols'] = (df['cols']+10).apply(str)

            store.append('df1',df,data_columns=True)
            result = store.select(
                'df1', where='values>2.0')
            expected = df[df['values']>2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN in the first row (index=False sidesteps the
            # PyTables issue referenced below)
            df.iloc[0] = np.nan
            expected = df[df['values']>2.0]

            store.append('df2',df,data_columns=True,index=False)
            result = store.select(
                'df2', where='values>2.0')
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            #store.append('df3',df,data_columns=True)
            #result = store.select(
            #    'df3', where='values>2.0')
            #tm.assert_frame_equal(expected, result)

            # NaN not in first position is ok even with an index
            df = DataFrame(dict(cols = range(11), values = range(11)),dtype='float64')
            df['cols'] = (df['cols']+10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df['values']>2.0]

            store.append('df4',df,data_columns=True)
            result = store.select(
                'df4', where='values>2.0')
            tm.assert_frame_equal(expected, result)
    def test_select_with_many_inputs(self):
        """``where`` terms with long value lists resolved from local scope."""

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
                                A=np.random.randn(300),
                                B=range(300),
                                users = ['a']*50 + ['b']*50 + ['c']*100 + ['a%03d' % i for i in range(100)]))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A', 'B', 'users'])

            # regular select
            result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")])
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # small selector (inline list of values)
            result = store.select('df', [Term("ts>=Timestamp('2012-02-01') & users=['a','b','c']")])
            expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(['a','b','c']) ]
            tm.assert_frame_equal(expected, result)

            # big selector along the columns; 'selector' is picked up
            # from the enclosing scope by the Term parser
            selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in range(60) ]
            result = store.select('df', [Term("ts>=Timestamp('2012-02-01')"),Term('users=selector')])
            expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector) ]
            tm.assert_frame_equal(expected, result)

            # integer-valued selector
            selector = range(100,200)
            result = store.select('df', [Term('B=selector')])
            expected = df[ df.B.isin(selector) ]
            tm.assert_frame_equal(expected, result)
            self.assertEqual(len(result), 100)

            # big selector along the index
            selector = Index(df.ts[0:100].values)
            result = store.select('df', [Term('ts=selector')])
            expected = df[ df.ts.isin(selector.values) ]
            tm.assert_frame_equal(expected, result)
            self.assertEqual(len(result), 100)
    def test_select_iterator(self):
        """iterator=True / chunksize= yield chunks that concat back to the full result."""

        # single table
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, 'df')
            store.append('df', df)

            expected = store.select('df')

            results = []
            for s in store.select('df',iterator=True):
                results.append(s)
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # 500 rows / chunksize 100 -> exactly 5 chunks
            results = []
            for s in store.select('df',chunksize=100):
                results.append(s)
            self.assertEqual(len(results), 5)
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # chunksize that does not divide the row count evenly
            results = []
            for s in store.select('df',chunksize=150):
                results.append(s)
            result = concat(results)
            tm.assert_frame_equal(result, expected)

        # chunked/iterator reads require table format; fixed format raises
        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path,'df_non_table')
            self.assertRaises(TypeError, read_hdf, path,'df_non_table',chunksize=100)
            self.assertRaises(TypeError, read_hdf, path,'df_non_table',iterator=True)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path,'df',format='table')

            # chunked read via the top-level read_hdf
            results = []
            for x in read_hdf(path,'df',chunksize=100):
                results.append(x)

            self.assertEqual(len(results), 5)
            result = concat(results)
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path,'df'))

        # multiple tables via select_as_multiple
        with ensure_clean_store(self.path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append('df1',df1,data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(columns=lambda x: "%s_2" % x)
            df2['foo'] = 'bar'
            store.append('df2',df2)

            df = concat([df1, df2], axis=1)

            # full selection
            expected = store.select_as_multiple(
                ['df1', 'df2'], selector='df1')
            results = []
            for s in store.select_as_multiple(
                ['df1', 'df2'], selector='df1', chunksize=150):
                results.append(s)
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # where selection
            #expected = store.select_as_multiple(
            #    ['df1', 'df2'], where= Term('A>0'), selector='df1')
            #results = []
            #for s in store.select_as_multiple(
            #    ['df1', 'df2'], where= Term('A>0'), selector='df1', chunksize=25):
            #    results.append(s)
            #result = concat(results)
            #tm.assert_frame_equal(expected, result)
    def test_retain_index_attributes(self):
        """GH 3499: index attributes (freq/tz/name) survive table recreation."""

        # GH 3499, losing frequency info on index recreation
        df = DataFrame(dict(A = Series(lrange(3),
                                       index=date_range('2000-1-1',periods=3,freq='H'))))

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store,'data')
            store.put('data', df, format='table')

            result = store.get('data')
            tm.assert_frame_equal(df,result)

            # freq/tz/name should match on both axes after the round-trip
            for attr in ['freq','tz','name']:
                for idx in ['index','columns']:
                    self.assertEqual(getattr(getattr(df,idx),attr,None),
                                     getattr(getattr(result,idx),attr,None))

            # try to append a table with a different frequency
            with tm.assert_produces_warning(expected_warning=AttributeConflictWarning):
                df2 = DataFrame(dict(A = Series(lrange(3),
                                                index=date_range('2002-1-1',periods=3,freq='D'))))
                store.append('data',df2)

            # the conflicting freq is dropped from the stored metadata
            self.assertIsNone(store.get_storer('data').info['index']['freq'])

            # this is ok: appending irregular then regular timestamps
            _maybe_remove(store,'df2')
            df2 = DataFrame(dict(A = Series(lrange(3),
                                            index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')])))
            store.append('df2',df2)
            df3 = DataFrame(dict(A = Series(lrange(3),index=date_range('2002-1-1',periods=3,freq='D'))))
            store.append('df2',df3)
    def test_retain_index_attributes2(self):
        """Conflicting index attributes on append via to_hdf warn and are dropped."""

        with ensure_clean_path(self.path) as path:

            # appending an index with a different freq warns
            with tm.assert_produces_warning(expected_warning=AttributeConflictWarning):

                df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H'))))
                df.to_hdf(path,'data',mode='w',append=True)
                df2 = DataFrame(dict(A = Series(lrange(3), index=date_range('2002-1-1',periods=3,freq='D'))))
                df2.to_hdf(path,'data',append=True)

            # a named index keeps its name on a fresh write
            idx = date_range('2000-1-1',periods=3,freq='H')
            idx.name = 'foo'
            df = DataFrame(dict(A = Series(lrange(3), index=idx)))
            df.to_hdf(path,'data',mode='w',append=True)

            self.assertEqual(read_hdf(path,'data').index.name, 'foo')

            # appending an index with a conflicting name warns and the
            # stored name is reset to None
            with tm.assert_produces_warning(expected_warning=AttributeConflictWarning):

                idx2 = date_range('2001-1-1',periods=3,freq='H')
                idx2.name = 'bar'
                df2 = DataFrame(dict(A = Series(lrange(3), index=idx2)))
                df2.to_hdf(path,'data',append=True)

            self.assertIsNone(read_hdf(path,'data').index.name)
  2671. def test_panel_select(self):
  2672. wp = tm.makePanel()
  2673. with ensure_clean_store(self.path) as store:
  2674. store.put('wp', wp, format='table')
  2675. date = wp.major_axis[len(wp.major_axis) // 2]
  2676. crit1 = ('major_axis>=date')
  2677. crit2 = ("minor_axis=['A', 'D']")
  2678. result = store.select('wp', [crit1, crit2])
  2679. expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
  2680. assert_panel_equal(result, expected)
  2681. result = store.select(
  2682. 'wp', ['major_axis>="20000124"', ("minor_axis=['A', 'B']")])
  2683. expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
  2684. assert_panel_equal(result, expected)
    def test_frame_select(self):
        """Basic Term-based selection on a table-format frame."""

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('frame', df,format='table')
            date = df.index[len(df) // 2]

            # a Term captures 'date' from the enclosing scope
            crit1 = Term('index>=date')
            self.assertEqual(crit1.env.scope['date'], date)

            crit2 = ("columns=['A', 'D']")
            crit3 = ('columns=A')

            result = store.select('frame', [crit1, crit2])
            expected = df.ix[date:, ['A', 'D']]
            tm.assert_frame_equal(result, expected)

            result = store.select('frame', [crit3])
            expected = df.ix[:, ['A']]
            tm.assert_frame_equal(result, expected)

            # invalid terms: comparing a datetime index against an int
            df = tm.makeTimeDataFrame()
            store.append('df_time', df)
            self.assertRaises(
                ValueError, store.select, 'df_time', [Term("index>0")])

            # can't select if not written as table
            # store['frame'] = df
            # self.assertRaises(ValueError, store.select,
            #                  'frame', [crit1, crit2])
    def test_frame_select_complex(self):
        """Select via complex criteria: &, |, !=, ~ and 'in' expressions."""

        # select via complex criteria
        df = tm.makeTimeDataFrame()
        df['string'] = 'foo'
        df.loc[df.index[0:4],'string'] = 'bar'

        with ensure_clean_store(self.path) as store:
            store.put('df', df, table=True, data_columns=['string'])

            # and ('df.index[...]' is evaluated in this local scope)
            result = store.select('df', 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index>df.index[3]) & (df.string=='bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index>df.index[3]) & (df.string=='foo')]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select('df', 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index>df.index[3]) | (df.string=='bar')]
            tm.assert_frame_equal(result, expected)

            # parenthesized sub-expressions
            result = store.select('df', '(index>df.index[3] & index<=df.index[6]) | string="bar"')
            expected = df.loc[((df.index>df.index[3]) & (df.index<=df.index[6])) | (df.string=='bar')]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select('df', 'string!="bar"')
            expected = df.loc[df.string!='bar']
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            self.assertRaises(NotImplementedError, store.select, 'df', '~(string="bar")')

            # invert ok for filters
            result = store.select('df', "~(columns=['A','B'])")
            expected = df.loc[:,df.columns-['A','B']]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select('df', "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index>df.index[3]].reindex(columns=['A','B'])
            tm.assert_frame_equal(result, expected)
    def test_frame_select_complex2(self):
        """Where-clause values resolved from the caller's scope (lists and Index)."""

        with ensure_clean_path(['parms.hdf','hist.hdf']) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({ 'A' : [1,1,2,2,3] })
            parms.to_hdf(pp,'df',mode='w',format='table',data_columns=['A'])

            selection = read_hdf(pp,'df',where='A=[2,3]')
            hist = DataFrame(np.random.randn(25,1),columns=['data'],
                             index=MultiIndex.from_tuples([ (i,j) for i in range(5) for j in range(5) ],
                                                          names=['l1','l2']))

            hist.to_hdf(hh,'df',mode='w',format='table')

            expected = read_hdf(hh,'df',where=Term('l1','=',[2,3,4]))

            # list like
            result = read_hdf(hh,'df',where=Term('l1','=',selection.index.tolist()))
            assert_frame_equal(result, expected)
            l = selection.index.tolist()

            # scope with list like: 'l' is resolved from this frame's locals
            store = HDFStore(hh)
            result = store.select('df',where='l1=l')
            assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh,'df',where='l1=l')
            assert_frame_equal(result, expected)

            # index object, plus attribute/expression access inside the where
            index = selection.index
            result = read_hdf(hh,'df',where='l1=index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh,'df',where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh,'df',where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = read_hdf(hh,'df',where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            # scope with index, via an open store
            store = HDFStore(hh)

            result = store.select('df',where='l1=index')
            assert_frame_equal(result, expected)

            result = store.select('df',where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = store.select('df',where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = store.select('df',where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            store.close()
  2788. def test_invalid_filtering(self):
  2789. # can't use more than one filter (atm)
  2790. df = tm.makeTimeDataFrame()
  2791. with ensure_clean_store(self.path) as store:
  2792. store.put('df', df, table=True)
  2793. # not implemented
  2794. self.assertRaises(NotImplementedError, store.select, 'df', "columns=['A'] | columns=['B']")
  2795. # in theory we could deal with this
  2796. self.assertRaises(NotImplementedError, store.select, 'df', "columns=['A','B'] & columns=['C']")
  2797. def test_string_select(self):
  2798. # GH 2973
  2799. with ensure_clean_store(self.path) as store:
  2800. df = tm.makeTimeDataFrame()
  2801. # test string ==/!=
  2802. df['x'] = 'none'
  2803. df.ix[2:7,'x'] = ''
  2804. store.append('df',df,data_columns=['x'])
  2805. result = store.select('df',Term('x=none'))
  2806. expected = df[df.x == 'none']
  2807. assert_frame_equal(result,expected)
  2808. try:
  2809. result = store.select('df',Term('x!=none'))
  2810. expected = df[df.x != 'none']
  2811. assert_frame_equal(result,expected)
  2812. except Exception as detail:
  2813. com.pprint_thing("[{0}]".format(detail))
  2814. com.pprint_thing(store)
  2815. com.pprint_thing(expected)
  2816. df2 = df.copy()
  2817. df2.loc[df2.x=='','x'] = np.nan
  2818. store.append('df2',df2,data_columns=['x'])
  2819. result = store.select('df2',Term('x!=none'))
  2820. expected = df2[isnull(df2.x)]
  2821. assert_frame_equal(result,expected)
  2822. # int ==/!=
  2823. df['int'] = 1
  2824. df.ix[2:7,'int'] = 2
  2825. store.append('df3',df,data_columns=['int'])
  2826. result = store.select('df3',Term('int=2'))
  2827. expected = df[df.int==2]
  2828. assert_frame_equal(result,expected)
  2829. result = store.select('df3',Term('int!=2'))
  2830. expected = df[df.int!=2]
  2831. assert_frame_equal(result,expected)
    def test_read_column(self):
        """``select_column``: errors, data columns, NaN handling and start/stop."""

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')
            store.append('df', df)

            # error: unknown column
            self.assertRaises(KeyError, store.select_column, 'df', 'foo')

            # error: select_column does not accept a where clause
            def f():
                store.select_column('df', 'index', where = ['index>5'])
            self.assertRaises(Exception, f)

            # valid: the index is always retrievable
            result = store.select_column('df', 'index')
            tm.assert_almost_equal(result.values, Series(df.index).values)
            self.assertIsInstance(result,Series)

            # not a data indexable column
            self.assertRaises(
                ValueError, store.select_column, 'df', 'values_block_0')

            # a data column
            df2 = df.copy()
            df2['string'] = 'foo'
            store.append('df2', df2, data_columns=['string'])
            result = store.select_column('df2', 'string')
            tm.assert_almost_equal(result.values, df2['string'].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3['string'] = 'foo'
            df3.ix[4:6, 'string'] = np.nan
            store.append('df3', df3, data_columns=['string'])
            result = store.select_column('df3', 'string')
            tm.assert_almost_equal(result.values, df3['string'].values)

            # start/stop follow python slicing semantics (negative allowed)
            result = store.select_column('df3', 'string', start=2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:])

            result = store.select_column('df3', 'string', start=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:])

            result = store.select_column('df3', 'string', stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[:2])

            result = store.select_column('df3', 'string', stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[:-2])

            result = store.select_column('df3', 'string', start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:-2])

            # start=-2, stop=2 selects an empty slice, like values[-2:2]
            result = store.select_column('df3', 'string', start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:2])
    def test_coordinates(self):
        """Exercise select_as_coordinates and coordinate-driven select().

        Covers: coordinates for a full table, coordinates produced by a
        where clause, reusing coordinates from one table to select from
        another, and passing raw location arrays / boolean masks /
        start-stop windows directly as ``where``.
        """
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df')
            store.append('df', df)

            # all rows -> coordinates are simply 0..n-1
            c = store.select_as_coordinates('df')
            assert((c.values == np.arange(len(df.index))).all() == True)

            # get coordinates back & test vs frame
            _maybe_remove(store, 'df')

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.append('df', df)
            c = store.select_as_coordinates('df', ['index<3'])
            assert((c.values == np.arange(3)).all() == True)
            result = store.select('df', where=c)
            expected = df.ix[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
            assert((c.values == np.arange(2) + 3).all() == True)
            result = store.select('df', where=c)
            expected = df.ix[3:4, :]
            tm.assert_frame_equal(result, expected)
            # coordinates are returned as an Index
            self.assertIsInstance(c, Index)

            # coordinates computed on one table can drive selects on another
            # table with the same number of rows
            _maybe_remove(store, 'df1')
            _maybe_remove(store, 'df2')
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
            df1_result = store.select('df1', c)
            df2_result = store.select('df2', c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(1000,2),index=date_range('20000101',periods=1000))
            store.append('df',df)
            c = store.select_column('df','index')
            # integer positions (within the stored table) of the May rows
            where = c[DatetimeIndex(c).month==5].index
            expected = df.iloc[where]

            # locations
            result = store.select('df',where=where)
            tm.assert_frame_equal(result,expected)

            # boolean
            # NOTE(review): this call is identical to the "locations" call
            # above; an actual boolean-mask case is exercised below on 'df2'
            result = store.select('df',where=where)
            tm.assert_frame_equal(result,expected)

            # invalid where types / lengths / combinations must raise
            self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df),dtype='float64'))
            self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)+1))
            self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5)
            self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5,stop=10)

            # selection with filter ('selection' is resolved from this scope)
            selection = date_range('20000101',periods=500)
            result = store.select('df', where='index in selection')
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result,expected)

            # list of integer locations
            df = DataFrame(np.random.randn(10,2))
            store.append('df2',df)
            result = store.select('df2',where=[0,3,5])
            expected = df.iloc[[0,3,5]]
            tm.assert_frame_equal(result,expected)

            # boolean mask (one False entry)
            where = [True] * 10
            where[-2] = False
            result = store.select('df2',where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result,expected)

            # start/stop window with no where clause
            result = store.select('df2', start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result,expected)
  2951. def test_append_to_multiple(self):
  2952. df1 = tm.makeTimeDataFrame()
  2953. df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
  2954. df2['foo'] = 'bar'
  2955. df = concat([df1, df2], axis=1)
  2956. with ensure_clean_store(self.path) as store:
  2957. # exceptions
  2958. self.assertRaises(ValueError, store.append_to_multiple,
  2959. {'df1': ['A', 'B'], 'df2': None}, df, selector='df3')
  2960. self.assertRaises(ValueError, store.append_to_multiple,
  2961. {'df1': None, 'df2': None}, df, selector='df3')
  2962. self.assertRaises(
  2963. ValueError, store.append_to_multiple, 'df1', df, 'df1')
  2964. # regular operation
  2965. store.append_to_multiple(
  2966. {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
  2967. result = store.select_as_multiple(
  2968. ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
  2969. expected = df[(df.A > 0) & (df.B > 0)]
  2970. tm.assert_frame_equal(result, expected)
  2971. def test_append_to_multiple_dropna(self):
  2972. df1 = tm.makeTimeDataFrame()
  2973. df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
  2974. df1.ix[1, ['A', 'B']] = np.nan
  2975. df = concat([df1, df2], axis=1)
  2976. with ensure_clean_store(self.path) as store:
  2977. # dropna=True should guarantee rows are synchronized
  2978. store.append_to_multiple(
  2979. {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
  2980. dropna=True)
  2981. result = store.select_as_multiple(['df1', 'df2'])
  2982. expected = df.dropna()
  2983. tm.assert_frame_equal(result, expected)
  2984. tm.assert_index_equal(store.select('df1').index,
  2985. store.select('df2').index)
  2986. # dropna=False shouldn't synchronize row indexes
  2987. store.append_to_multiple(
  2988. {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
  2989. dropna=False)
  2990. self.assertRaises(
  2991. ValueError, store.select_as_multiple, ['df1', 'df2'])
  2992. assert not store.select('df1').index.equals(
  2993. store.select('df2').index)
    def test_select_as_multiple(self):
        """select_as_multiple: argument validation, equivalence with plain
        select for a single table, and joint selection across tables."""

        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'

        with ensure_clean_store(self.path) as store:

            # no tables stored yet
            self.assertRaises(Exception, store.select_as_multiple,
                              None, where=['A>0', 'B>0'], selector='df1')

            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            # invalid key arguments / unknown tables or selectors
            self.assertRaises(Exception, store.select_as_multiple,
                              None, where=['A>0', 'B>0'], selector='df1')
            self.assertRaises(Exception, store.select_as_multiple,
                              [None], where=['A>0', 'B>0'], selector='df1')
            self.assertRaises(KeyError, store.select_as_multiple,
                              ['df1','df3'], where=['A>0', 'B>0'], selector='df1')
            self.assertRaises(KeyError, store.select_as_multiple,
                              ['df3'], where=['A>0', 'B>0'], selector='df1')
            self.assertRaises(KeyError, store.select_as_multiple,
                              ['df1','df2'], where=['A>0', 'B>0'], selector='df4')

            # a single-table select_as_multiple (list or bare key) matches a
            # default select
            result = store.select('df1', ['A>0', 'B>0'])
            expected = store.select_as_multiple(
                ['df1'], where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)
            expected = store.select_as_multiple(
                'df1', where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)

            # multiple tables, selector is the first
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple tables, selecting on the second table
            result = store.select_as_multiple(['df1', 'df2'], where=[Term(
                'index>df2.index[4]')], selector='df2')
            expected = concat([df1, df2], axis=1)
            expected = expected[5:]
            tm.assert_frame_equal(result, expected)

            # test exception for tables with differing row counts
            store.append('df3', tm.makeTimeDataFrame(nper=50))
            self.assertRaises(ValueError, store.select_as_multiple,
                              ['df1','df3'], where=['A>0', 'B>0'], selector='df1')
  3039. def test_nan_selection_bug_4858(self):
  3040. # GH 4858; nan selection bug, only works for pytables >= 3.1
  3041. if LooseVersion(tables.__version__) < '3.1.0':
  3042. raise nose.SkipTest('tables version does not support fix for nan selection bug: GH 4858')
  3043. with ensure_clean_store(self.path) as store:
  3044. df = DataFrame(dict(cols = range(6), values = range(6)), dtype='float64')
  3045. df['cols'] = (df['cols']+10).apply(str)
  3046. df.iloc[0] = np.nan
  3047. expected = DataFrame(dict(cols = ['13.0','14.0','15.0'], values = [3.,4.,5.]), index=[3,4,5])
  3048. # write w/o the index on that particular column
  3049. store.append('df',df, data_columns=True,index=['cols'])
  3050. result = store.select('df',where='values>2.0')
  3051. assert_frame_equal(result,expected)
  3052. def test_start_stop(self):
  3053. with ensure_clean_store(self.path) as store:
  3054. df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
  3055. store.append('df', df)
  3056. result = store.select(
  3057. 'df', [Term("columns=['A']")], start=0, stop=5)
  3058. expected = df.ix[0:4, ['A']]
  3059. tm.assert_frame_equal(result, expected)
  3060. # out of range
  3061. result = store.select(
  3062. 'df', [Term("columns=['A']")], start=30, stop=40)
  3063. assert(len(result) == 0)
  3064. assert(type(result) == DataFrame)
  3065. def test_select_filter_corner(self):
  3066. df = DataFrame(np.random.randn(50, 100))
  3067. df.index = ['%.3d' % c for c in df.index]
  3068. df.columns = ['%.3d' % c for c in df.columns]
  3069. with ensure_clean_store(self.path) as store:
  3070. store.put('frame', df, format='table')
  3071. crit = Term('columns=df.columns[:75]')
  3072. result = store.select('frame', [crit])
  3073. tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
  3074. crit = Term('columns=df.columns[:75:2]')
  3075. result = store.select('frame', [crit])
  3076. tm.assert_frame_equal(result, df.ix[:, df.columns[:75:2]])
  3077. def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
  3078. options = {}
  3079. if compression:
  3080. options['complib'] = _default_compressor
  3081. with ensure_clean_store(self.path, 'w', **options) as store:
  3082. store['obj'] = obj
  3083. retrieved = store['obj']
  3084. comparator(retrieved, obj, **kwargs)
  3085. def _check_double_roundtrip(self, obj, comparator, compression=False,
  3086. **kwargs):
  3087. options = {}
  3088. if compression:
  3089. options['complib'] = compression or _default_compressor
  3090. with ensure_clean_store(self.path, 'w', **options) as store:
  3091. store['obj'] = obj
  3092. retrieved = store['obj']
  3093. comparator(retrieved, obj, **kwargs)
  3094. store['obj'] = retrieved
  3095. again = store['obj']
  3096. comparator(again, obj, **kwargs)
  3097. def _check_roundtrip_table(self, obj, comparator, compression=False):
  3098. options = {}
  3099. if compression:
  3100. options['complib'] = _default_compressor
  3101. with ensure_clean_store(self.path, 'w', **options) as store:
  3102. store.put('obj', obj, format='table')
  3103. retrieved = store['obj']
  3104. # sorted_obj = _test_sort(obj)
  3105. comparator(retrieved, obj)
    def test_multiple_open_close(self):
        """GH 4409: open & close an HDFStore on the same path repeatedly.

        Behavior depends on whether the installed PyTables enforces a
        strict file-open policy (at most one open handle per file).
        """

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path,'df',mode='w',format='table')

            # single open/close cycle; repr must reflect the closed state
            store = HDFStore(path)
            self.assertNotIn('CLOSED', str(store))
            self.assertTrue(store.is_open)
            store.close()
            self.assertIn('CLOSED', str(store))
            self.assertFalse(store.is_open)

        with ensure_clean_path(self.path) as path:

            if pytables._table_file_open_policy_is_strict:

                # strict policy: a second concurrent open must fail
                store1 = HDFStore(path)
                def f():
                    HDFStore(path)
                self.assertRaises(ValueError, f)
                store1.close()

            else:

                # lax policy: multiple handles may be open simultaneously,
                # and closing one must not affect the other
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                self.assertNotIn('CLOSED', str(store1))
                self.assertNotIn('CLOSED', str(store2))
                self.assertTrue(store1.is_open)
                self.assertTrue(store2.is_open)

                store1.close()
                self.assertIn('CLOSED', str(store1))
                self.assertFalse(store1.is_open)
                self.assertNotIn('CLOSED', str(store2))
                self.assertTrue(store2.is_open)

                store2.close()
                self.assertIn('CLOSED', str(store1))
                self.assertIn('CLOSED', str(store2))
                self.assertFalse(store1.is_open)
                self.assertFalse(store2.is_open)

                # nested close: closing the inner handle first
                store = HDFStore(path,mode='w')
                store.append('df',df)

                store2 = HDFStore(path)
                store2.append('df2',df)
                store2.close()
                self.assertIn('CLOSED', str(store2))
                self.assertFalse(store2.is_open)

                store.close()
                self.assertIn('CLOSED', str(store))
                self.assertFalse(store.is_open)

                # double closing: closing in the opposite order
                store = HDFStore(path,mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store.close()
                self.assertIn('CLOSED', str(store))
                self.assertFalse(store.is_open)

                store2.close()
                self.assertIn('CLOSED', str(store2))
                self.assertFalse(store2.is_open)

        # every public operation on a closed store must raise
        # ClosedFileError
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path,'df',mode='w',format='table')

            store = HDFStore(path)
            store.close()

            self.assertRaises(ClosedFileError, store.keys)
            self.assertRaises(ClosedFileError, lambda : 'df' in store)
            self.assertRaises(ClosedFileError, lambda : len(store))
            self.assertRaises(ClosedFileError, lambda : store['df'])
            self.assertRaises(ClosedFileError, lambda : store.df)
            self.assertRaises(ClosedFileError, store.select, 'df')
            self.assertRaises(ClosedFileError, store.get, 'df')
            self.assertRaises(ClosedFileError, store.append, 'df2', df)
            self.assertRaises(ClosedFileError, store.put, 'df3', df)
            self.assertRaises(ClosedFileError, store.get_storer, 'df2')
            self.assertRaises(ClosedFileError, store.remove, 'df2')

            def f():
                store.select('df')
            tm.assertRaisesRegexp(ClosedFileError, 'file is not open', f)
  3185. def test_pytables_native_read(self):
  3186. try:
  3187. store = HDFStore(tm.get_data_path('legacy_hdf/pytables_native.h5'), 'r')
  3188. d2 = store['detector/readout']
  3189. assert isinstance(d2, DataFrame)
  3190. finally:
  3191. safe_close(store)
  3192. try:
  3193. store = HDFStore(tm.get_data_path('legacy_hdf/pytables_native2.h5'), 'r')
  3194. str(store)
  3195. d1 = store['detector']
  3196. assert isinstance(d1, DataFrame)
  3197. finally:
  3198. safe_close(store)
  3199. def test_legacy_read(self):
  3200. try:
  3201. store = HDFStore(tm.get_data_path('legacy_hdf/legacy.h5'), 'r')
  3202. store['a']
  3203. store['b']
  3204. store['c']
  3205. store['d']
  3206. finally:
  3207. safe_close(store)
  3208. def test_legacy_table_read(self):
  3209. # legacy table types
  3210. try:
  3211. store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table.h5'), 'r')
  3212. store.select('df1')
  3213. store.select('df2')
  3214. store.select('wp1')
  3215. # force the frame
  3216. store.select('df2', typ='legacy_frame')
  3217. # old version warning
  3218. with tm.assert_produces_warning(expected_warning=IncompatibilityWarning):
  3219. self.assertRaises(
  3220. Exception, store.select, 'wp1', Term('minor_axis=B'))
  3221. df2 = store.select('df2')
  3222. result = store.select('df2', Term('index>df2.index[2]'))
  3223. expected = df2[df2.index > df2.index[2]]
  3224. assert_frame_equal(expected, result)
  3225. finally:
  3226. safe_close(store)
  3227. def test_legacy_0_10_read(self):
  3228. # legacy from 0.10
  3229. try:
  3230. store = HDFStore(tm.get_data_path('legacy_hdf/legacy_0.10.h5'), 'r')
  3231. str(store)
  3232. for k in store.keys():
  3233. store.select(k)
  3234. finally:
  3235. safe_close(store)
  3236. def test_legacy_0_11_read(self):
  3237. # legacy from 0.11
  3238. try:
  3239. path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5')
  3240. store = HDFStore(tm.get_data_path(path), 'r')
  3241. str(store)
  3242. assert 'df' in store
  3243. assert 'df1' in store
  3244. assert 'mi' in store
  3245. df = store.select('df')
  3246. df1 = store.select('df1')
  3247. mi = store.select('mi')
  3248. assert isinstance(df, DataFrame)
  3249. assert isinstance(df1, DataFrame)
  3250. assert isinstance(mi, DataFrame)
  3251. finally:
  3252. safe_close(store)
  3253. def test_copy(self):
  3254. def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
  3255. try:
  3256. if f is None:
  3257. f = tm.get_data_path(os.path.join('legacy_hdf',
  3258. 'legacy_0.10.h5'))
  3259. store = HDFStore(f, 'r')
  3260. if new_f is None:
  3261. import tempfile
  3262. fd, new_f = tempfile.mkstemp()
  3263. tstore = store.copy(new_f, keys = keys, propindexes = propindexes, **kwargs)
  3264. # check keys
  3265. if keys is None:
  3266. keys = store.keys()
  3267. self.assertEqual(set(keys), set(tstore.keys()))
  3268. # check indicies & nrows
  3269. for k in tstore.keys():
  3270. if tstore.get_storer(k).is_table:
  3271. new_t = tstore.get_storer(k)
  3272. orig_t = store.get_storer(k)
  3273. self.assertEqual(orig_t.nrows, new_t.nrows)
  3274. # check propindixes
  3275. if propindexes:
  3276. for a in orig_t.axes:
  3277. if a.is_indexed:
  3278. self.assertTrue(new_t[a.name].is_indexed)
  3279. finally:
  3280. safe_close(store)
  3281. safe_close(tstore)
  3282. try:
  3283. os.close(fd)
  3284. except:
  3285. pass
  3286. safe_remove(new_f)
  3287. do_copy()
  3288. do_copy(keys = ['/a','/b','/df1_mixed'])
  3289. do_copy(propindexes = False)
  3290. # new table
  3291. df = tm.makeDataFrame()
  3292. try:
  3293. st = HDFStore(self.path)
  3294. st.append('df', df, data_columns = ['A'])
  3295. st.close()
  3296. do_copy(f = self.path)
  3297. do_copy(f = self.path, propindexes = False)
  3298. finally:
  3299. safe_remove(self.path)
    def test_legacy_table_write(self):
        # writer for the legacy-format fixture files; permanently skipped
        # and only run by hand when regenerating the fixtures
        raise nose.SkipTest("skipping for now")

        # --- unreachable: fixture-generation code kept for reference ---
        store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a')

        df = tm.makeDataFrame()
        wp = tm.makePanel()

        # a multi-indexed frame
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])
        store.append('mi', df)

        # a frame with a data column and an explicit min_itemsize
        df = DataFrame(dict(A = 'foo', B = 'bar'),index=lrange(10))
        store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 })
        store.append('wp', wp)

        store.close()
  3317. def test_store_datetime_fractional_secs(self):
  3318. with ensure_clean_store(self.path) as store:
  3319. dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
  3320. series = Series([0], [dt])
  3321. store['a'] = series
  3322. self.assertEqual(store['a'].index[0], dt)
  3323. def test_tseries_indices_series(self):
  3324. with ensure_clean_store(self.path) as store:
  3325. idx = tm.makeDateIndex(10)
  3326. ser = Series(np.random.randn(len(idx)), idx)
  3327. store['a'] = ser
  3328. result = store['a']
  3329. assert_series_equal(result, ser)
  3330. self.assertEqual(type(result.index), type(ser.index))
  3331. self.assertEqual(result.index.freq, ser.index.freq)
  3332. idx = tm.makePeriodIndex(10)
  3333. ser = Series(np.random.randn(len(idx)), idx)
  3334. store['a'] = ser
  3335. result = store['a']
  3336. assert_series_equal(result, ser)
  3337. self.assertEqual(type(result.index), type(ser.index))
  3338. self.assertEqual(result.index.freq, ser.index.freq)
  3339. def test_tseries_indices_frame(self):
  3340. with ensure_clean_store(self.path) as store:
  3341. idx = tm.makeDateIndex(10)
  3342. df = DataFrame(np.random.randn(len(idx), 3), index=idx)
  3343. store['a'] = df
  3344. result = store['a']
  3345. assert_frame_equal(result, df)
  3346. self.assertEqual(type(result.index), type(df.index))
  3347. self.assertEqual(result.index.freq, df.index.freq)
  3348. idx = tm.makePeriodIndex(10)
  3349. df = DataFrame(np.random.randn(len(idx), 3), idx)
  3350. store['a'] = df
  3351. result = store['a']
  3352. assert_frame_equal(result, df)
  3353. self.assertEqual(type(result.index), type(df.index))
  3354. self.assertEqual(result.index.freq, df.index.freq)
  3355. def test_unicode_index(self):
  3356. unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]
  3357. def f():
  3358. s = Series(np.random.randn(len(unicode_values)), unicode_values)
  3359. self._check_roundtrip(s, tm.assert_series_equal)
  3360. compat_assert_produces_warning(PerformanceWarning,f)
  3361. def test_store_datetime_mixed(self):
  3362. df = DataFrame(
  3363. {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']})
  3364. ts = tm.makeTimeSeries()
  3365. df['d'] = ts.index[:3]
  3366. self._check_roundtrip(df, tm.assert_frame_equal)
  3367. # def test_cant_write_multiindex_table(self):
  3368. # # for now, #1848
  3369. # df = DataFrame(np.random.randn(10, 4),
  3370. # index=[np.arange(5).repeat(2),
  3371. # np.tile(np.arange(2), 5)])
  3372. # self.assertRaises(Exception, store.put, 'foo', df, format='table')
  3373. def test_append_with_diff_col_name_types_raises_value_error(self):
  3374. df = DataFrame(np.random.randn(10, 1))
  3375. df2 = DataFrame({'a': np.random.randn(10)})
  3376. df3 = DataFrame({(1, 2): np.random.randn(10)})
  3377. df4 = DataFrame({('1', 2): np.random.randn(10)})
  3378. df5 = DataFrame({('1', 2, object): np.random.randn(10)})
  3379. with ensure_clean_store(self.path) as store:
  3380. name = 'df_%s' % tm.rands(10)
  3381. store.append(name, df)
  3382. for d in (df2, df3, df4, df5):
  3383. with tm.assertRaises(ValueError):
  3384. store.append(name, d)
  3385. def test_query_with_nested_special_character(self):
  3386. df = DataFrame({'a': ['a', 'a', 'c', 'b', 'test & test', 'c' , 'b', 'e'],
  3387. 'b': [1, 2, 3, 4, 5, 6, 7, 8]})
  3388. expected = df[df.a == 'test & test']
  3389. with ensure_clean_store(self.path) as store:
  3390. store.append('test', df, format='table', data_columns=True)
  3391. result = store.select('test', 'a = "test & test"')
  3392. tm.assert_frame_equal(expected, result)
  3393. def _test_sort(obj):
  3394. if isinstance(obj, DataFrame):
  3395. return obj.reindex(sorted(obj.index))
  3396. elif isinstance(obj, Panel):
  3397. return obj.reindex(major=sorted(obj.major_axis))
  3398. else:
  3399. raise ValueError('type not supported here')
if __name__ == '__main__':
    import nose
    # run this module's tests directly, dropping into pdb on errors/failures
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)