/pandas/io/tests/test_pytables.py

http://github.com/wesm/pandas · Python · 5436 lines · 5393 code · 30 blank · 13 comment · 23 complexity · 069156a1606495477320752b993ba0f0 MD5 · raw file

Large files are truncated; click here to view the full file

  1. import nose
  2. import sys
  3. import os
  4. import warnings
  5. import tempfile
  6. from contextlib import contextmanager
  7. import datetime
  8. import numpy as np
  9. import pandas
  10. import pandas as pd
  11. from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index,
  12. RangeIndex, Categorical, bdate_range,
  13. date_range, timedelta_range, Index, DatetimeIndex,
  14. isnull)
  15. from pandas.compat import is_platform_windows, PY3, PY35
  16. from pandas.formats.printing import pprint_thing
  17. from pandas.io.pytables import _tables, TableIterator
  18. try:
  19. _tables()
  20. except ImportError as e:
  21. raise nose.SkipTest(e)
  22. from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf,
  23. IncompatibilityWarning, PerformanceWarning,
  24. AttributeConflictWarning, DuplicateWarning,
  25. PossibleDataLossError, ClosedFileError)
  26. from pandas.io import pytables as pytables
  27. import pandas.util.testing as tm
  28. from pandas.util.testing import (assert_panel4d_equal,
  29. assert_panel_equal,
  30. assert_frame_equal,
  31. assert_series_equal,
  32. assert_produces_warning,
  33. set_timezone)
  34. from pandas import concat, Timestamp
  35. from pandas import compat
  36. from pandas.compat import range, lrange, u
  37. try:
  38. import tables
  39. except ImportError:
  40. raise nose.SkipTest('no pytables')
  41. from distutils.version import LooseVersion
# Default compressor for the tests: blosc is only usable on PyTables >= 2.2,
# otherwise fall back to zlib.
_default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2'
                       else 'zlib')

# nose flag: don't split these tests across processes (they share temp files)
_multiprocess_can_split_ = False

# testing on windows/py3 seems to fault
# for using compression
skip_compression = PY3 and is_platform_windows()
  48. # contextmanager to ensure the file cleanup
  49. def safe_remove(path):
  50. if path is not None:
  51. try:
  52. os.remove(path)
  53. except:
  54. pass
  55. def safe_close(store):
  56. try:
  57. if store is not None:
  58. store.close()
  59. except:
  60. pass
  61. def create_tempfile(path):
  62. """ create an unopened named temporary file """
  63. return os.path.join(tempfile.gettempdir(), path)
  64. @contextmanager
  65. def ensure_clean_store(path, mode='a', complevel=None, complib=None,
  66. fletcher32=False):
  67. try:
  68. # put in the temporary path if we don't have one already
  69. if not len(os.path.dirname(path)):
  70. path = create_tempfile(path)
  71. store = HDFStore(path, mode=mode, complevel=complevel,
  72. complib=complib, fletcher32=False)
  73. yield store
  74. finally:
  75. safe_close(store)
  76. if mode == 'w' or mode == 'a':
  77. safe_remove(path)
  78. @contextmanager
  79. def ensure_clean_path(path):
  80. """
  81. return essentially a named temporary file that is not opened
  82. and deleted on existing; if path is a list, then create and
  83. return list of filenames
  84. """
  85. try:
  86. if isinstance(path, list):
  87. filenames = [create_tempfile(p) for p in path]
  88. yield filenames
  89. else:
  90. filenames = [create_tempfile(path)]
  91. yield filenames[0]
  92. finally:
  93. for f in filenames:
  94. safe_remove(f)
# set these parameters so we don't have file sharing
# (force PyTables to run single-threaded so concurrent tests don't
# contend for the same HDF5 files)
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1
  99. def _maybe_remove(store, key):
  100. """For tests using tables, try removing the table to be sure there is
  101. no content from previous tests using the same table name."""
  102. try:
  103. store.remove(key)
  104. except:
  105. pass
  106. @contextmanager
  107. def compat_assert_produces_warning(w):
  108. """ don't produce a warning under PY3 """
  109. if compat.PY3:
  110. yield
  111. else:
  112. with tm.assert_produces_warning(expected_warning=w,
  113. check_stacklevel=False):
  114. yield
class Base(tm.TestCase):
    """Shared setup/teardown for the HDFStore test cases."""

    @classmethod
    def setUpClass(cls):
        super(Base, cls).setUpClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.reset_testing_mode()

    @classmethod
    def tearDownClass(cls):
        super(Base, cls).tearDownClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.set_testing_mode()

    def setUp(self):
        # silence FutureWarnings from deprecated APIs exercised by the tests
        warnings.filterwarnings(action='ignore', category=FutureWarning)
        # unique per-test scratch file name (created lazily by the helpers)
        self.path = 'tmp.__%s__.h5' % tm.rands(10)

    def tearDown(self):
        pass
  131. class TestHDFStore(Base, tm.TestCase):
  132. def test_factory_fun(self):
  133. path = create_tempfile(self.path)
  134. try:
  135. with get_store(path) as tbl:
  136. raise ValueError('blah')
  137. except ValueError:
  138. pass
  139. finally:
  140. safe_remove(path)
  141. try:
  142. with get_store(path) as tbl:
  143. tbl['a'] = tm.makeDataFrame()
  144. with get_store(path) as tbl:
  145. self.assertEqual(len(tbl), 1)
  146. self.assertEqual(type(tbl['a']), DataFrame)
  147. finally:
  148. safe_remove(self.path)
    def test_context(self):
        # HDFStore as a context manager: closes on error and on normal exit
        path = create_tempfile(self.path)
        try:
            with HDFStore(path) as tbl:
                raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(path)

        try:
            with HDFStore(path) as tbl:
                tbl['a'] = tm.makeDataFrame()

            with HDFStore(path) as tbl:
                self.assertEqual(len(tbl), 1)
                self.assertEqual(type(tbl['a']), DataFrame)
        finally:
            safe_remove(path)
    def test_conv_read_write(self):
        # round-trip various pandas objects through the to_hdf/read_hdf
        # convenience functions
        path = create_tempfile(self.path)
        try:
            def roundtrip(key, obj, **kwargs):
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            o = tm.makePanel()
            assert_panel_equal(o, roundtrip('panel', o))

            # table format supports where-based selection on read
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(path, 'table', append=True)
            result = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)
        finally:
            safe_remove(path)
    def test_long_strings(self):
        # GH6166
        # unconversion of long strings was being chopped in earlier
        # versions of numpy < 1.7.2
        df = DataFrame({'a': tm.rands_array(100, size=10)},
                       index=tm.rands_array(100, size=10))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['a'])

            result = store.select('df')
            assert_frame_equal(df, result)
    def test_api(self):
        # GH4584
        # API issue when to_hdf doesn't accept append AND format args

        # append/format interaction via to_hdf
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        # fixed format (and its aliases / defaults)
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        # same interactions via store.append
        with ensure_clean_store(self.path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:
            # invalid combinations raise
            df = tm.makeDataFrame()
            self.assertRaises(ValueError, df.to_hdf, path,
                              'df', append=True, format='f')
            self.assertRaises(ValueError, df.to_hdf, path,
                              'df', append=True, format='fixed')

            self.assertRaises(TypeError, df.to_hdf, path,
                              'df', append=True, format='foo')
            self.assertRaises(TypeError, df.to_hdf, path,
                              'df', append=False, format='bar')

            # File path doesn't exist
            path = ""
            self.assertRaises(IOError, read_hdf, path, 'df')
  263. def test_api_default_format(self):
  264. # default_format option
  265. with ensure_clean_store(self.path) as store:
  266. df = tm.makeDataFrame()
  267. pandas.set_option('io.hdf.default_format', 'fixed')
  268. _maybe_remove(store, 'df')
  269. store.put('df', df)
  270. self.assertFalse(store.get_storer('df').is_table)
  271. self.assertRaises(ValueError, store.append, 'df2', df)
  272. pandas.set_option('io.hdf.default_format', 'table')
  273. _maybe_remove(store, 'df')
  274. store.put('df', df)
  275. self.assertTrue(store.get_storer('df').is_table)
  276. _maybe_remove(store, 'df2')
  277. store.append('df2', df)
  278. self.assertTrue(store.get_storer('df').is_table)
  279. pandas.set_option('io.hdf.default_format', None)
  280. with ensure_clean_path(self.path) as path:
  281. df = tm.makeDataFrame()
  282. pandas.set_option('io.hdf.default_format', 'fixed')
  283. df.to_hdf(path, 'df')
  284. with get_store(path) as store:
  285. self.assertFalse(store.get_storer('df').is_table)
  286. self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True)
  287. pandas.set_option('io.hdf.default_format', 'table')
  288. df.to_hdf(path, 'df3')
  289. with HDFStore(path) as store:
  290. self.assertTrue(store.get_storer('df3').is_table)
  291. df.to_hdf(path, 'df4', append=True)
  292. with HDFStore(path) as store:
  293. self.assertTrue(store.get_storer('df4').is_table)
  294. pandas.set_option('io.hdf.default_format', None)
    def test_keys(self):
        # keys() returns all stored keys, prefixed with '/'
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            store['d'] = tm.makePanel()
            store['foo/bar'] = tm.makePanel()
            self.assertEqual(len(store), 5)
            expected = set(['/a', '/b', '/c', '/d', '/foo/bar'])
            self.assertTrue(set(store.keys()) == expected)
            # iterating the store yields the same keys
            self.assertTrue(set(store) == expected)
    def test_iter_empty(self):
        # iterating an empty store yields nothing
        with ensure_clean_store(self.path) as store:
            # GH 12221
            self.assertTrue(list(store) == [])
    def test_repr(self):
        # repr()/str() of a store (and of a storer) should not raise,
        # including on mixed-dtype frames and stray non-pandas groups
        with ensure_clean_store(self.path) as store:
            repr(store)
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            store['d'] = tm.makePanel()
            store['foo/bar'] = tm.makePanel()
            store.append('e', tm.makePanel())

            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            df['timestamp1'] = Timestamp('20010102')
            df['timestamp2'] = Timestamp('20010103')
            df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
            df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
            df.ix[3:6, ['obj1']] = np.nan
            df = df.consolidate()._convert(datetime=True)

            # storing a mixed frame in fixed format raises PerformanceWarning
            warnings.filterwarnings('ignore', category=PerformanceWarning)
            store['df'] = df
            warnings.filterwarnings('always', category=PerformanceWarning)

            # make a random group in hdf space
            store._handle.create_group(store._handle.root, 'bah')

            repr(store)
            str(store)

        # storers
        with ensure_clean_store(self.path) as store:

            df = tm.makeDataFrame()
            store.append('df', df)

            s = store.get_storer('df')
            repr(s)
            str(s)
    def test_contains(self):
        # __contains__ matches keys with or without the leading '/',
        # but not prefixes or suffixes of keys
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            store['foo/bar'] = tm.makeDataFrame()
            self.assertIn('a', store)
            self.assertIn('b', store)
            self.assertNotIn('c', store)
            self.assertIn('foo/bar', store)
            self.assertIn('/foo/bar', store)
            self.assertNotIn('/foo/b', store)
            self.assertNotIn('bar', store)

            # GH 2694
            # keys that are not valid python identifiers still work
            warnings.filterwarnings(
                'ignore', category=tables.NaturalNameWarning)
            store['node())'] = tm.makeDataFrame()
            self.assertIn('node())', store)
    def test_versioning(self):
        # a pandas_version attribute is written per node, and selecting
        # from an appendable table without it is an error
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            self.assertEqual(store.root.a._v_attrs.pandas_version, '0.15.2')
            self.assertEqual(store.root.b._v_attrs.pandas_version, '0.15.2')
            self.assertEqual(store.root.df1._v_attrs.pandas_version, '0.15.2')

            # write a file and wipe its versioning
            _maybe_remove(store, 'df2')
            store.append('df2', df)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node('df2')._v_attrs.pandas_version = None
            self.assertRaises(Exception, store.select, 'df2')
    def test_mode(self):
        # exercise open modes r/r+/a/w through the constructor, the
        # context manager, to_hdf and read_hdf
        df = tm.makeTimeDataFrame()

        def check(mode):

            with ensure_clean_path(self.path) as path:

                # constructor: read modes require the file to exist
                if mode in ['r', 'r+']:
                    self.assertRaises(IOError, HDFStore, path, mode=mode)

                else:
                    store = HDFStore(path, mode=mode)
                    self.assertEqual(store._handle.mode, mode)
                    store.close()

            with ensure_clean_path(self.path) as path:

                # context
                if mode in ['r', 'r+']:
                    def f():
                        with HDFStore(path, mode=mode) as store:  # noqa
                            pass
                    self.assertRaises(IOError, f)
                else:
                    with HDFStore(path, mode=mode) as store:
                        self.assertEqual(store._handle.mode, mode)

            with ensure_clean_path(self.path) as path:

                # conv write
                if mode in ['r', 'r+']:
                    self.assertRaises(IOError, df.to_hdf,
                                      path, 'df', mode=mode)
                    df.to_hdf(path, 'df', mode='w')
                else:
                    df.to_hdf(path, 'df', mode=mode)

                # conv read: 'w' is invalid for reading
                if mode in ['w']:
                    self.assertRaises(ValueError, read_hdf,
                                      path, 'df', mode=mode)
                else:
                    result = read_hdf(path, 'df', mode=mode)
                    assert_frame_equal(result, df)

        def check_default_mode():

            # read_hdf uses default mode
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', mode='w')
                result = read_hdf(path, 'df')
                assert_frame_equal(result, df)

        check('r')
        check('r+')
        check('a')
        check('w')
        check_default_mode()
    def test_reopen_handle(self):
        # open() on an already-open store: mode changes, truncation and
        # the is_open flag
        with ensure_clean_path(self.path) as path:

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # invalid mode change
            self.assertRaises(PossibleDataLossError, store.open, 'w')
            store.close()
            self.assertFalse(store.is_open)

            # truncation ok here
            store.open('w')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 0)
            store.close()
            self.assertFalse(store.is_open)

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # reopen as read
            store.open('r')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'r')
            store.close()
            self.assertFalse(store.is_open)

            # reopen as append
            store.open('a')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'a')
            store.close()
            self.assertFalse(store.is_open)

            # reopen as append (again)
            store.open('a')
            self.assertTrue(store.is_open)
            self.assertEqual(len(store), 1)
            self.assertEqual(store._mode, 'a')
            store.close()
            self.assertFalse(store.is_open)
    def test_open_args(self):
        # extra driver kwargs are forwarded to PyTables
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(path, mode='a', driver='H5FD_CORE',
                             driver_core_backing_store=0)
            store['df'] = df
            store.append('df2', df)

            tm.assert_frame_equal(store['df'], df)
            tm.assert_frame_equal(store['df2'], df)

            store.close()

            # the file should not have actually been written
            self.assertFalse(os.path.exists(path))
    def test_flush(self):
        # flush (with and without fsync) should not raise
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store.flush()
            store.flush(fsync=True)
    def test_get(self):
        # get() and __getitem__ agree, with or without leading '/';
        # missing keys raise KeyError
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            left = store.get('a')
            right = store['a']
            tm.assert_series_equal(left, right)

            left = store.get('/a')
            right = store['/a']
            tm.assert_series_equal(left, right)

            self.assertRaises(KeyError, store.get, 'b')
    def test_getattr(self):
        # stored keys are accessible as attributes; internal attributes
        # are only reachable via their underscored names
        with ensure_clean_store(self.path) as store:

            s = tm.makeTimeSeries()
            store['a'] = s

            # test attribute access
            result = store.a
            tm.assert_series_equal(result, s)
            result = getattr(store, 'a')
            tm.assert_series_equal(result, s)

            df = tm.makeTimeDataFrame()
            store['df'] = df
            result = store.df
            tm.assert_frame_equal(result, df)

            # errors
            self.assertRaises(AttributeError, getattr, store, 'd')

            for x in ['mode', 'path', 'handle', 'complib']:
                self.assertRaises(AttributeError, getattr, store, x)

            # not stores
            for x in ['mode', 'path', 'handle', 'complib']:
                getattr(store, "_%s" % x)
    def test_put(self):
        # put semantics: overwrite vs append, fixed vs table format
        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store['a'] = ts
            store['b'] = df[:10]
            store['foo/bar/bah'] = df[:10]
            store['foo'] = df[:10]
            store['/foo'] = df[:10]
            store.put('c', df[:10], format='table')

            # not OK, not a table
            self.assertRaises(
                ValueError, store.put, 'b', df[10:], append=True)

            # node does not currently exist, test _is_table_type returns False
            # in this case
            # _maybe_remove(store, 'f')
            # self.assertRaises(ValueError, store.put, 'f', df[10:],
            #                   append=True)

            # can't put to a table (use append instead)
            self.assertRaises(ValueError, store.put, 'c', df[10:], append=True)

            # overwrite table
            store.put('c', df[:10], format='table', append=False)
            tm.assert_frame_equal(df[:10], store['c'])
    def test_put_string_index(self):
        # long (and mixed-length) string indexes round-trip through put
        with ensure_clean_store(self.path) as store:

            index = Index(
                ["I am a very long string index: %s" % i for i in range(20)])
            s = Series(np.arange(20), index=index)
            df = DataFrame({'A': s, 'B': s})

            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

            # mixed length
            index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] +
                          ["I am a very long string index: %s" % i
                           for i in range(20)])
            s = Series(np.arange(21), index=index)
            df = DataFrame({'A': s, 'B': s})
            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)
    def test_put_compression(self):
        # zlib compression works for table format only
        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()

            store.put('c', df, format='table', complib='zlib')
            tm.assert_frame_equal(store['c'], df)

            # can't compress if format='fixed'
            self.assertRaises(ValueError, store.put, 'b', df,
                              format='fixed', complib='zlib')
    def test_put_compression_blosc(self):
        # blosc compression works for table format only (PyTables >= 2.2)
        tm.skip_if_no_package('tables', '2.2', app='blosc support')
        if skip_compression:
            raise nose.SkipTest("skipping on windows/PY3")

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            # can't compress if format='fixed'
            self.assertRaises(ValueError, store.put, 'b', df,
                              format='fixed', complib='blosc')

            store.put('c', df, format='table', complib='blosc')
            tm.assert_frame_equal(store['c'], df)
    def test_put_integer(self):
        # non-date, non-string index
        # NOTE: relies on _check_roundtrip, defined elsewhere in this class
        df = DataFrame(np.random.randn(50, 100))
        self._check_roundtrip(df, tm.assert_frame_equal)
    def test_put_mixed_type(self):
        # a mixed-dtype frame round-trips through put (fixed format),
        # which emits a PerformanceWarning for the object columns
        df = tm.makeTimeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.ix[3:6, ['obj1']] = np.nan
        df = df.consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # cannot use assert_produces_warning here for some reason
            # a PendingDeprecationWarning is also raised?
            warnings.filterwarnings('ignore', category=PerformanceWarning)
            store.put('df', df)
            warnings.filterwarnings('always', category=PerformanceWarning)

            expected = store.get('df')
            tm.assert_frame_equal(expected, df)
    def test_append(self):
        # append across frames, panels, 4-d panels, odd key names,
        # mixed-dtype object columns and unsigned int dtypes
        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            _maybe_remove(store, 'df2')
            store.put('df2', df[:10], format='table')
            store.append('df2', df[10:])
            tm.assert_frame_equal(store['df2'], df)

            _maybe_remove(store, 'df3')
            store.append('/df3', df[:10])
            store.append('/df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            # this is allowed but almost always don't want to do it
            # (a key with a space triggers NaturalNameWarning)
            with tm.assert_produces_warning(
                    expected_warning=tables.NaturalNameWarning):
                _maybe_remove(store, '/df3 foo')
                store.append('/df3 foo', df[:10])
                store.append('/df3 foo', df[10:])
                tm.assert_frame_equal(store['df3 foo'], df)

            # panel
            wp = tm.makePanel()
            _maybe_remove(store, 'wp1')
            store.append('wp1', wp.ix[:, :10, :])
            store.append('wp1', wp.ix[:, 10:, :])
            assert_panel_equal(store['wp1'], wp)

            # ndim (Panel4D is deprecated, hence the FutureWarning)
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                p4d = tm.makePanel4D()
                _maybe_remove(store, 'p4d')
                store.append('p4d', p4d.ix[:, :, :10, :])
                store.append('p4d', p4d.ix[:, :, 10:, :])
                assert_panel4d_equal(store['p4d'], p4d)

                # test using axis labels
                _maybe_remove(store, 'p4d')
                store.append('p4d', p4d.ix[:, :, :10, :], axes=[
                    'items', 'major_axis', 'minor_axis'])
                store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
                    'items', 'major_axis', 'minor_axis'])
                assert_panel4d_equal(store['p4d'], p4d)

                # test using different number of items on each axis
                p4d2 = p4d.copy()
                p4d2['l4'] = p4d['l1']
                p4d2['l5'] = p4d['l1']
                _maybe_remove(store, 'p4d2')
                store.append(
                    'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis'])
                assert_panel4d_equal(store['p4d2'], p4d2)

            # test using different order of items on the non-index axes
            _maybe_remove(store, 'wp1')
            wp_append1 = wp.ix[:, :10, :]
            store.append('wp1', wp_append1)
            wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1])
            store.append('wp1', wp_append2)
            assert_panel_equal(store['wp1'], wp)

            # dtype issues - mixed type in a single object column
            df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
            df['mixed_column'] = 'testing'
            df.ix[2, 'mixed_column'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df)
            tm.assert_frame_equal(store['df'], df)

            # uints - test storage of uints
            uint_data = DataFrame({
                'u08': Series(np.random.randint(0, high=255, size=5),
                              dtype=np.uint8),
                'u16': Series(np.random.randint(0, high=65535, size=5),
                              dtype=np.uint16),
                'u32': Series(np.random.randint(0, high=2**30, size=5),
                              dtype=np.uint32),
                'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                              dtype=np.uint64)}, index=np.arange(5))
            _maybe_remove(store, 'uints')
            store.append('uints', uint_data)
            tm.assert_frame_equal(store['uints'], uint_data)

            # uints - test storage of uints in indexable columns
            _maybe_remove(store, 'uints')
            # 64-bit indices not yet supported
            store.append('uints', uint_data, data_columns=[
                         'u08', 'u16', 'u32'])
            tm.assert_frame_equal(store['uints'], uint_data)
    def test_append_series(self):
        # appending Series: name handling, value/index selection,
        # and a multi-index Series
        with ensure_clean_store(self.path) as store:

            # basic
            ss = tm.makeStringSeries()
            ts = tm.makeTimeSeries()
            ns = Series(np.arange(100))

            store.append('ss', ss)
            result = store['ss']
            tm.assert_series_equal(result, ss)
            self.assertIsNone(result.name)

            store.append('ts', ts)
            result = store['ts']
            tm.assert_series_equal(result, ts)
            self.assertIsNone(result.name)

            ns.name = 'foo'
            store.append('ns', ns)
            result = store['ns']
            tm.assert_series_equal(result, ns)
            self.assertEqual(result.name, ns.name)

            # select on the values
            expected = ns[ns > 60]
            result = store.select('ns', Term('foo>60'))
            tm.assert_series_equal(result, expected)

            # select on the index and values
            expected = ns[(ns > 70) & (ns.index < 90)]
            result = store.select('ns', [Term('foo>70'), Term('index<90')])
            tm.assert_series_equal(result, expected)

            # multi-index
            mi = DataFrame(np.random.randn(5, 1), columns=['A'])
            mi['B'] = np.arange(len(mi))
            mi['C'] = 'foo'
            mi.loc[3:5, 'C'] = 'bar'
            mi.set_index(['C', 'B'], inplace=True)
            s = mi.stack()
            s.index = s.index.droplevel(2)
            store.append('mi', s)
            tm.assert_series_equal(store['mi'], s)
    def test_store_index_types(self):
        # GH5386
        # test storing various index types
        with ensure_clean_store(self.path) as store:

            def check(format, index):
                # round-trip a frame whose index is built by *index*
                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df.index = index(len(df))
                _maybe_remove(store, 'df')
                store.put('df', df, format=format)
                assert_frame_equal(df, store['df'])

            for index in [tm.makeFloatIndex, tm.makeStringIndex,
                          tm.makeIntIndex, tm.makeDateIndex]:

                check('table', index)
                check('fixed', index)

            # period index currently broken for table
            # see GH7796 FIXME
            check('fixed', tm.makePeriodIndex)
            # check('table',tm.makePeriodIndex)

            # unicode
            index = tm.makeUnicodeIndex
            if compat.PY3:
                check('table', index)
                check('fixed', index)
            else:

                # only support for fixed types (and they have a perf warning)
                self.assertRaises(TypeError, check, 'table', index)
                with tm.assert_produces_warning(
                        expected_warning=PerformanceWarning):
                    check('fixed', index)
    def test_encoding(self):
        # explicit ascii encoding for string columns and selection terms
        if sys.byteorder != 'little':
            raise nose.SkipTest('system byteorder is not little')

        with ensure_clean_store(self.path) as store:
            df = DataFrame(dict(A='foo', B='bar'), index=range(5))
            df.loc[2, 'A'] = np.nan
            df.loc[3, 'B'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df, encoding='ascii')
            tm.assert_frame_equal(store['df'], df)

            expected = df.reindex(columns=['A'])
            result = store.select('df', Term('columns=A', encoding='ascii'))
            tm.assert_frame_equal(result, expected)
    def test_latin_encoding(self):
        # round-trip latin-1 encodable strings (and NaN representations)
        # through a table store

        if compat.PY2:
            # NOTE(review): assertRaisesRegexp is called without a callable,
            # so nothing is actually asserted on the PY2 path — confirm the
            # intended assertion
            self.assertRaisesRegexp(
                TypeError, '\[unicode\] is not implemented as a table column')
            return

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            # decode bytes; non-bytes values (e.g. NaN) pass through
            try:
                return x.decode(encoding)
            except AttributeError:
                return x
        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(pandas.Series(val, dtype=dtype))

        def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
            # write with the given encoding/nan_rep and compare after read
            with ensure_clean_path(self.path) as store:
                s.to_hdf(store, key, format='table', encoding=encoding,
                         nan_rep=nan_rep)
                retr = read_hdf(store, key)
                s_nan = s.replace(nan_rep, np.nan)
                assert_series_equal(s_nan, retr, check_categorical=False)

        for s in examples:
            roundtrip(s)

        # fails:
        # for x in examples:
        #     roundtrip(s, nan_rep=b'\xf8\xfc')
    def test_append_some_nans(self):
        # appending frames where some (but not all) columns are NaN
        # must round-trip without dropping rows
        with ensure_clean_store(self.path) as store:
            df = DataFrame({'A': Series(np.random.randn(20)).astype('int32'),
                            'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))
            # some nans
            _maybe_remove(store, 'df1')
            df.ix[0:15, ['A1', 'B', 'D', 'E']] = np.nan
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            # first column
            df1 = df.copy()
            df1.ix[:, 'A1'] = np.nan
            _maybe_remove(store, 'df1')
            store.append('df1', df1[:10])
            store.append('df1', df1[10:])
            tm.assert_frame_equal(store['df1'], df1)

            # 2nd column
            df2 = df.copy()
            df2.ix[:, 'A2'] = np.nan
            _maybe_remove(store, 'df2')
            store.append('df2', df2[:10])
            store.append('df2', df2[10:])
            tm.assert_frame_equal(store['df2'], df2)

            # datetimes
            df3 = df.copy()
            df3.ix[:, 'E'] = np.nan
            _maybe_remove(store, 'df3')
            store.append('df3', df3[:10])
            store.append('df3', df3[10:])
            tm.assert_frame_equal(store['df3'], df3)
  840. def test_append_all_nans(self):
  841. with ensure_clean_store(self.path) as store:
  842. df = DataFrame({'A1': np.random.randn(20),
  843. 'A2': np.random.randn(20)},
  844. index=np.arange(20))
  845. df.ix[0:15, :] = np.nan
  846. # nan some entire rows (dropna=True)
  847. _maybe_remove(store, 'df')
  848. store.append('df', df[:10], dropna=True)
  849. store.append('df', df[10:], dropna=True)
  850. tm.assert_frame_equal(store['df'], df[-4:])
  851. # nan some entire rows (dropna=False)
  852. _maybe_remove(store, 'df2')
  853. store.append('df2', df[:10], dropna=False)
  854. store.append('df2', df[10:], dropna=False)
  855. tm.assert_frame_equal(store['df2'], df)
  856. # tests the option io.hdf.dropna_table
  857. pandas.set_option('io.hdf.dropna_table', False)
  858. _maybe_remove(store, 'df3')
  859. store.append('df3', df[:10])
  860. store.append('df3', df[10:])
  861. tm.assert_frame_equal(store['df3'], df)
  862. pandas.set_option('io.hdf.dropna_table', True)
  863. _maybe_remove(store, 'df4')
  864. store.append('df4', df[:10])
  865. store.append('df4', df[10:])
  866. tm.assert_frame_equal(store['df4'], df[-4:])
  867. # nan some entire rows (string are still written!)
  868. df = DataFrame({'A1': np.random.randn(20),
  869. 'A2': np.random.randn(20),
  870. 'B': 'foo', 'C': 'bar'},
  871. index=np.arange(20))
  872. df.ix[0:15, :] = np.nan
  873. _maybe_remove(store, 'df')
  874. store.append('df', df[:10], dropna=True)
  875. store.append('df', df[10:], dropna=True)
  876. tm.assert_frame_equal(store['df'], df)
  877. _maybe_remove(store, 'df2')
  878. store.append('df2', df[:10], dropna=False)
  879. store.append('df2', df[10:], dropna=False)
  880. tm.assert_frame_equal(store['df2'], df)
  881. # nan some entire rows (but since we have dates they are still
  882. # written!)
  883. df = DataFrame({'A1': np.random.randn(20),
  884. 'A2': np.random.randn(20),
  885. 'B': 'foo', 'C': 'bar',
  886. 'D': Timestamp("20010101"),
  887. 'E': datetime.datetime(2001, 1, 2, 0, 0)},
  888. index=np.arange(20))
  889. df.ix[0:15, :] = np.nan
  890. _maybe_remove(store, 'df')
  891. store.append('df', df[:10], dropna=True)
  892. store.append('df', df[10:], dropna=True)
  893. tm.assert_frame_equal(store['df'], df)
  894. _maybe_remove(store, 'df2')
  895. store.append('df2', df[:10], dropna=False)
  896. store.append('df2', df[10:], dropna=False)
  897. tm.assert_frame_equal(store['df2'], df)
  898. # Test to make sure defaults are to not drop.
  899. # Corresponding to Issue 9382
  900. df_with_missing = DataFrame(
  901. {'col1': [0, np.nan, 2], 'col2': [1, np.nan, np.nan]})
  902. with ensure_clean_path(self.path) as path:
  903. df_with_missing.to_hdf(path, 'df_with_missing', format='table')
  904. reloaded = read_hdf(path, 'df_with_missing')
  905. tm.assert_frame_equal(df_with_missing, reloaded)
  906. matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]],
  907. [[np.nan, np.nan, np.nan], [np.nan, 5, 6]],
  908. [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]]
  909. panel_with_missing = Panel(matrix, items=['Item1', 'Item2', 'Item3'],
  910. major_axis=[1, 2],
  911. minor_axis=['A', 'B', 'C'])
  912. with ensure_clean_path(self.path) as path:
  913. panel_with_missing.to_hdf(
  914. path, 'panel_with_missing', format='table')
  915. reloaded_panel = read_hdf(path, 'panel_with_missing')
  916. tm.assert_panel_equal(panel_with_missing, reloaded_panel)
  917. def test_append_frame_column_oriented(self):
  918. with ensure_clean_store(self.path) as store:
  919. # column oriented
  920. df = tm.makeTimeDataFrame()
  921. _maybe_remove(store, 'df1')
  922. store.append('df1', df.ix[:, :2], axes=['columns'])
  923. store.append('df1', df.ix[:, 2:])
  924. tm.assert_frame_equal(store['df1'], df)
  925. result = store.select('df1', 'columns=A')
  926. expected = df.reindex(columns=['A'])
  927. tm.assert_frame_equal(expected, result)
  928. # selection on the non-indexable
  929. result = store.select(
  930. 'df1', ('columns=A', Term('index=df.index[0:4]')))
  931. expected = df.reindex(columns=['A'], index=df.index[0:4])
  932. tm.assert_frame_equal(expected, result)
  933. # this isn't supported
  934. self.assertRaises(TypeError, store.select, 'df1', (
  935. 'columns=A', Term('index>df.index[4]')))
  936. def test_append_with_different_block_ordering(self):
  937. # GH 4096; using same frames, but different block orderings
  938. with ensure_clean_store(self.path) as store:
  939. for i in range(10):
  940. df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
  941. df['index'] = range(10)
  942. df['index'] += i * 10
  943. df['int64'] = Series([1] * len(df), dtype='int64')
  944. df['int16'] = Series([1] * len(df), dtype='int16')
  945. if i % 2 == 0:
  946. del df['int64']
  947. df['int64'] = Series([1] * len(df), dtype='int64')
  948. if i % 3 == 0:
  949. a = df.pop('A')
  950. df['A'] = a
  951. df.set_index('index', inplace=True)
  952. store.append('df', df)
  953. # test a different ordering but with more fields (like invalid
  954. # combinate)
  955. with ensure_clean_store(self.path) as store:
  956. df = DataFrame(np.random.randn(10, 2),
  957. columns=list('AB'), dtype='float64')
  958. df['int64'] = Series([1] * len(df), dtype='int64')
  959. df['int16'] = Series([1] * len(df), dtype='int16')
  960. store.append('df', df)
  961. # store additonal fields in different blocks
  962. df['int16_2'] = Series([1] * len(df), dtype='int16')
  963. self.assertRaises(ValueError, store.append, 'df', df)
  964. # store multile additonal fields in different blocks
  965. df['float_3'] = Series([1.] * len(df), dtype='float64')
  966. self.assertRaises(ValueError, store.append, 'df', df)
  967. def test_ndim_indexables(self):
  968. # test using ndim tables in new ways
  969. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  970. with ensure_clean_store(self.path) as store:
  971. p4d = tm.makePanel4D()
  972. def check_indexers(key, indexers):
  973. for i, idx in enumerate(indexers):
  974. descr = getattr(store.root, key).table.description
  975. self.assertTrue(getattr(descr, idx)._v_pos == i)
  976. # append then change (will take existing schema)
  977. indexers = ['items', 'major_axis', 'minor_axis']
  978. _maybe_remove(store, 'p4d')
  979. store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
  980. store.append('p4d', p4d.ix[:, :, 10:, :])
  981. assert_panel4d_equal(store.select('p4d'), p4d)
  982. check_indexers('p4d', indexers)
  983. # same as above, but try to append with differnt axes
  984. _maybe_remove(store, 'p4d')
  985. store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
  986. store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
  987. 'labels', 'items', 'major_axis'])
  988. assert_panel4d_equal(store.select('p4d'), p4d)
  989. check_indexers('p4d', indexers)
  990. # pass incorrect number of axes
  991. _maybe_remove(store, 'p4d')
  992. self.assertRaises(ValueError, store.append, 'p4d', p4d.ix[
  993. :, :, :10, :], axes=['major_axis', 'minor_axis'])
  994. # different than default indexables #1
  995. indexers = ['labels', 'major_axis', 'minor_axis']
  996. _maybe_remove(store, 'p4d')
  997. store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
  998. store.append('p4d', p4d.ix[:, :, 10:, :])
  999. assert_panel4d_equal(store['p4d'], p4d)
  1000. check_indexers('p4d', indexers)
  1001. # different than default indexables #2
  1002. indexers = ['major_axis', 'labels', 'minor_axis']
  1003. _maybe_remove(store, 'p4d')
  1004. store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
  1005. store.append('p4d', p4d.ix[:, :, 10:, :])
  1006. assert_panel4d_equal(store['p4d'], p4d)
  1007. check_indexers('p4d', indexers)
  1008. # partial selection
  1009. result = store.select('p4d', ['labels=l1'])
  1010. expected = p4d.reindex(labels=['l1'])
  1011. assert_panel4d_equal(result, expected)
  1012. # partial selection2
  1013. result = store.select('p4d', [Term(
  1014. 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
  1015. expected = p4d.reindex(
  1016. labels=['l1'], items=['ItemA'], minor_axis=['B'])
  1017. assert_panel4d_equal(result, expected)
  1018. # non-existant partial selection
  1019. result = store.select('p4d', [Term(
  1020. 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
  1021. expected = p4d.reindex(labels=['l1'], items=[],
  1022. minor_axis=['B'])
  1023. assert_panel4d_equal(result, expected)
  1024. def test_append_with_strings(self):
  1025. with ensure_clean_store(self.path) as store:
  1026. wp = tm.makePanel()
  1027. wp2 = wp.rename_axis(
  1028. dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2)
  1029. def check_col(key, name, size):
  1030. self.assertEqual(getattr(store.get_storer(
  1031. key).table.description, name).itemsize, size)
  1032. store.append('s1', wp, min_itemsize=20)
  1033. store.append('s1', wp2)
  1034. expected = concat([wp, wp2], axis=2)
  1035. expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
  1036. assert_panel_equal(store['s1'], expected)
  1037. check_col('s1', 'minor_axis', 20)
  1038. # test dict format
  1039. store.append('s2', wp, min_itemsize={'minor_axis': 20})
  1040. store.append('s2', wp2)
  1041. expected = concat([wp, wp2], axis=2)
  1042. expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
  1043. assert_panel_equal(store['s2'], expected)
  1044. check_col('s2', 'minor_axis', 20)
  1045. # apply the wrong field (similar to #1)
  1046. store.append('s3', wp, min_itemsize={'major_axis': 20})
  1047. self.assertRaises(ValueError, store.append, 's3', wp2)
  1048. # test truncation of bigger strings
  1049. store.append('s4', wp)
  1050. self.assertRaises(ValueError, store.append, 's4', wp2)
  1051. # avoid truncation on elements
  1052. df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
  1053. store.append('df_big', df)
  1054. tm.assert_frame_equal(store.select('df_big'), df)
  1055. check_col('df_big', 'values_block_1', 15)
  1056. # appending smaller string ok
  1057. df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
  1058. store.append('df_big', df2)
  1059. expected = concat([df, df2])
  1060. tm.assert_frame_equal(store.select('df_big'), expected)
  1061. check_col('df_big', 'values_block_1', 15)
  1062. # avoid truncation on elements
  1063. df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
  1064. store.append('df_big2', df, min_itemsize={'values': 50})
  1065. tm.assert_frame_equal(store.select('df_big2'), df)
  1066. check_col('df_big2', 'values_block_1', 50)
  1067. # bigger string on next append
  1068. store.append('df_new', df)
  1069. df_new = DataFrame(
  1070. [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
  1071. self.assertRaises(ValueError, store.append, 'df_new', df_new)
  1072. # with nans
  1073. _maybe_remove(store, 'df')
  1074. df = tm.makeTimeDataFrame()
  1075. df['string'] = 'foo'
  1076. df.ix[1:4, 'string'] = np.nan
  1077. df['string2'] = 'bar'
  1078. df.ix[4:8, 'string2'] = np.nan
  1079. df['string3'] = 'bah'
  1080. df.ix[1:, 'string3'] = np.nan
  1081. store.append('df', df)
  1082. result = store.select('df')
  1083. tm.assert_frame_equal(result, df)
  1084. with ensure_clean_store(self.path) as store:
  1085. def check_col(key, name, size):
  1086. self.assertEqual(getattr(store.get_storer(
  1087. key).table.description, name).itemsize, size)
  1088. d