PageRenderTime 48ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/test_internals.py

http://github.com/wesm/pandas
Python | 1187 lines | 1131 code | 21 blank | 35 comment | 17 complexity | 38df5d3ca11595be644cd8d97a4a3813 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. # pylint: disable=W0102
  3. from datetime import datetime, date
  4. import nose
  5. import numpy as np
  6. import re
  7. import itertools
  8. from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex,
  9. Series, Categorical)
  10. from pandas.compat import OrderedDict, lrange
  11. from pandas.sparse.array import SparseArray
  12. from pandas.core.internals import (BlockPlacement, SingleBlockManager,
  13. make_block, BlockManager)
  14. import pandas.core.algorithms as algos
  15. import pandas.util.testing as tm
  16. import pandas as pd
  17. from pandas import lib
  18. from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
  19. randn, assert_series_equal)
  20. from pandas.compat import zip, u
  21. def assert_block_equal(left, right):
  22. tm.assert_numpy_array_equal(left.values, right.values)
  23. assert (left.dtype == right.dtype)
  24. tm.assertIsInstance(left.mgr_locs, lib.BlockPlacement)
  25. tm.assertIsInstance(right.mgr_locs, lib.BlockPlacement)
  26. tm.assert_numpy_array_equal(left.mgr_locs.as_array,
  27. right.mgr_locs.as_array)
  28. def get_numeric_mat(shape):
  29. arr = np.arange(shape[0])
  30. return np.lib.stride_tricks.as_strided(x=arr, shape=shape, strides=(
  31. arr.itemsize, ) + (0, ) * (len(shape) - 1)).copy()
# Default length used by the helpers in this module: ``item_shape``
# defaults to ``(N, )`` and ``create_single_mgr`` defaults ``num_rows`` to N.
N = 10
  33. def create_block(typestr, placement, item_shape=None, num_offset=0):
  34. """
  35. Supported typestr:
  36. * float, f8, f4, f2
  37. * int, i8, i4, i2, i1
  38. * uint, u8, u4, u2, u1
  39. * complex, c16, c8
  40. * bool
  41. * object, string, O
  42. * datetime, dt, M8[ns], M8[ns, tz]
  43. * timedelta, td, m8[ns]
  44. * sparse (SparseArray with fill_value=0.0)
  45. * sparse_na (SparseArray with fill_value=np.nan)
  46. * category, category2
  47. """
  48. placement = BlockPlacement(placement)
  49. num_items = len(placement)
  50. if item_shape is None:
  51. item_shape = (N, )
  52. shape = (num_items, ) + item_shape
  53. mat = get_numeric_mat(shape)
  54. if typestr in ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1',
  55. 'uint', 'u8', 'u4', 'u2', 'u1'):
  56. values = mat.astype(typestr) + num_offset
  57. elif typestr in ('complex', 'c16', 'c8'):
  58. values = 1.j * (mat.astype(typestr) + num_offset)
  59. elif typestr in ('object', 'string', 'O'):
  60. values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset],
  61. shape)
  62. elif typestr in ('b', 'bool', ):
  63. values = np.ones(shape, dtype=np.bool_)
  64. elif typestr in ('datetime', 'dt', 'M8[ns]'):
  65. values = (mat * 1e9).astype('M8[ns]')
  66. elif typestr.startswith('M8[ns'):
  67. # datetime with tz
  68. m = re.search('M8\[ns,\s*(\w+\/?\w*)\]', typestr)
  69. assert m is not None, "incompatible typestr -> {0}".format(typestr)
  70. tz = m.groups()[0]
  71. assert num_items == 1, "must have only 1 num items for a tz-aware"
  72. values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
  73. elif typestr in ('timedelta', 'td', 'm8[ns]'):
  74. values = (mat * 1).astype('m8[ns]')
  75. elif typestr in ('category', ):
  76. values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
  77. elif typestr in ('category2', ):
  78. values = Categorical(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd'
  79. ])
  80. elif typestr in ('sparse', 'sparse_na'):
  81. # FIXME: doesn't support num_rows != 10
  82. assert shape[-1] == 10
  83. assert all(s == 1 for s in shape[:-1])
  84. if typestr.endswith('_na'):
  85. fill_value = np.nan
  86. else:
  87. fill_value = 0.0
  88. values = SparseArray([fill_value, fill_value, 1, 2, 3, fill_value,
  89. 4, 5, fill_value, 6], fill_value=fill_value)
  90. arr = values.sp_values.view()
  91. arr += (num_offset - 1)
  92. else:
  93. raise ValueError('Unsupported typestr: "%s"' % typestr)
  94. return make_block(values, placement=placement, ndim=len(shape))
  95. def create_single_mgr(typestr, num_rows=None):
  96. if num_rows is None:
  97. num_rows = N
  98. return SingleBlockManager(
  99. create_block(typestr, placement=slice(0, num_rows), item_shape=()),
  100. np.arange(num_rows))
  101. def create_mgr(descr, item_shape=None):
  102. """
  103. Construct BlockManager from string description.
  104. String description syntax looks similar to np.matrix initializer. It looks
  105. like this::
  106. a,b,c: f8; d,e,f: i8
  107. Rules are rather simple:
  108. * see list of supported datatypes in `create_block` method
  109. * components are semicolon-separated
  110. * each component is `NAME,NAME,NAME: DTYPE_ID`
  111. * whitespace around colons & semicolons are removed
  112. * components with same DTYPE_ID are combined into single block
  113. * to force multiple blocks with same dtype, use '-SUFFIX'::
  114. 'a:f8-1; b:f8-2; c:f8-foobar'
  115. """
  116. if item_shape is None:
  117. item_shape = (N, )
  118. offset = 0
  119. mgr_items = []
  120. block_placements = OrderedDict()
  121. for d in descr.split(';'):
  122. d = d.strip()
  123. if not len(d):
  124. continue
  125. names, blockstr = d.partition(':')[::2]
  126. blockstr = blockstr.strip()
  127. names = names.strip().split(',')
  128. mgr_items.extend(names)
  129. placement = list(np.arange(len(names)) + offset)
  130. try:
  131. block_placements[blockstr].extend(placement)
  132. except KeyError:
  133. block_placements[blockstr] = placement
  134. offset += len(names)
  135. mgr_items = Index(mgr_items)
  136. blocks = []
  137. num_offset = 0
  138. for blockstr, placement in block_placements.items():
  139. typestr = blockstr.split('-')[0]
  140. blocks.append(create_block(typestr,
  141. placement,
  142. item_shape=item_shape,
  143. num_offset=num_offset, ))
  144. num_offset += len(placement)
  145. return BlockManager(sorted(blocks, key=lambda b: b.mgr_locs[0]),
  146. [mgr_items] + [np.arange(n) for n in item_shape])
  147. class TestBlock(tm.TestCase):
  148. _multiprocess_can_split_ = True
  149. def setUp(self):
  150. # self.fblock = get_float_ex() # a,c,e
  151. # self.cblock = get_complex_ex() #
  152. # self.oblock = get_obj_ex()
  153. # self.bool_block = get_bool_ex()
  154. # self.int_block = get_int_ex()
  155. self.fblock = create_block('float', [0, 2, 4])
  156. self.cblock = create_block('complex', [7])
  157. self.oblock = create_block('object', [1, 3])
  158. self.bool_block = create_block('bool', [5])
  159. self.int_block = create_block('int', [6])
  160. def test_constructor(self):
  161. int32block = create_block('i4', [0])
  162. self.assertEqual(int32block.dtype, np.int32)
  163. def test_pickle(self):
  164. def _check(blk):
  165. assert_block_equal(self.round_trip_pickle(blk), blk)
  166. _check(self.fblock)
  167. _check(self.cblock)
  168. _check(self.oblock)
  169. _check(self.bool_block)
  170. def test_mgr_locs(self):
  171. tm.assertIsInstance(self.fblock.mgr_locs, lib.BlockPlacement)
  172. tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array,
  173. np.array([0, 2, 4], dtype=np.int64))
  174. def test_attrs(self):
  175. self.assertEqual(self.fblock.shape, self.fblock.values.shape)
  176. self.assertEqual(self.fblock.dtype, self.fblock.values.dtype)
  177. self.assertEqual(len(self.fblock), len(self.fblock.values))
  178. def test_merge(self):
  179. avals = randn(2, 10)
  180. bvals = randn(2, 10)
  181. ref_cols = Index(['e', 'a', 'b', 'd', 'f'])
  182. ablock = make_block(avals, ref_cols.get_indexer(['e', 'b']))
  183. bblock = make_block(bvals, ref_cols.get_indexer(['a', 'd']))
  184. merged = ablock.merge(bblock)
  185. tm.assert_numpy_array_equal(merged.mgr_locs.as_array,
  186. np.array([0, 1, 2, 3], dtype=np.int64))
  187. tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals))
  188. tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals))
  189. # TODO: merge with mixed type?
  190. def test_copy(self):
  191. cop = self.fblock.copy()
  192. self.assertIsNot(cop, self.fblock)
  193. assert_block_equal(self.fblock, cop)
  194. def test_reindex_index(self):
  195. pass
  196. def test_reindex_cast(self):
  197. pass
  198. def test_insert(self):
  199. pass
  200. def test_delete(self):
  201. newb = self.fblock.copy()
  202. newb.delete(0)
  203. tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement)
  204. tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
  205. np.array([2, 4], dtype=np.int64))
  206. self.assertTrue((newb.values[0] == 1).all())
  207. newb = self.fblock.copy()
  208. newb.delete(1)
  209. tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement)
  210. tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
  211. np.array([0, 4], dtype=np.int64))
  212. self.assertTrue((newb.values[1] == 2).all())
  213. newb = self.fblock.copy()
  214. newb.delete(2)
  215. tm.assert_numpy_array_equal(newb.mgr_locs.as_array,
  216. np.array([0, 2], dtype=np.int64))
  217. self.assertTrue((newb.values[1] == 1).all())
  218. newb = self.fblock.copy()
  219. self.assertRaises(Exception, newb.delete, 3)
  220. def test_split_block_at(self):
  221. # with dup column support this method was taken out
  222. # GH3679
  223. raise nose.SkipTest("skipping for now")
  224. bs = list(self.fblock.split_block_at('a'))
  225. self.assertEqual(len(bs), 1)
  226. self.assertTrue(np.array_equal(bs[0].items, ['c', 'e']))
  227. bs = list(self.fblock.split_block_at('c'))
  228. self.assertEqual(len(bs), 2)
  229. self.assertTrue(np.array_equal(bs[0].items, ['a']))
  230. self.assertTrue(np.array_equal(bs[1].items, ['e']))
  231. bs = list(self.fblock.split_block_at('e'))
  232. self.assertEqual(len(bs), 1)
  233. self.assertTrue(np.array_equal(bs[0].items, ['a', 'c']))
  234. # bblock = get_bool_ex(['f'])
  235. # bs = list(bblock.split_block_at('f'))
  236. # self.assertEqual(len(bs), 0)
  237. class TestDatetimeBlock(tm.TestCase):
  238. _multiprocess_can_split_ = True
  239. def test_try_coerce_arg(self):
  240. block = create_block('datetime', [0])
  241. # coerce None
  242. none_coerced = block._try_coerce_args(block.values, None)[2]
  243. self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT)
  244. # coerce different types of date bojects
  245. vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10),
  246. date(2010, 10, 10))
  247. for val in vals:
  248. coerced = block._try_coerce_args(block.values, val)[2]
  249. self.assertEqual(np.int64, type(coerced))
  250. self.assertEqual(pd.Timestamp('2010-10-10'), pd.Timestamp(coerced))
  251. class TestBlockManager(tm.TestCase):
  252. _multiprocess_can_split_ = True
  253. def setUp(self):
  254. self.mgr = create_mgr(
  255. 'a: f8; b: object; c: f8; d: object; e: f8;'
  256. 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
  257. 'k: M8[ns, US/Eastern]; l: M8[ns, CET];')
  258. def test_constructor_corner(self):
  259. pass
  260. def test_attrs(self):
  261. mgr = create_mgr('a,b,c: f8-1; d,e,f: f8-2')
  262. self.assertEqual(mgr.nblocks, 2)
  263. self.assertEqual(len(mgr), 6)
  264. def test_is_mixed_dtype(self):
  265. self.assertFalse(create_mgr('a,b:f8').is_mixed_type)
  266. self.assertFalse(create_mgr('a:f8-1; b:f8-2').is_mixed_type)
  267. self.assertTrue(create_mgr('a,b:f8; c,d: f4').is_mixed_type)
  268. self.assertTrue(create_mgr('a,b:f8; c,d: object').is_mixed_type)
  269. def test_is_indexed_like(self):
  270. mgr1 = create_mgr('a,b: f8')
  271. mgr2 = create_mgr('a:i8; b:bool')
  272. mgr3 = create_mgr('a,b,c: f8')
  273. self.assertTrue(mgr1._is_indexed_like(mgr1))
  274. self.assertTrue(mgr1._is_indexed_like(mgr2))
  275. self.assertTrue(mgr1._is_indexed_like(mgr3))
  276. self.assertFalse(mgr1._is_indexed_like(mgr1.get_slice(
  277. slice(-1), axis=1)))
  278. def test_duplicate_ref_loc_failure(self):
  279. tmp_mgr = create_mgr('a:bool; a: f8')
  280. axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
  281. blocks[0].mgr_locs = np.array([0])
  282. blocks[1].mgr_locs = np.array([0])
  283. # test trying to create block manager with overlapping ref locs
  284. self.assertRaises(AssertionError, BlockManager, blocks, axes)
  285. blocks[0].mgr_locs = np.array([0])
  286. blocks[1].mgr_locs = np.array([1])
  287. mgr = BlockManager(blocks, axes)
  288. mgr.iget(1)
  289. def test_contains(self):
  290. self.assertIn('a', self.mgr)
  291. self.assertNotIn('baz', self.mgr)
  292. def test_pickle(self):
  293. mgr2 = self.round_trip_pickle(self.mgr)
  294. assert_frame_equal(DataFrame(self.mgr), DataFrame(mgr2))
  295. # share ref_items
  296. # self.assertIs(mgr2.blocks[0].ref_items, mgr2.blocks[1].ref_items)
  297. # GH2431
  298. self.assertTrue(hasattr(mgr2, "_is_consolidated"))
  299. self.assertTrue(hasattr(mgr2, "_known_consolidated"))
  300. # reset to False on load
  301. self.assertFalse(mgr2._is_consolidated)
  302. self.assertFalse(mgr2._known_consolidated)
  303. def test_non_unique_pickle(self):
  304. mgr = create_mgr('a,a,a:f8')
  305. mgr2 = self.round_trip_pickle(mgr)
  306. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  307. mgr = create_mgr('a: f8; a: i8')
  308. mgr2 = self.round_trip_pickle(mgr)
  309. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  310. def test_categorical_block_pickle(self):
  311. mgr = create_mgr('a: category')
  312. mgr2 = self.round_trip_pickle(mgr)
  313. assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  314. smgr = create_single_mgr('category')
  315. smgr2 = self.round_trip_pickle(smgr)
  316. assert_series_equal(Series(smgr), Series(smgr2))
  317. def test_get_scalar(self):
  318. for item in self.mgr.items:
  319. for i, index in enumerate(self.mgr.axes[1]):
  320. res = self.mgr.get_scalar((item, index))
  321. exp = self.mgr.get(item, fastpath=False)[i]
  322. self.assertEqual(res, exp)
  323. exp = self.mgr.get(item).internal_values()[i]
  324. self.assertEqual(res, exp)
  325. def test_get(self):
  326. cols = Index(list('abc'))
  327. values = np.random.rand(3, 3)
  328. block = make_block(values=values.copy(), placement=np.arange(3))
  329. mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
  330. assert_almost_equal(mgr.get('a', fastpath=False), values[0])
  331. assert_almost_equal(mgr.get('b', fastpath=False), values[1])
  332. assert_almost_equal(mgr.get('c', fastpath=False), values[2])
  333. assert_almost_equal(mgr.get('a').internal_values(), values[0])
  334. assert_almost_equal(mgr.get('b').internal_values(), values[1])
  335. assert_almost_equal(mgr.get('c').internal_values(), values[2])
  336. def test_set(self):
  337. mgr = create_mgr('a,b,c: int', item_shape=(3, ))
  338. mgr.set('d', np.array(['foo'] * 3))
  339. mgr.set('b', np.array(['bar'] * 3))
  340. tm.assert_numpy_array_equal(mgr.get('a').internal_values(),
  341. np.array([0] * 3))
  342. tm.assert_numpy_array_equal(mgr.get('b').internal_values(),
  343. np.array(['bar'] * 3, dtype=np.object_))
  344. tm.assert_numpy_array_equal(mgr.get('c').internal_values(),
  345. np.array([2] * 3))
  346. tm.assert_numpy_array_equal(mgr.get('d').internal_values(),
  347. np.array(['foo'] * 3, dtype=np.object_))
  348. def test_insert(self):
  349. self.mgr.insert(0, 'inserted', np.arange(N))
  350. self.assertEqual(self.mgr.items[0], 'inserted')
  351. assert_almost_equal(self.mgr.get('inserted'), np.arange(N))
  352. for blk in self.mgr.blocks:
  353. yield self.assertIs, self.mgr.items, blk.ref_items
  354. def test_set_change_dtype(self):
  355. self.mgr.set('baz', np.zeros(N, dtype=bool))
  356. self.mgr.set('baz', np.repeat('foo', N))
  357. self.assertEqual(self.mgr.get('baz').dtype, np.object_)
  358. mgr2 = self.mgr.consolidate()
  359. mgr2.set('baz', np.repeat('foo', N))
  360. self.assertEqual(mgr2.get('baz').dtype, np.object_)
  361. mgr2.set('quux', randn(N).astype(int))
  362. self.assertEqual(mgr2.get('quux').dtype, np.int_)
  363. mgr2.set('quux', randn(N))
  364. self.assertEqual(mgr2.get('quux').dtype, np.float_)
  365. def test_set_change_dtype_slice(self): # GH8850
  366. cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c')
  367. ])
  368. df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
  369. df['2nd'] = df['2nd'] * 2.0
  370. self.assertEqual(sorted(df.blocks.keys()), ['float64', 'int64'])
  371. assert_frame_equal(df.blocks['float64'], DataFrame(
  372. [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]))
  373. assert_frame_equal(df.blocks['int64'], DataFrame(
  374. [[3], [6]], columns=cols[2:]))
  375. def test_copy(self):
  376. cp = self.mgr.copy(deep=False)
  377. for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
  378. # view assertion
  379. self.assertTrue(cp_blk.equals(blk))
  380. self.assertTrue(cp_blk.values.base is blk.values.base)
  381. cp = self.mgr.copy(deep=True)
  382. for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
  383. # copy assertion we either have a None for a base or in case of
  384. # some blocks it is an array (e.g. datetimetz), but was copied
  385. self.assertTrue(cp_blk.equals(blk))
  386. if cp_blk.values.base is not None and blk.values.base is not None:
  387. self.assertFalse(cp_blk.values.base is blk.values.base)
  388. else:
  389. self.assertTrue(cp_blk.values.base is None and blk.values.base
  390. is None)
  391. def test_sparse(self):
  392. mgr = create_mgr('a: sparse-1; b: sparse-2')
  393. # what to test here?
  394. self.assertEqual(mgr.as_matrix().dtype, np.float64)
  395. def test_sparse_mixed(self):
  396. mgr = create_mgr('a: sparse-1; b: sparse-2; c: f8')
  397. self.assertEqual(len(mgr.blocks), 3)
  398. self.assertIsInstance(mgr, BlockManager)
  399. # what to test here?
  400. def test_as_matrix_float(self):
  401. mgr = create_mgr('c: f4; d: f2; e: f8')
  402. self.assertEqual(mgr.as_matrix().dtype, np.float64)
  403. mgr = create_mgr('c: f4; d: f2')
  404. self.assertEqual(mgr.as_matrix().dtype, np.float32)
  405. def test_as_matrix_int_bool(self):
  406. mgr = create_mgr('a: bool-1; b: bool-2')
  407. self.assertEqual(mgr.as_matrix().dtype, np.bool_)
  408. mgr = create_mgr('a: i8-1; b: i8-2; c: i4; d: i2; e: u1')
  409. self.assertEqual(mgr.as_matrix().dtype, np.int64)
  410. mgr = create_mgr('c: i4; d: i2; e: u1')
  411. self.assertEqual(mgr.as_matrix().dtype, np.int32)
  412. def test_as_matrix_datetime(self):
  413. mgr = create_mgr('h: datetime-1; g: datetime-2')
  414. self.assertEqual(mgr.as_matrix().dtype, 'M8[ns]')
  415. def test_as_matrix_datetime_tz(self):
  416. mgr = create_mgr('h: M8[ns, US/Eastern]; g: M8[ns, CET]')
  417. self.assertEqual(mgr.get('h').dtype, 'datetime64[ns, US/Eastern]')
  418. self.assertEqual(mgr.get('g').dtype, 'datetime64[ns, CET]')
  419. self.assertEqual(mgr.as_matrix().dtype, 'object')
  420. def test_astype(self):
  421. # coerce all
  422. mgr = create_mgr('c: f4; d: f2; e: f8')
  423. for t in ['float16', 'float32', 'float64', 'int32', 'int64']:
  424. t = np.dtype(t)
  425. tmgr = mgr.astype(t)
  426. self.assertEqual(tmgr.get('c').dtype.type, t)
  427. self.assertEqual(tmgr.get('d').dtype.type, t)
  428. self.assertEqual(tmgr.get('e').dtype.type, t)
  429. # mixed
  430. mgr = create_mgr('a,b: object; c: bool; d: datetime;'
  431. 'e: f4; f: f2; g: f8')
  432. for t in ['float16', 'float32', 'float64', 'int32', 'int64']:
  433. t = np.dtype(t)
  434. tmgr = mgr.astype(t, raise_on_error=False)
  435. self.assertEqual(tmgr.get('c').dtype.type, t)
  436. self.assertEqual(tmgr.get('e').dtype.type, t)
  437. self.assertEqual(tmgr.get('f').dtype.type, t)
  438. self.assertEqual(tmgr.get('g').dtype.type, t)
  439. self.assertEqual(tmgr.get('a').dtype.type, np.object_)
  440. self.assertEqual(tmgr.get('b').dtype.type, np.object_)
  441. if t != np.int64:
  442. self.assertEqual(tmgr.get('d').dtype.type, np.datetime64)
  443. else:
  444. self.assertEqual(tmgr.get('d').dtype.type, t)
  445. def test_convert(self):
  446. def _compare(old_mgr, new_mgr):
  447. """ compare the blocks, numeric compare ==, object don't """
  448. old_blocks = set(old_mgr.blocks)
  449. new_blocks = set(new_mgr.blocks)
  450. self.assertEqual(len(old_blocks), len(new_blocks))
  451. # compare non-numeric
  452. for b in old_blocks:
  453. found = False
  454. for nb in new_blocks:
  455. if (b.values == nb.values).all():
  456. found = True
  457. break
  458. self.assertTrue(found)
  459. for b in new_blocks:
  460. found = False
  461. for ob in old_blocks:
  462. if (b.values == ob.values).all():
  463. found = True
  464. break
  465. self.assertTrue(found)
  466. # noops
  467. mgr = create_mgr('f: i8; g: f8')
  468. new_mgr = mgr.convert()
  469. _compare(mgr, new_mgr)
  470. mgr = create_mgr('a, b: object; f: i8; g: f8')
  471. new_mgr = mgr.convert()
  472. _compare(mgr, new_mgr)
  473. # convert
  474. mgr = create_mgr('a,b,foo: object; f: i8; g: f8')
  475. mgr.set('a', np.array(['1'] * N, dtype=np.object_))
  476. mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
  477. mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
  478. new_mgr = mgr.convert(numeric=True)
  479. self.assertEqual(new_mgr.get('a').dtype, np.int64)
  480. self.assertEqual(new_mgr.get('b').dtype, np.float64)
  481. self.assertEqual(new_mgr.get('foo').dtype, np.object_)
  482. self.assertEqual(new_mgr.get('f').dtype, np.int64)
  483. self.assertEqual(new_mgr.get('g').dtype, np.float64)
  484. mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;'
  485. 'i: i8; g: f8; h: f2')
  486. mgr.set('a', np.array(['1'] * N, dtype=np.object_))
  487. mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
  488. mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
  489. new_mgr = mgr.convert(numeric=True)
  490. self.assertEqual(new_mgr.get('a').dtype, np.int64)
  491. self.assertEqual(new_mgr.get('b').dtype, np.float64)
  492. self.assertEqual(new_mgr.get('foo').dtype, np.object_)
  493. self.assertEqual(new_mgr.get('f').dtype, np.int32)
  494. self.assertEqual(new_mgr.get('bool').dtype, np.bool_)
  495. self.assertEqual(new_mgr.get('dt').dtype.type, np.datetime64)
  496. self.assertEqual(new_mgr.get('i').dtype, np.int64)
  497. self.assertEqual(new_mgr.get('g').dtype, np.float64)
  498. self.assertEqual(new_mgr.get('h').dtype, np.float16)
  499. def test_interleave(self):
  500. # self
  501. for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]',
  502. 'm8[ns]']:
  503. mgr = create_mgr('a: {0}'.format(dtype))
  504. self.assertEqual(mgr.as_matrix().dtype, dtype)
  505. mgr = create_mgr('a: {0}; b: {0}'.format(dtype))
  506. self.assertEqual(mgr.as_matrix().dtype, dtype)
  507. # will be converted according the actual dtype of the underlying
  508. mgr = create_mgr('a: category')
  509. self.assertEqual(mgr.as_matrix().dtype, 'i8')
  510. mgr = create_mgr('a: category; b: category')
  511. self.assertEqual(mgr.as_matrix().dtype, 'i8'),
  512. mgr = create_mgr('a: category; b: category2')
  513. self.assertEqual(mgr.as_matrix().dtype, 'object')
  514. mgr = create_mgr('a: category2')
  515. self.assertEqual(mgr.as_matrix().dtype, 'object')
  516. mgr = create_mgr('a: category2; b: category2')
  517. self.assertEqual(mgr.as_matrix().dtype, 'object')
  518. # combinations
  519. mgr = create_mgr('a: f8')
  520. self.assertEqual(mgr.as_matrix().dtype, 'f8')
  521. mgr = create_mgr('a: f8; b: i8')
  522. self.assertEqual(mgr.as_matrix().dtype, 'f8')
  523. mgr = create_mgr('a: f4; b: i8')
  524. self.assertEqual(mgr.as_matrix().dtype, 'f4')
  525. mgr = create_mgr('a: f4; b: i8; d: object')
  526. self.assertEqual(mgr.as_matrix().dtype, 'object')
  527. mgr = create_mgr('a: bool; b: i8')
  528. self.assertEqual(mgr.as_matrix().dtype, 'object')
  529. mgr = create_mgr('a: complex')
  530. self.assertEqual(mgr.as_matrix().dtype, 'complex')
  531. mgr = create_mgr('a: f8; b: category')
  532. self.assertEqual(mgr.as_matrix().dtype, 'object')
  533. mgr = create_mgr('a: M8[ns]; b: category')
  534. self.assertEqual(mgr.as_matrix().dtype, 'object')
  535. mgr = create_mgr('a: M8[ns]; b: bool')
  536. self.assertEqual(mgr.as_matrix().dtype, 'object')
  537. mgr = create_mgr('a: M8[ns]; b: i8')
  538. self.assertEqual(mgr.as_matrix().dtype, 'object')
  539. mgr = create_mgr('a: m8[ns]; b: bool')
  540. self.assertEqual(mgr.as_matrix().dtype, 'object')
  541. mgr = create_mgr('a: m8[ns]; b: i8')
  542. self.assertEqual(mgr.as_matrix().dtype, 'object')
  543. mgr = create_mgr('a: M8[ns]; b: m8[ns]')
  544. self.assertEqual(mgr.as_matrix().dtype, 'object')
  545. def test_interleave_non_unique_cols(self):
  546. df = DataFrame([
  547. [pd.Timestamp('20130101'), 3.5],
  548. [pd.Timestamp('20130102'), 4.5]],
  549. columns=['x', 'x'],
  550. index=[1, 2])
  551. df_unique = df.copy()
  552. df_unique.columns = ['x', 'y']
  553. self.assertEqual(df_unique.values.shape, df.values.shape)
  554. tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
  555. tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
  556. def test_consolidate(self):
  557. pass
  558. def test_consolidate_ordering_issues(self):
  559. self.mgr.set('f', randn(N))
  560. self.mgr.set('d', randn(N))
  561. self.mgr.set('b', randn(N))
  562. self.mgr.set('g', randn(N))
  563. self.mgr.set('h', randn(N))
  564. # we have datetime/tz blocks in self.mgr
  565. cons = self.mgr.consolidate()
  566. self.assertEqual(cons.nblocks, 4)
  567. cons = self.mgr.consolidate().get_numeric_data()
  568. self.assertEqual(cons.nblocks, 1)
  569. tm.assertIsInstance(cons.blocks[0].mgr_locs, lib.BlockPlacement)
  570. tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array,
  571. np.arange(len(cons.items), dtype=np.int64))
  572. def test_reindex_index(self):
  573. pass
  574. def test_reindex_items(self):
  575. # mgr is not consolidated, f8 & f8-2 blocks
  576. mgr = create_mgr('a: f8; b: i8; c: f8; d: i8; e: f8;'
  577. 'f: bool; g: f8-2')
  578. reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0)
  579. self.assertEqual(reindexed.nblocks, 2)
  580. tm.assert_index_equal(reindexed.items, pd.Index(['g', 'c', 'a', 'd']))
  581. assert_almost_equal(
  582. mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False))
  583. assert_almost_equal(
  584. mgr.get('c', fastpath=False), reindexed.get('c', fastpath=False))
  585. assert_almost_equal(
  586. mgr.get('a', fastpath=False), reindexed.get('a', fastpath=False))
  587. assert_almost_equal(
  588. mgr.get('d', fastpath=False), reindexed.get('d', fastpath=False))
  589. assert_almost_equal(
  590. mgr.get('g').internal_values(),
  591. reindexed.get('g').internal_values())
  592. assert_almost_equal(
  593. mgr.get('c').internal_values(),
  594. reindexed.get('c').internal_values())
  595. assert_almost_equal(
  596. mgr.get('a').internal_values(),
  597. reindexed.get('a').internal_values())
  598. assert_almost_equal(
  599. mgr.get('d').internal_values(),
  600. reindexed.get('d').internal_values())
  601. def test_multiindex_xs(self):
  602. mgr = create_mgr('a,b,c: f8; d,e,f: i8')
  603. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
  604. 'three']],
  605. labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  606. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  607. names=['first', 'second'])
  608. mgr.set_axis(1, index)
  609. result = mgr.xs('bar', axis=1)
  610. self.assertEqual(result.shape, (6, 2))
  611. self.assertEqual(result.axes[1][0], ('bar', 'one'))
  612. self.assertEqual(result.axes[1][1], ('bar', 'two'))
  613. def test_get_numeric_data(self):
  614. mgr = create_mgr('int: int; float: float; complex: complex;'
  615. 'str: object; bool: bool; obj: object; dt: datetime',
  616. item_shape=(3, ))
  617. mgr.set('obj', np.array([1, 2, 3], dtype=np.object_))
  618. numeric = mgr.get_numeric_data()
  619. tm.assert_index_equal(numeric.items,
  620. pd.Index(['int', 'float', 'complex', 'bool']))
  621. assert_almost_equal(
  622. mgr.get('float', fastpath=False), numeric.get('float',
  623. fastpath=False))
  624. assert_almost_equal(
  625. mgr.get('float').internal_values(),
  626. numeric.get('float').internal_values())
  627. # Check sharing
  628. numeric.set('float', np.array([100., 200., 300.]))
  629. assert_almost_equal(
  630. mgr.get('float', fastpath=False), np.array([100., 200., 300.]))
  631. assert_almost_equal(
  632. mgr.get('float').internal_values(), np.array([100., 200., 300.]))
  633. numeric2 = mgr.get_numeric_data(copy=True)
  634. tm.assert_index_equal(numeric.items,
  635. pd.Index(['int', 'float', 'complex', 'bool']))
  636. numeric2.set('float', np.array([1000., 2000., 3000.]))
  637. assert_almost_equal(
  638. mgr.get('float', fastpath=False), np.array([100., 200., 300.]))
  639. assert_almost_equal(
  640. mgr.get('float').internal_values(), np.array([100., 200., 300.]))
  641. def test_get_bool_data(self):
  642. mgr = create_mgr('int: int; float: float; complex: complex;'
  643. 'str: object; bool: bool; obj: object; dt: datetime',
  644. item_shape=(3, ))
  645. mgr.set('obj', np.array([True, False, True], dtype=np.object_))
  646. bools = mgr.get_bool_data()
  647. tm.assert_index_equal(bools.items, pd.Index(['bool']))
  648. assert_almost_equal(mgr.get('bool', fastpath=False),
  649. bools.get('bool', fastpath=False))
  650. assert_almost_equal(
  651. mgr.get('bool').internal_values(),
  652. bools.get('bool').internal_values())
  653. bools.set('bool', np.array([True, False, True]))
  654. tm.assert_numpy_array_equal(mgr.get('bool', fastpath=False),
  655. np.array([True, False, True]))
  656. tm.assert_numpy_array_equal(mgr.get('bool').internal_values(),
  657. np.array([True, False, True]))
  658. # Check sharing
  659. bools2 = mgr.get_bool_data(copy=True)
  660. bools2.set('bool', np.array([False, True, False]))
  661. tm.assert_numpy_array_equal(mgr.get('bool', fastpath=False),
  662. np.array([True, False, True]))
  663. tm.assert_numpy_array_equal(mgr.get('bool').internal_values(),
  664. np.array([True, False, True]))
  665. def test_unicode_repr_doesnt_raise(self):
  666. repr(create_mgr(u('b,\u05d0: object')))
  667. def test_missing_unicode_key(self):
  668. df = DataFrame({"a": [1]})
  669. try:
  670. df.ix[:, u("\u05d0")] # should not raise UnicodeEncodeError
  671. except KeyError:
  672. pass # this is the expected exception
  673. def test_equals(self):
  674. # unique items
  675. bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2')
  676. bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
  677. self.assertTrue(bm1.equals(bm2))
  678. bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2')
  679. bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
  680. self.assertTrue(bm1.equals(bm2))
  681. def test_equals_block_order_different_dtypes(self):
  682. # GH 9330
  683. mgr_strings = [
  684. "a:i8;b:f8", # basic case
  685. "a:i8;b:f8;c:c8;d:b", # many types
  686. "a:i8;e:dt;f:td;g:string", # more types
  687. "a:i8;b:category;c:category2;d:category2", # categories
  688. "c:sparse;d:sparse_na;b:f8", # sparse
  689. ]
  690. for mgr_string in mgr_strings:
  691. bm = create_mgr(mgr_string)
  692. block_perms = itertools.permutations(bm.blocks)
  693. for bm_perm in block_perms:
  694. bm_this = BlockManager(bm_perm, bm.axes)
  695. self.assertTrue(bm.equals(bm_this))
  696. self.assertTrue(bm_this.equals(bm))
  697. def test_single_mgr_ctor(self):
  698. mgr = create_single_mgr('f8', num_rows=5)
  699. self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.])
  class TestIndexing(object):
      # Nosetests-style data-driven tests: each ``test_*`` method is a
      # generator yielding ``(check_function, arg, ...)`` tuples.
      #
      # Every check applies one indexing routine to a block manager and
      # compares the outcome with the same operation performed on the
      # ndarray returned by ``as_matrix()``.
      #
      # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of
      #       tests, so those entries are left commented out below.

      MANAGERS = [
          # single-block managers
          create_single_mgr('f8', N),
          create_single_mgr('i8', N),
          # create_single_mgr('sparse', N),
          create_single_mgr('sparse_na', N),

          # 2-dim
          create_mgr('a,b,c,d,e,f: f8', item_shape=(N,)),
          create_mgr('a,b,c,d,e,f: i8', item_shape=(N,)),
          create_mgr('a,b: f8; c,d: i8; e,f: string', item_shape=(N,)),
          create_mgr('a,b: f8; c,d: i8; e,f: f8', item_shape=(N,)),
          # create_mgr('a: sparse', item_shape=(N,)),
          create_mgr('a: sparse_na', item_shape=(N,)),

          # 3-dim
          create_mgr('a,b,c,d,e,f: f8', item_shape=(N, N)),
          create_mgr('a,b,c,d,e,f: i8', item_shape=(N, N)),
          create_mgr('a,b: f8; c,d: i8; e,f: string', item_shape=(N, N)),
          create_mgr('a,b: f8; c,d: i8; e,f: f8', item_shape=(N, N)),
          # create_mgr('a: sparse', item_shape=(1, N)),
      ]

      # Debugging aid: narrow the run to one manager, e.g.
      # MANAGERS = [MANAGERS[6]]

      def test_get_slice(self):
          def assert_slice_ok(mgr, axis, slobj):
              mat = mgr.as_matrix()

              # An ndarray slobj may be shorter than the axis it is
              # applied to; pad it with False entries up to the axis
              # length so it can index both the matrix and the axis.
              if isinstance(slobj, np.ndarray):
                  axis_len = len(mgr.axes[axis])
                  mask_len = len(slobj)
                  if axis_len and mask_len and mask_len != axis_len:
                      padding = np.zeros(axis_len - mask_len, dtype=bool)
                      slobj = np.concatenate([slobj, padding])

              sliced = mgr.get_slice(slobj, axis=axis)

              # Select everything on the leading axes and apply slobj on
              # the axis under test.
              mat_key = (slice(None),) * axis + (slobj,)
              tm.assert_numpy_array_equal(mat[mat_key],
                                          sliced.as_matrix(),
                                          check_dtype=False)
              tm.assert_index_equal(mgr.axes[axis][slobj],
                                    sliced.axes[axis])

          for mgr in self.MANAGERS:
              for ax in range(mgr.ndim):
                  size = mgr.shape[ax]

                  # slice
                  slobjs = [
                      slice(None),
                      slice(3),
                      slice(100),
                      slice(1, 4),
                      slice(3, 0, -2),
                  ]

                  # boolean mask
                  slobjs.append(np.array([], dtype=np.bool_))
                  slobjs.append(np.ones(size, dtype=np.bool_))
                  slobjs.append(np.zeros(size, dtype=np.bool_))
                  if size >= 3:
                      slobjs.append(np.arange(size) % 3 == 0)
                      slobjs.append(np.array([True, True, False],
                                             dtype=np.bool_))

                  # fancy indexer
                  slobjs.append([])
                  slobjs.append(lrange(size))
                  if size >= 3:
                      slobjs.append([0, 1, 2])
                      slobjs.append([-1, -2, -3])

                  for slobj in slobjs:
                      yield assert_slice_ok, mgr, ax, slobj

      def test_take(self):
          def assert_take_ok(mgr, axis, indexer):
              mat = mgr.as_matrix()
              taken = mgr.take(indexer, axis)
              expected_values = np.take(mat, indexer, axis)
              tm.assert_numpy_array_equal(expected_values,
                                          taken.as_matrix(),
                                          check_dtype=False)
              expected_axis = mgr.axes[axis].take(indexer)
              tm.assert_index_equal(expected_axis, taken.axes[axis])

          for mgr in self.MANAGERS:
              for ax in range(mgr.ndim):
                  size = mgr.shape[ax]

                  # take/fancy indexer
                  indexers = [[], [0, 0, 0], lrange(size)]
                  if size >= 3:
                      indexers += [[0, 1, 2], [-1, -2, -3]]

                  for indexer in indexers:
                      yield assert_take_ok, mgr, ax, indexer

      def test_reindex_axis(self):
          def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value):
              mat = mgr.as_matrix()
              indexer = mgr.axes[axis].get_indexer_for(new_labels)
              reindexed = mgr.reindex_axis(new_labels, axis,
                                           fill_value=fill_value)
              expected_values = algos.take_nd(mat, indexer, axis,
                                              fill_value=fill_value)
              tm.assert_numpy_array_equal(expected_values,
                                          reindexed.as_matrix(),
                                          check_dtype=False)
              tm.assert_index_equal(reindexed.axes[axis], new_labels)

          for mgr in self.MANAGERS:
              for ax in range(mgr.ndim):
                  for fill_value in (None, np.nan, 100.):
                      label_sets = [
                          pd.Index([]),
                          mgr.axes[ax],
                          mgr.axes[ax][[0, 0, 0]],
                          pd.Index(['foo', 'bar', 'baz']),
                          pd.Index(['foo', mgr.axes[ax][0], 'baz']),
                      ]
                      if mgr.shape[ax] >= 3:
                          label_sets += [
                              mgr.axes[ax][:-3],
                              mgr.axes[ax][-3::-1],
                              mgr.axes[ax][[0, 1, 2, 0, 1, 2]],
                          ]

                      for new_labels in label_sets:
                          yield (assert_reindex_axis_is_ok, mgr, ax,
                                 new_labels, fill_value)

      def test_reindex_indexer(self):
          def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer,
                                           fill_value):
              mat = mgr.as_matrix()
              reindexed_mat = algos.take_nd(mat, indexer, axis,
                                            fill_value=fill_value)
              reindexed = mgr.reindex_indexer(new_labels, indexer, axis,
                                              fill_value=fill_value)
              tm.assert_numpy_array_equal(reindexed_mat,
                                          reindexed.as_matrix(),
                                          check_dtype=False)
              tm.assert_index_equal(reindexed.axes[axis], new_labels)

          for mgr in self.MANAGERS:
              for ax in range(mgr.ndim):
                  for fill_value in (None, np.nan, 100.):
                      size = mgr.shape[ax]

                      # (new_labels, indexer) pairs, in execution order
                      cases = [
                          (pd.Index([]), []),
                          (mgr.axes[ax], np.arange(size)),
                          (pd.Index(['foo'] * size), np.arange(size)),
                          (mgr.axes[ax][::-1], np.arange(size)),
                          (mgr.axes[ax], np.arange(size)[::-1]),
                          (pd.Index(['foo', 'bar', 'baz']), [0, 0, 0]),
                          (pd.Index(['foo', 'bar', 'baz']), [-1, 0, -1]),
                          (pd.Index(['foo', mgr.axes[ax][0], 'baz']),
                           [-1, -1, -1]),
                      ]
                      if size >= 3:
                          cases.append((pd.Index(['foo', 'bar', 'baz']),
                                        [0, 1, 2]))

                      for new_labels, indexer in cases:
                          yield (assert_reindex_indexer_is_ok, mgr, ax,
                                 new_labels, indexer, fill_value)

      # Manager routines exercised by this class:
      # test_get_slice(slice_like, axis)
      # take(indexer, axis)
      # reindex_axis(new_labels, axis)
      # reindex_indexer(new_labels, indexer, axis)
  class TestBlockPlacement(tm.TestCase):
      # Checks BlockPlacement construction from slices and integer lists:
      # length, iteration, slice <-> array conversion and add().
      _multiprocess_can_split_ = True

      def test_slice_len(self):
          # (slice, expected length)
          cases = [
              (slice(0, 4), 4),
              (slice(0, 4, 2), 2),
              (slice(0, 3, 2), 2),
              (slice(0, 1, 2), 1),
              (slice(1, 0, -1), 1),
          ]
          for slc, expected in cases:
              self.assertEqual(len(BlockPlacement(slc)), expected)

      def test_zero_step_raises(self):
          for slc in (slice(1, 1, 0), slice(1, 2, 0)):
              self.assertRaises(ValueError, BlockPlacement, slc)

      def test_unbounded_slice_raises(self):
          open_ended = [
              slice(None, None),
              slice(10, None),
              slice(None, None, -1),
              slice(None, 10, -1),
          ]
          # These are "unbounded" because negative index will change
          # depending on container shape.
          negative_bounds = [
              slice(-1, None),
              slice(None, -1),
              slice(-1, -1),
              slice(-1, None, -1),
              slice(None, -1, -1),
              slice(-1, -1, -1),
          ]
          for slc in open_ended + negative_bounds:
              self.assertRaisesRegexp(ValueError, "unbounded slice",
                                      BlockPlacement, slc)

      def test_not_slice_like_slices(self):
          # The slice(0, 0) case is checked twice, as in the original.
          candidates = [
              slice(0, 0),
              slice(100, 0),
              slice(100, 100, -1),
              slice(0, 100, -1),
              slice(0, 0),
              slice(100, 100),
          ]
          for slc in candidates:
              self.assertFalse(BlockPlacement(slc).is_slice_like)

      def test_array_to_slice_conversion(self):
          # (input positions, expected as_slice)
          cases = [
              ([0], slice(0, 1, 1)),
              ([100], slice(100, 101, 1)),
              ([0, 1, 2], slice(0, 3, 1)),
              ([0, 5, 10], slice(0, 15, 5)),
              ([0, 100], slice(0, 200, 100)),
              ([2, 1], slice(2, 0, -1)),
              ([2, 1, 0], slice(2, None, -1)),
              ([100, 0], slice(100, None, -100)),
          ]
          for arr, expected in cases:
              self.assertEqual(BlockPlacement(arr).as_slice, expected)

      def test_not_slice_like_arrays(self):
          # The [-1] case is checked twice, as in the original.
          candidates = [
              [],
              [-1],
              [-1, -2, -3],
              [-10],
              [-1],
              [-1, 0, 1, 2],
              [-2, 0, 2, 4],
              [1, 0, -1],
              [1, 1, 1],
          ]
          for arr in candidates:
              self.assertFalse(BlockPlacement(arr).is_slice_like)

      def test_slice_iter(self):
          # (slice, positions produced by iterating the placement)
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
              (slice(3, 0, -1), [3, 2, 1]),
              (slice(3, None, -1), [3, 2, 1, 0]),
          ]
          for slc, expected in cases:
              self.assertEqual(list(BlockPlacement(slc)), expected)

      def test_slice_to_array_conversion(self):
          # (slice, expected as_array contents, compared as int64)
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
              (slice(3, 0, -1), [3, 2, 1]),
              (slice(3, None, -1), [3, 2, 1, 0]),
              (slice(31, None, -10), [31, 21, 11, 1]),
          ]
          for slc, values in cases:
              actual = BlockPlacement(slc).as_array
              expected = np.asarray(values, dtype=np.int64)
              tm.assert_numpy_array_equal(actual, expected)

      def test_blockplacement_add(self):
          bpl = BlockPlacement(slice(0, 5))

          # scalar increment
          shifted = bpl.add(1)
          self.assertEqual(shifted.as_slice, slice(1, 6, 1))

          # elementwise increasing increments
          stretched = bpl.add(np.arange(5))
          self.assertEqual(stretched.as_slice, slice(0, 10, 2))

          # elementwise decreasing increments
          flattened = bpl.add(np.arange(5, 0, -1))
          self.assertEqual(list(flattened), [5, 5, 5, 5, 5])

      def test_blockplacement_add_int(self):
          # (placement input, increment, expected positions)
          passing = [
              (slice(0, 0), 0, []),
              (slice(1, 4), 0, [1, 2, 3]),
              (slice(3, 0, -1), 0, [3, 2, 1]),
              (slice(2, None, -1), 0, [2, 1, 0]),
              ([1, 2, 4], 0, [1, 2, 4]),

              (slice(0, 0), 10, []),
              (slice(1, 4), 10, [11, 12, 13]),
              (slice(3, 0, -1), 10, [13, 12, 11]),
              (slice(2, None, -1), 10, [12, 11, 10]),
              ([1, 2, 4], 10, [11, 12, 14]),

              (slice(0, 0), -1, []),
              (slice(1, 4), -1, [0, 1, 2]),
              (slice(3, 0, -1), -1, [2, 1, 0]),
              ([1, 2, 4], -1, [0, 1, 3]),
          ]
          for val, inc, expected in passing:
              self.assertEqual(list(BlockPlacement(val).add(inc)), expected)

          # (placement input, increment) pairs expected to raise ValueError
          failing = [
              (slice(1, 4), -10),
              ([1, 2, 4], -10),
              (slice(2, None, -1), -1),
          ]
          for val, inc in failing:
              # The lambda is invoked immediately by assertRaises, so both
              # construction and add() happen inside the checked call.
              self.assertRaises(ValueError,
                                lambda: BlockPlacement(val).add(inc))
  if __name__ == '__main__':
      # Run this module under nose when executed directly, with the
      # verbose/stop-on-first-failure/pdb options below.
      nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
      nose.runmodule(argv=nose_argv, exit=False)