PageRenderTime 72ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/tests/test_index.py

http://github.com/pydata/pandas
Python | 2832 lines | 2055 code | 570 blank | 207 comment | 85 complexity | 9fdefc21048410bd9feaa3634b3c1bf8 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # pylint: disable=E1101,E1103,W0232
  2. from datetime import datetime, timedelta
  3. from pandas.compat import range, lrange, lzip, u, zip
  4. import operator
  5. import pickle
  6. import re
  7. import nose
  8. import warnings
  9. import os
  10. import numpy as np
  11. from numpy.testing import assert_array_equal
  12. from pandas.core.index import (Index, Float64Index, Int64Index, MultiIndex,
  13. InvalidIndexError)
  14. from pandas.tseries.index import DatetimeIndex
  15. from pandas.core.series import Series
  16. from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
  17. assert_copy)
  18. from pandas import compat
  19. from pandas.compat import long
  20. import pandas.util.testing as tm
  21. import pandas.core.config as cf
  22. from pandas.tseries.index import _to_m8
  23. import pandas.tseries.offsets as offsets
  24. import pandas as pd
  25. from pandas.lib import Timestamp
  26. from pandas import _np_version_under1p7
  27. class TestIndex(tm.TestCase):
  28. _multiprocess_can_split_ = True
  def setUp(self):
      """Create one sample index per flavour and bind each as an attribute.

      Every entry of ``self.indices`` is also reachable as ``self.<key>``.
      """
      self.indices = {
          'unicodeIndex': tm.makeUnicodeIndex(100),
          'strIndex': tm.makeStringIndex(100),
          'dateIndex': tm.makeDateIndex(100),
          'intIndex': tm.makeIntIndex(100),
          'floatIndex': tm.makeFloatIndex(100),
          'empty': Index([]),
          'tuples': MultiIndex.from_tuples(
              lzip(['foo', 'bar', 'baz'], [1, 2, 3])),
      }
      for attr, fixture in self.indices.items():
          setattr(self, attr, fixture)
  def test_wrong_number_names(self):
      """Assigning three names to any fixture index must raise ValueError."""
      for fixture in self.indices.values():
          with assertRaisesRegexp(ValueError, "^Length"):
              fixture.names = ["apple", "banana", "carrot"]
  def test_set_name_methods(self):
      """``set_names`` returns a renamed object; in-place ``rename`` mutates.

      Also checks that ``set_names`` rejects a bare string with TypeError
      and that an in-place ``rename`` stores a tuple name unchanged.
      """
      replacement = "This is the new name for this index"
      for target in (self.dateIndex, self.intIndex, self.unicodeIndex,
                     self.empty):
          name_before = target.name

          renamed = target.set_names([replacement])
          self.assertEqual(renamed.name, replacement)
          # the receiver keeps its old name
          self.assertEqual(target.name, name_before)

          # in-place rename hands back None and changes the receiver
          outcome = target.rename(replacement, inplace=True)
          self.assertIsNone(outcome)
          self.assertEqual(target.name, replacement)
          self.assertEqual(target.names, [replacement])

          # a plain string is rejected even though its length would fit
          with assertRaisesRegexp(TypeError, "list-like"):
              target.set_names("a")

      # an in-place rename leaves tuples and other containers alone
      tuple_name = ('A', 'B')
      target = self.intIndex
      target.rename(tuple_name, inplace=True)
      self.assertEqual(target.name, tuple_name)
      self.assertEqual(target.names, [tuple_name])
  def test_hash_error(self):
      """``hash()`` on the string index raises TypeError naming its class."""
      class_name = type(self.strIndex).__name__
      message = "unhashable type: %r" % class_name
      with tm.assertRaisesRegexp(TypeError, message):
          hash(self.strIndex)
  def test_new_axis(self):
      """Indexing with ``[None, :]`` gives a two-dimensional ndarray."""
      expanded = self.dateIndex[None, :]
      self.assertEqual(expanded.ndim, 2)
      tm.assert_isinstance(expanded, np.ndarray)
  def test_copy_and_deepcopy(self):
      """``copy``/``deepcopy`` give equal but distinct objects.

      Also checks the ``name`` and ``dtype`` keywords of ``Index.copy``.
      """
      import copy as copy_module

      source = self.strIndex
      for duplicate in (copy_module.copy, copy_module.deepcopy):
          clone = duplicate(source)
          self.assertIsNot(clone, source)
          self.assertTrue(clone.equals(source))

      renamed = self.strIndex.copy(deep=True, name="banana")
      self.assertEqual(renamed.name, "banana")

      as_int = self.intIndex.copy(dtype=int)
      self.assertEqual(as_int.dtype.kind, 'i')
  def test_duplicates(self):
      """An index made of repeated labels is not unique."""
      repeated = Index([0, 0, 0])
      self.assertFalse(repeated.is_unique)
  def test_sort(self):
      """Calling ``sort`` on the string index raises TypeError."""
      sorter = self.strIndex.sort
      self.assertRaises(TypeError, sorter)
  def test_mutability(self):
      """Item assignment on the string index raises TypeError."""
      setter = self.strIndex.__setitem__
      self.assertRaises(TypeError, setter, 0, 'foo')
  def test_constructor(self):
      """Cover plain construction, ndarray views, and copying construction."""
      # regular instance creation
      for fixture in (self.strIndex, self.dateIndex):
          tm.assert_contains_all(fixture, fixture)

      # casting an ndarray to Index through a view
      raw = np.array(self.strIndex)
      viewed = raw.view(Index)
      tm.assert_contains_all(raw, viewed)
      self.assert_numpy_array_equal(self.strIndex, viewed)

      # copy=True: later writes to the source array must not show through
      raw = np.array(self.strIndex)
      copied = Index(raw, copy=True, name='name')
      tm.assert_isinstance(copied, Index)
      self.assertEqual(copied.name, 'name')
      assert_array_equal(raw, copied)
      raw[0] = "SOMEBIGLONGSTRING"
      self.assertNotEqual(copied[0], "SOMEBIGLONGSTRING")

      # what to do here?
      # arr = np.array(5.)
      # self.assertRaises(Exception, arr.view, Index)
  def test_constructor_corner(self):
      """Passing a scalar to ``Index`` raises TypeError."""
      scalar = 0
      self.assertRaises(TypeError, Index, scalar)
  def test_constructor_from_series(self):
      """Index/DatetimeIndex built from a Series match a direct build.

      Covers GH 6273 (passing ``freq`` together with a Series) and
      GH 6274 (``infer_freq`` on a column of date strings).
      """
      stamps = [Timestamp('20110101'), Timestamp('20120101'),
                Timestamp('20130101')]
      expected = DatetimeIndex(stamps)
      s = Series(stamps)

      result = Index(s)
      self.assertTrue(result.equals(expected))
      result = DatetimeIndex(s)
      self.assertTrue(result.equals(expected))

      # GH 6273
      # create from a series, passing a freq
      month_starts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                      '5-1-1990']
      s = Series(pd.to_datetime(month_starts))
      result = DatetimeIndex(s, freq='MS')
      expected = DatetimeIndex(month_starts, freq='MS')
      self.assertTrue(result.equals(expected))

      df = pd.DataFrame(np.random.rand(5, 3))
      df['date'] = month_starts
      result = DatetimeIndex(df['date'], freq='MS')
      # This result used to be overwritten without ever being checked.
      self.assertTrue(result.equals(expected))

      # GH 6274
      # infer freq of same
      inferred = pd.infer_freq(df['date'])
      self.assertEqual(inferred, 'MS')
  def test_constructor_ndarray_like(self):
      """An object exposing ``__array__`` converts directly into an Index.

      GH 5460#issuecomment-44474502: anything satisfying the numpy ndarray
      interface should be accepted by the ``Index`` constructor.
      """
      class ArrayLike(object):
          """Minimal wrapper exposing its payload through ``__array__``."""

          def __init__(self, array):
              self.array = array

          def __array__(self, dtype=None):
              return self.array

      samples = [
          np.arange(5),
          np.array(['a', 'b', 'c']),
          pd.date_range('2000-01-01', periods=3).values,
      ]
      for payload in samples:
          expected = pd.Index(payload)
          result = pd.Index(ArrayLike(payload))
          self.assertTrue(result.equals(expected))
  def test_index_ctor_infer_periodindex(self):
      """``Index`` called on a period range yields a PeriodIndex."""
      from pandas import PeriodIndex, period_range

      periods = period_range('2012-1-1', freq='M', periods=3)
      wrapped = Index(periods)
      assert_array_equal(wrapped, periods)
      tm.assert_isinstance(wrapped, PeriodIndex)
  def test_constructor_simple_new(self):
      """``_simple_new`` on int, float and object data equals its source."""
      cases = (
          ([1, 2, 3, 4, 5], 'int'),
          ([1.1, np.nan, 2.2, 3.0], 'float'),
          (['A', 'B', 'C', np.nan], 'obj'),
      )
      for values, label in cases:
          source = Index(values, name=label)
          rebuilt = source._simple_new(source, label)
          self.assertTrue(rebuilt.equals(source))
  def test_copy(self):
      """``copy()`` keeps the name of an empty index."""
      original = Index([], name='Foo')
      duplicate = original.copy()
      self.assertEqual(duplicate.name, 'Foo')
  def test_view(self):
      """``view()`` keeps the name of an empty index."""
      original = Index([], name='Foo')
      viewed = original.view()
      self.assertEqual(viewed.name, 'Foo')
  def test_astype(self):
      """``astype('i8')`` yields a usable index and passes the name on."""
      converted = self.intIndex.astype('i8')
      # smoke check: a lookup on the converted index must not raise
      converted.get_loc(5)

      # the name is passed on to the result
      self.intIndex.name = 'foobar'
      converted = self.intIndex.astype('i8')
      self.assertEqual(converted.name, 'foobar')
  def test_compat(self):
      """Smoke test: ``tolist()`` on the string index must not raise."""
      fixture = self.strIndex
      fixture.tolist()
  def test_equals(self):
      """``equals`` needs another Index with the same labels."""
      def abc():
          # a fresh object per call, so no assertion compares an index
          # with itself
          return Index(['a', 'b', 'c'])

      # same
      self.assertTrue(abc().equals(abc()))
      # different length
      self.assertFalse(abc().equals(Index(['a', 'b'])))
      # same length, different values
      self.assertFalse(abc().equals(Index(['a', 'b', 'd'])))
      # Must also be an Index
      self.assertFalse(abc().equals(['a', 'b', 'c']))
  def test_insert(self):
      """GH 7256: ``insert`` accepts positive and negative positions."""
      base = Index(['b', 'c', 'd'])

      # 0th element
      at_front = base.insert(0, 'a')
      self.assertTrue(Index(['a', 'b', 'c', 'd']).equals(at_front))

      # position -1 follows Python list behaviour
      before_last = base.insert(-1, 'e')
      self.assertTrue(Index(['b', 'c', 'e', 'd']).equals(before_last))

      # a positive and a negative position naming the same slot agree
      from_left = base.insert(1, 'z')
      from_right = base.insert(-2, 'z')
      self.assertTrue(from_left.equals(from_right))

      # inserting into an empty index
      null_index = Index([])
      self.assertTrue(Index(['a']).equals(null_index.insert(0, 'a')))
  def test_delete(self):
      """``delete`` drops one position, keeps the name, rejects bad positions."""
      idx = Index(['a', 'b', 'c', 'd'], name='idx')

      expected = Index(['b', 'c', 'd'], name='idx')
      result = idx.delete(0)
      self.assertTrue(result.equals(expected))
      self.assertEqual(result.name, expected.name)

      expected = Index(['a', 'b', 'c'], name='idx')
      result = idx.delete(-1)
      self.assertTrue(result.equals(expected))
      self.assertEqual(result.name, expected.name)

      with tm.assertRaises((IndexError, ValueError)):
          # either one, depending on the numpy version
          idx.delete(5)
  def test_identical(self):
      """``identical`` is stricter than ``equals``: names must match too."""
      left = Index(['a', 'b', 'c'])
      right = Index(['a', 'b', 'c'])
      self.assertTrue(left.identical(right))

      # renaming one side keeps equality but breaks identity
      left = left.rename('foo')
      self.assertTrue(left.equals(right))
      self.assertFalse(left.identical(right))

      # renaming the other side the same way restores identity
      right = right.rename('foo')
      self.assertTrue(left.identical(right))

      pairs = [('a', 'a'), ('a', 'b'), ('b', 'a')]
      tupleized = Index(pairs)
      flat = Index(pairs, tupleize_cols=False)
      self.assertFalse(tupleized.identical(flat))
  def test_is_(self):
      """``is_`` is true for views of the same index and false otherwise."""
      ind = Index(range(10))
      self.assertTrue(ind.is_(ind))
      self.assertTrue(ind.is_(ind.view().view().view().view()))

      unrelated = (
          Index(range(10)),
          ind.copy(),
          ind.copy(deep=False),
          ind[:],
          ind.view(np.ndarray).view(Index),
          np.array(range(10)),
      )
      for other in unrelated:
          self.assertFalse(ind.is_(other))

      # quasi-implementation dependent
      self.assertTrue(ind.is_(ind.view().base))

      # renaming a view does not break the relationship, in either direction
      renamed_view = ind.view()
      renamed_view.name = 'bob'
      self.assertTrue(ind.is_(renamed_view))
      self.assertTrue(renamed_view.is_(ind))

      # sharing the underlying data is not enough
      self.assertFalse(ind.is_(Index(ind.values)))
      shared = np.array(range(1, 11))
      first = Index(shared, copy=False)
      second = Index(shared, copy=False)
      self.assertFalse(first.is_(second))
  def test_asof(self):
      """Exercise ``asof`` at, before and after the range of the date index."""
      dates = self.dateIndex
      one_day = timedelta(1)

      first = dates[0]
      self.assertIs(dates.asof(first), first)
      # nothing precedes the first label
      self.assertTrue(np.isnan(dates.asof(first - one_day)))

      last = dates[-1]
      self.assertEqual(dates.asof(last + one_day), last)

      # a plain datetime key still comes back as a Timestamp
      as_datetime = dates[0].to_datetime()
      tm.assert_isinstance(dates.asof(as_datetime), Timestamp)
  def test_nanosecond_index_access(self):
      """A nanosecond-resolution Timestamp key matches the ``asof`` lookup."""
      tm._skip_if_not_numpy17_friendly()

      base_ns = Series([Timestamp('20130101')]).values.view('i8')[0]
      stamps = DatetimeIndex([base_ns + 50 + i for i in range(100)])
      series = Series(np.random.randn(100), index=stamps)
      first_value = series.asof(series.index[0])

      # this does not yet work, as parsing strings is done via dateutil
      #self.assertEqual(first_value, x['2013-01-01 00:00:00.000000050+0000'])

      key = Timestamp(
          np.datetime64('2013-01-01 00:00:00.000000050+0000', 'ns'))
      self.assertEqual(first_value, series[key])
  def test_argsort(self):
      """``Index.argsort`` agrees with ``argsort`` on an ndarray copy."""
      expected = np.array(self.strIndex).argsort()
      result = self.strIndex.argsort()
      self.assert_numpy_array_equal(result, expected)
  def test_comparators(self):
      """Comparing an index with a scalar gives a plain ndarray result.

      Each of the six comparison operators is applied to the index and
      to an ndarray copy; both results must agree.
      """
      index = self.dateIndex
      element = _to_m8(index[len(index) // 2])
      arr = np.array(index)

      comparisons = (operator.eq, operator.ne, operator.gt,
                     operator.lt, operator.ge, operator.le)
      for compare in comparisons:
          from_array = compare(arr, element)
          from_index = compare(index, element)
          tm.assert_isinstance(from_index, np.ndarray)
          self.assertNotIsInstance(from_index, Index)
          self.assert_numpy_array_equal(from_array, from_index)
  def test_booleanindex(self):
      """A boolean mask, as ndarray or list, gives a consistent sub-index."""
      mask = np.repeat(True, len(self.strIndex)).astype(bool)
      mask[5:30:2] = False

      for selector in (mask, list(mask)):
          subset = self.strIndex[selector]
          for position, label in enumerate(subset):
              self.assertEqual(subset.get_loc(label), position)
  def test_fancy(self):
      """Labels chosen by an integer list round-trip through ``get_loc``."""
      picked = self.strIndex[[1, 2, 3]]
      for label in picked:
          position = picked.get_loc(label)
          self.assertEqual(label, picked[position])
  def test_empty_fancy(self):
      """Empty list/int/bool indexers give an empty index of the same class.

      An empty float array is expected to raise IndexError instead.
      """
      # np.float64 replaces the np.float_ alias that NumPy 2.0 removed
      empty_farr = np.array([], dtype=np.float64)
      empty_iarr = np.array([], dtype=np.int_)
      empty_barr = np.array([], dtype=np.bool_)

      # pd.DatetimeIndex is excluded, because it overrides getitem and should
      # be tested separately.
      for idx in [self.strIndex, self.intIndex, self.floatIndex]:
          empty_idx = idx.__class__([])
          for indexer in ([], empty_iarr, empty_barr):
              self.assertTrue(idx[indexer].identical(empty_idx))

          # np.ndarray only accepts ndarray of int & bool dtypes, so should
          # Index.
          self.assertRaises(IndexError, idx.__getitem__, empty_farr)
  def test_getitem(self):
      """Element 5 of the date index equals element 5 of its ndarray copy."""
      as_array = np.array(self.dateIndex)
      expected = _to_m8(self.dateIndex[5])
      self.assertEqual(expected, as_array[5])
  def test_shift(self):
      """Exercise ``shift`` with a zero count, a timedelta, and freq strings."""
      dates = self.dateIndex

      # shifting by zero hands back the very same object
      unmoved = dates.shift(0, timedelta(1))
      self.assertIs(unmoved, dates)

      by_days = dates.shift(5, timedelta(1))
      self.assert_numpy_array_equal(by_days, dates + timedelta(5))

      by_bday = dates.shift(1, 'B')
      self.assert_numpy_array_equal(by_bday, dates + offsets.BDay())

      # the name carries over to a shifted result
      by_bday.name = 'shifted'
      self.assertEqual(by_bday.name, by_bday.shift(1, 'D').name)
  def test_intersection(self):
      """Check ``intersection`` contents, corner cases and name handling."""
      wide = self.strIndex[:20]
      narrow = self.strIndex[:10]
      common = wide.intersection(narrow)
      self.assertTrue(tm.equalContents(common, narrow))

      # Corner case: intersecting with itself returns the same object
      self.assertIs(wide.intersection(wide), wide)

      # non-iterable input
      assertRaisesRegexp(TypeError, "iterable", wide.intersection, 0.5)

      left = Index([1, 2, 3, 4, 5], name='idx')

      # if target has the same name, it is preserved
      same_named = Index([3, 4, 5, 6, 7], name='idx')
      expected_named = Index([3, 4, 5], name='idx')
      got_named = left.intersection(same_named)
      self.assertTrue(got_named.equals(expected_named))
      self.assertEqual(got_named.name, expected_named.name)

      # if target name is different, it will be reset
      other_named = Index([3, 4, 5, 6, 7], name='other')
      expected_unnamed = Index([3, 4, 5], name=None)
      got_unnamed = left.intersection(other_named)
      self.assertTrue(got_unnamed.equals(expected_unnamed))
      self.assertEqual(got_unnamed.name, expected_unnamed.name)

      # non monotonic: same expectations, contents compared without order
      left = Index([5, 3, 2, 4, 1], name='idx')
      same_named = Index([4, 7, 6, 5, 3], name='idx')
      got_named = left.intersection(same_named)
      self.assertTrue(tm.equalContents(got_named, expected_named))
      self.assertEqual(got_named.name, expected_named.name)

      other_named = Index([4, 7, 6, 5, 3], name='other')
      got_unnamed = left.intersection(other_named)
      self.assertTrue(tm.equalContents(got_unnamed, expected_unnamed))
      self.assertEqual(got_unnamed.name, expected_unnamed.name)
  def test_union(self):
      """Check ``union`` contents, identity corner cases and name handling."""
      first = self.strIndex[5:20]
      second = self.strIndex[:10]
      everything = self.strIndex[:20]

      combined = first.union(second)
      self.assertTrue(tm.equalContents(combined, everything))

      # Corner cases: each of these must hand back ``first`` itself
      self.assertIs(first.union(first), first)
      self.assertIs(first.union([]), first)
      self.assertIs(Index([]).union(first), first)

      # non-iterable input
      assertRaisesRegexp(TypeError, "iterable", first.union, 0.5)

      # preserve names: a shared name is kept
      first.name = 'A'
      second.name = 'A'
      combined = first.union(second)
      self.assertEqual(combined.name, 'A')

      # differing names leave the result unnamed
      second.name = 'B'
      combined = first.union(second)
      self.assertIsNone(combined.name)
  def test_add(self):
      """``+`` between indexes yields the combined set of labels."""
      strings = self.strIndex
      dates = self.dateIndex

      mixed = strings + dates
      doubled = strings + strings

      # np.append needs object data on the date side
      if dates.dtype == np.object_:
          date_values = dates
      else:
          date_values = dates.astype('O')
      appended = np.append(strings, date_values)

      self.assertTrue(tm.equalContents(mixed, appended))
      self.assertTrue(tm.equalContents(doubled, strings))
      tm.assert_contains_all(strings, mixed)
      tm.assert_contains_all(strings, doubled)
      tm.assert_contains_all(dates, mixed)
  def test_append_multiple(self):
      """``append`` takes a list of indexes, including an empty list."""
      whole = Index(['a', 'b', 'c', 'd', 'e', 'f'])
      head, middle, tail = whole[:2], whole[2:4], whole[4:]

      rebuilt = head.append([middle, tail])
      self.assertTrue(rebuilt.equals(whole))

      # appending nothing leaves the labels unchanged
      unchanged = whole.append([])
      self.assertTrue(unchanged.equals(whole))
  def test_append_empty_preserve_name(self):
      """Appending to an empty index keeps the name only when both agree."""
      # matching names survive
      joined = Index([], name='foo').append(Index([1, 2, 3], name='foo'))
      self.assertEqual(joined.name, 'foo')

      # differing names leave the result unnamed
      joined = Index([], name='foo').append(Index([1, 2, 3], name='bar'))
      self.assertIsNone(joined.name)
  def test_add_string(self):
      """Adding a string to an index appends it to each label (bug report)."""
      letters = Index(['a', 'b', 'c'])
      suffixed = letters + 'foo'
      self.assertNotIn('a', suffixed)
      self.assertIn('afoo', suffixed)
  def test_iadd_string(self):
      """``+=`` with a string suffixes each label."""
      letters = pd.Index(['a', 'b', 'c'])
      # doesn't fail test unless there is a check before `+=`
      self.assertIn('a', letters)

      letters += '_x'
      self.assertIn('a_x', letters)
  def test_diff(self):
      """Check ``-``/``diff`` contents, name handling and input validation."""
      first = self.strIndex[5:20]
      second = self.strIndex[:10]
      answer = self.strIndex[10:20]
      first.name = 'name'

      # different names
      result = first - second
      self.assertTrue(tm.equalContents(result, answer))
      # assertIsNone matches how the other tests in this file check names
      self.assertIsNone(result.name)

      # same names
      second.name = 'name'
      result = first - second
      self.assertEqual(result.name, 'name')

      # with empty
      result = first.diff([])
      self.assertTrue(tm.equalContents(result, first))
      self.assertEqual(result.name, first.name)

      # with everything
      result = first.diff(first)
      self.assertEqual(len(result), 0)
      self.assertEqual(result.name, first.name)

      # non-iterable input
      assertRaisesRegexp(TypeError, "iterable", first.diff, 0.5)
  def test_symmetric_diff(self):
      """Exercise ``sym_diff``/``^`` on plain, multi-level and NaN data.

      Also covers a non-Index ``other``, the ``result_name`` keyword, and
      the TypeError raised by ``idx1 - 1``.
      """
      # smoke
      idx1 = Index([1, 2, 3, 4], name='idx1')
      idx2 = Index([2, 3, 4, 5])
      result = idx1.sym_diff(idx2)
      expected = Index([1, 5])
      self.assertTrue(tm.equalContents(result, expected))
      self.assertIsNone(result.name)

      # __xor__ syntax
      expected = idx1 ^ idx2
      self.assertTrue(tm.equalContents(result, expected))
      self.assertIsNone(result.name)

      # multiIndex
      idx1 = MultiIndex.from_tuples(self.tuples)
      idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
      result = idx1.sym_diff(idx2)
      expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)])
      self.assertTrue(tm.equalContents(result, expected))

      # nans:
      # GH #6444, sorting of nans. Make sure the number of nans is right
      # and the correct non-nan values are there. punt on sorting.
      idx1 = Index([1, 2, 3, np.nan])
      idx2 = Index([0, 1, np.nan])
      result = idx1.sym_diff(idx2)
      # expected = Index([0.0, np.nan, 2.0, 3.0, np.nan])
      nans = pd.isnull(result)
      self.assertEqual(nans.sum(), 2)
      self.assertEqual((~nans).sum(), 3)
      # plain loop: the assertions are run for their side effects only
      for value in (0.0, 2.0, 3.0):
          self.assertIn(value, result)

      # other not an Index:
      idx1 = Index([1, 2, 3, 4], name='idx1')
      idx2 = np.array([2, 3, 4, 5])
      expected = Index([1, 5])
      result = idx1.sym_diff(idx2)
      self.assertTrue(tm.equalContents(result, expected))
      self.assertEqual(result.name, 'idx1')

      result = idx1.sym_diff(idx2, result_name='new_name')
      self.assertTrue(tm.equalContents(result, expected))
      self.assertEqual(result.name, 'new_name')

      # other isn't iterable
      with tm.assertRaises(TypeError):
          idx1 - 1
  def test_pickle(self):
      """A pickle round-trip preserves class, values and name."""
      def roundtrip_and_check(index):
          restored = pickle.loads(pickle.dumps(index))
          tm.assert_isinstance(restored, Index)
          self.assert_numpy_array_equal(restored, index)
          self.assertEqual(restored.name, index.name)
          # tm.assert_dict_equal(unpickled.indexMap, index.indexMap)

      roundtrip_and_check(self.strIndex)

      # repeat once the index carries a name
      self.strIndex.name = 'foo'
      roundtrip_and_check(self.strIndex)

      roundtrip_and_check(self.dateIndex)
  def test_is_numeric(self):
      """Only the int and float fixtures report ``is_numeric()``."""
      for non_numeric in (self.dateIndex, self.strIndex):
          self.assertFalse(non_numeric.is_numeric())
      for numeric in (self.intIndex, self.floatIndex):
          self.assertTrue(numeric.is_numeric())
  def test_is_all_dates(self):
      """Only the date fixture reports ``is_all_dates``."""
      self.assertTrue(self.dateIndex.is_all_dates)
      for other in (self.strIndex, self.intIndex):
          self.assertFalse(other.is_all_dates)
  def test_summary(self):
      """``summary`` runs on the fixtures and leaves format-like labels raw."""
      self._check_method_works(Index.summary)

      # GH3869
      tricky = Index(['{other}%s', "~:{range}:0"], name='A')
      text = tricky.summary()
      # shouldn't be formatted accidentally.
      for label in ('~:{range}:0', '{other}%s'):
          self.assertIn(label, text)
  def test_format(self):
      """Check ``format`` on datetime, complex/missing and empty data."""
      self._check_method_works(Index.format)

      stamped = Index([datetime.now()])
      self.assertEqual(stamped.format(), [str(stamped[0])])

      # 2845: both NaN and None are rendered as 'NaN'
      # (is None really allowed here?)
      for missing in (np.nan, None):
          index = Index([1, 2.0+3.0j, missing])
          formatted = index.format()
          expected = [str(index[0]), str(index[1]), u('NaN')]
          self.assertEqual(formatted, expected)

      # smoke check on an empty slice
      self.strIndex[:0].format()
  def test_format_with_name_time_info(self):
      """``format(name=True)`` puts the name first (bug fixed 12/20/2011)."""
      offset = timedelta(hours=4)
      shifted = [stamp + offset for stamp in self.dateIndex]
      dates = Index(shifted, name='something')

      lines = dates.format(name=True)
      self.assertEqual(lines[0], 'something')
  def test_format_datetime_with_time(self):
      """Every entry shows a time part when any entry has one."""
      stamps = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
      rendered = stamps.format()
      self.assertEqual(len(rendered), 2)
      self.assertEqual(rendered,
                       ['2012-02-07 00:00:00', '2012-02-07 23:00:00'])
  def test_format_none(self):
      """Formatting an index holding ``None`` leaves that entry as ``None``."""
      idx = Index(['a', 'b', 'c', None])
      idx.format()
      self.assertIsNone(idx[3])
  def test_take(self):
      """``take`` agrees with list-based ``__getitem__``."""
      positions = [4, 3, 0, 2]
      taken = self.dateIndex.take(positions)
      self.assertTrue(taken.equals(self.dateIndex[positions]))
  def _check_method_works(self, method):
      """Call ``method`` on six fixture indexes as a smoke test.

      ``method`` is invoked with the index as its only argument; the
      return values are discarded.
      """
      fixtures = (self.empty, self.dateIndex, self.unicodeIndex,
                  self.strIndex, self.intIndex, self.tuples)
      for fixture in fixtures:
          method(fixture)
  def test_get_indexer(self):
      """Check ``get_indexer`` for exact, pad/ffill and backfill/bfill."""
      dense = Index([1, 2, 3, 4, 5])
      sparse = Index([2, 4, 6])

      exact = dense.get_indexer(sparse)
      assert_almost_equal(exact, [1, 3, -1])

      # 'pad' and 'ffill' give the same result
      padded = sparse.get_indexer(dense, method='pad')
      assert_almost_equal(padded, [-1, 0, 0, 1, 1])
      forward = sparse.get_indexer(dense, method='ffill')
      assert_almost_equal(padded, forward)

      # 'backfill' and 'bfill' give the same result
      backfilled = sparse.get_indexer(dense, method='backfill')
      assert_almost_equal(backfilled, [0, 0, 1, 1, 2])
      backward = sparse.get_indexer(dense, method='bfill')
      assert_almost_equal(backfilled, backward)
  def test_slice_locs(self):
      """Check ``slice_locs`` bounds, and KeyError on the reversed index."""
      idx = Index([0, 1, 2, 5, 6, 7, 9, 10])
      n = len(idx)

      # (positional args, keyword args, expected bounds)
      cases = (
          ((), {'start': 2}, (2, n)),
          ((), {'start': 3}, (3, n)),
          ((3, 8), {}, (3, 6)),
          ((5, 10), {}, (3, n)),
          ((), {'end': 8}, (0, 6)),
          ((), {'end': 9}, (0, 7)),
      )
      for args, kwargs, bounds in cases:
          self.assertEqual(idx.slice_locs(*args, **kwargs), bounds)

      backwards = idx[::-1]
      for start, stop in ((8, 2), (7, 3)):
          self.assertRaises(KeyError, backwards.slice_locs, start, stop)
  def test_slice_locs_dup(self):
      """``slice_locs`` bounds span every occurrence of a repeated label."""
      idx = Index(['a', 'a', 'b', 'c', 'd', 'd'])

      self.assertEqual(idx.slice_locs('a', 'd'), (0, 6))
      self.assertEqual(idx.slice_locs(end='d'), (0, 6))
      self.assertEqual(idx.slice_locs('a', 'c'), (0, 4))
      self.assertEqual(idx.slice_locs('b', 'd'), (2, 6))
  def test_drop(self):
      """``drop`` removes labels and raises ValueError for unknown ones."""
      strings = self.strIndex
      total = len(strings)

      # drop a run of labels from the middle
      remaining = strings.drop(strings[lrange(5, 10)])
      keep_positions = lrange(5) + lrange(10, total)
      self.assertTrue(remaining.equals(strings[keep_positions]))

      self.assertRaises(ValueError, strings.drop, ['foo', 'bar'])

      # drop a single label
      remaining = strings.drop(strings[0])
      self.assertTrue(remaining.equals(strings[1:]))

      numbers = Index([1, 2, 3])
      remaining = numbers.drop(1)
      self.assertTrue(remaining.equals(Index([2, 3])))
  def test_tuple_union_bug(self):
      """Union and intersection of record-array indexes stay one-dimensional.

      Uses the module-level ``pd``/``np`` imports rather than re-importing
      pandas and numpy inside the test.
      """
      record_dtype = [('num', int), ('let', 'a1')]
      aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
                       dtype=record_dtype)
      aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B'),
                        (1, 'C'), (2, 'C')], dtype=record_dtype)

      idx1 = pd.Index(aidx1)
      idx2 = pd.Index(aidx2)

      # intersection broken?
      int_idx = idx1.intersection(idx2)
      # needs to be 1d like idx1 and idx2
      expected = idx1[:4]  # pandas.Index(sorted(set(idx1) & set(idx2)))
      self.assertEqual(int_idx.ndim, 1)
      self.assertTrue(int_idx.equals(expected))

      # union broken
      union_idx = idx1.union(idx2)
      expected = idx2
      self.assertEqual(union_idx.ndim, 1)
      self.assertTrue(union_idx.equals(expected))
  def test_is_monotonic_incomparable(self):
      """Mixed int/datetime labels report ``is_monotonic`` as false."""
      mixed = Index([5, datetime.now(), 7])
      self.assertFalse(mixed.is_monotonic)
  def test_get_set_value(self):
      """``get_value``/``set_value`` reach the array slot of a label."""
      data = np.random.randn(100)
      label = self.dateIndex[67]

      fetched = self.dateIndex.get_value(data, label)
      assert_almost_equal(fetched, data[67])

      self.dateIndex.set_value(data, label, 10)
      self.assertEqual(data[67], 10)
  def test_isin(self):
      """``isin`` returns a boolean mask, including for an empty index."""
      wanted = ['foo', 'bar']

      labels = Index(['qux', 'baz', 'foo', 'bar'])
      mask = labels.isin(wanted)
      self.assert_numpy_array_equal(
          mask, np.array([False, False, True, True]))

      # empty, return dtype bool
      mask = Index([]).isin(wanted)
      self.assertEqual(len(mask), 0)
      self.assertEqual(mask.dtype, np.bool_)
  def test_boolean_cmp(self):
      """``==`` against a list gives an all-true boolean non-Index result."""
      numbers = [1, 2, 3, 4]
      outcome = (Index(numbers) == numbers)

      self.assertTrue(outcome.all())
      self.assertEqual(outcome.dtype, 'bool')
      self.assertNotIsInstance(outcome, Index)
  def test_get_level_values(self):
      """Level 0 of the flat string index equals the index itself."""
      level_zero = self.strIndex.get_level_values(0)
      self.assertTrue(level_zero.equals(self.strIndex))
  def test_slice_keep_name(self):
      """A slice keeps the name of its parent index."""
      parent = Index(['a', 'b'], name='asdf')
      tail = parent[1:]
      self.assertEqual(parent.name, tail.name)
  def test_join_self(self):
      """Joining a fixture index with itself returns the same object."""
      # fixtures are instance attributes of the form self.<name>Index
      for prefix in ('unicode', 'str', 'date', 'int', 'float'):
          fixture = getattr(self, '{0}Index'.format(prefix))
          for how in ('outer', 'inner', 'left', 'right'):
              self.assertIs(fixture, fixture.join(fixture, how=how))
  def test_indexing_doesnt_change_class(self):
      """Integer-only picks from a mixed index keep object dtype."""
      mixed = Index([1, 2, 3, 'a', 'b', 'c'])

      sliced = mixed[1:3]
      self.assertTrue(
          sliced.identical(pd.Index([2, 3], dtype=np.object_)))

      picked = mixed[[0, 1]]
      self.assertTrue(
          picked.identical(pd.Index([1, 2], dtype=np.object_)))
  def test_outer_join_sort(self):
      """Outer-joining int and date indexes warns and gives the object union."""
      ints = Index(np.random.permutation(15))
      dates = tm.makeDateIndex(10)

      with tm.assert_produces_warning(RuntimeWarning):
          joined = ints.join(dates, how='outer')

      # right_idx in this case because DatetimeIndex has join precedence over
      # Int64Index
      dates_as_object = dates.astype(object)
      expected = dates_as_object.union(ints.astype(object))
      tm.assert_index_equal(joined, expected)
  def test_nan_first_take_datetime(self):
      """``take`` matches element-wise picks when the first label is NaT."""
      stamps = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')])
      positions = [-1, 0, 1]

      taken = stamps.take(positions)
      expected = Index([stamps[-1], stamps[0], stamps[1]])
      tm.assert_index_equal(taken, expected)
  710. class TestFloat64Index(tm.TestCase):
  711. _multiprocess_can_split_ = True
  def setUp(self):
      """Build two Float64Index fixtures: mixed int/float input and floats."""
      self.mixed = Float64Index([1.5, 2, 3, 4, 5])
      multiples = np.arange(5) * 2.5
      self.float = Float64Index(multiples)
  def test_hash_error(self):
      """``hash()`` on the float index raises TypeError naming its class."""
      class_name = type(self.float).__name__
      message = "unhashable type: %r" % class_name
      with tm.assertRaisesRegexp(TypeError, message):
          hash(self.float)
  def test_repr_roundtrip(self):
      """Evaluating the ``repr`` of each fixture gives back an equal index."""
      for fixture in (self.mixed, self.float):
          # repr comes from the fixtures built in setUp, not external input
          rebuilt = eval(repr(fixture))
          tm.assert_index_equal(rebuilt, fixture)
  def check_is_index(self, i):
      """Assert that ``i`` is an Index but not a Float64Index."""
      self.assertIsInstance(i, Index)
      self.assertNotIsInstance(i, Float64Index)
  def check_coerce(self, a, b, is_float_index=True):
      """Assert ``a.equals(b)`` and check the class of ``b``.

      With ``is_float_index`` true, ``b`` must be a Float64Index;
      otherwise it must pass ``check_is_index``.
      """
      self.assertTrue(a.equals(b))
      if not is_float_index:
          self.check_is_index(b)
          return
      self.assertIsInstance(b, Float64Index)
  def test_constructor(self):
      """Float64Index always ends up float64, whatever input or dtype is given."""
      # explicit construction from a list of ints
      index = Float64Index([1, 2, 3, 4, 5])
      self.assertIsInstance(index, Float64Index)
      as_floats = np.array([1, 2, 3, 4, 5], dtype='float64')
      self.assertTrue((index.values == as_floats).all())

      # int ndarray, then a list with a leading float
      for data in (np.array([1, 2, 3, 4, 5]), [1., 2, 3, 4, 5]):
          index = Float64Index(data)
          self.assertIsInstance(index, Float64Index)

      index = Float64Index(np.array([1., 2, 3, 4, 5]))
      self.assertIsInstance(index, Float64Index)
      self.assertEqual(index.dtype, float)

      # a float32 dtype request still results in float64
      for data in (np.array([1., 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])):
          index = Float64Index(data, dtype=np.float32)
          self.assertIsInstance(index, Float64Index)
          self.assertEqual(index.dtype, np.float64)

      # nan handling
      result = Float64Index([np.nan, np.nan])
      self.assertTrue(pd.isnull(result.values).all())
      result = Float64Index(np.array([np.nan]))
      self.assertTrue(pd.isnull(result.values).all())
      result = Index(np.array([np.nan]))
      self.assertTrue(pd.isnull(result.values).all())
  757. def test_constructor_invalid(self):
  758. # invalid
  759. self.assertRaises(TypeError, Float64Index, 0.)
  760. self.assertRaises(TypeError, Float64Index, ['a','b',0.])
  761. self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')])
  762. def test_constructor_coerce(self):
  763. self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5]))
  764. self.check_coerce(self.float,Index(np.arange(5) * 2.5))
  765. self.check_coerce(self.float,Index(np.array(np.arange(5) * 2.5, dtype=object)))
  766. def test_constructor_explicit(self):
  767. # these don't auto convert
  768. self.check_coerce(self.float,Index((np.arange(5) * 2.5), dtype=object),
  769. is_float_index=False)
  770. self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5],dtype=object),
  771. is_float_index=False)
  772. def test_astype(self):
  773. result = self.float.astype(object)
  774. self.assertTrue(result.equals(self.float))
  775. self.assertTrue(self.float.equals(result))
  776. self.check_is_index(result)
  777. i = self.mixed.copy()
  778. i.name = 'foo'
  779. result = i.astype(object)
  780. self.assertTrue(result.equals(i))
  781. self.assertTrue(i.equals(result))
  782. self.check_is_index(result)
  783. def test_equals(self):
  784. i = Float64Index([1.0,2.0])
  785. self.assertTrue(i.equals(i))
  786. self.assertTrue(i.identical(i))
  787. i2 = Float64Index([1.0,2.0])
  788. self.assertTrue(i.equals(i2))
  789. i = Float64Index([1.0,np.nan])
  790. self.assertTrue(i.equals(i))
  791. self.assertTrue(i.identical(i))
  792. i2 = Float64Index([1.0,np.nan])
  793. self.assertTrue(i.equals(i2))
  794. def test_contains_nans(self):
  795. i = Float64Index([1.0, 2.0, np.nan])
  796. self.assertTrue(np.nan in i)
  797. def test_contains_not_nans(self):
  798. i = Float64Index([1.0, 2.0, np.nan])
  799. self.assertTrue(1.0 in i)
  800. def test_doesnt_contain_all_the_things(self):
  801. i = Float64Index([np.nan])
  802. self.assertFalse(i.isin([0]).item())
  803. self.assertFalse(i.isin([1]).item())
  804. self.assertTrue(i.isin([np.nan]).item())
  805. def test_nan_multiple_containment(self):
  806. i = Float64Index([1.0, np.nan])
  807. np.testing.assert_array_equal(i.isin([1.0]), np.array([True, False]))
  808. np.testing.assert_array_equal(i.isin([2.0, np.pi]),
  809. np.array([False, False]))
  810. np.testing.assert_array_equal(i.isin([np.nan]),
  811. np.array([False, True]))
  812. np.testing.assert_array_equal(i.isin([1.0, np.nan]),
  813. np.array([True, True]))
  814. i = Float64Index([1.0, 2.0])
  815. np.testing.assert_array_equal(i.isin([np.nan]),
  816. np.array([False, False]))
  817. def test_astype_from_object(self):
  818. index = Index([1.0, np.nan, 0.2], dtype='object')
  819. result = index.astype(float)
  820. expected = Float64Index([1.0, np.nan, 0.2])
  821. tm.assert_equal(result.dtype, expected.dtype)
  822. tm.assert_index_equal(result, expected)
  823. class TestInt64Index(tm.TestCase):
  824. _multiprocess_can_split_ = True
  825. def setUp(self):
  826. self.index = Int64Index(np.arange(0, 20, 2))
  827. def test_too_many_names(self):
  828. def testit():
  829. self.index.names = ["roger", "harold"]
  830. assertRaisesRegexp(ValueError, "^Length", testit)
  831. def test_constructor(self):
  832. # pass list, coerce fine
  833. index = Int64Index([-5, 0, 1, 2])
  834. expected = np.array([-5, 0, 1, 2], dtype=np.int64)
  835. self.assert_numpy_array_equal(index, expected)
  836. # from iterable
  837. index = Int64Index(iter([-5, 0, 1, 2]))
  838. self.assert_numpy_array_equal(index, expected)
  839. # scalar raise Exception
  840. self.assertRaises(TypeError, Int64Index, 5)
  841. # copy
  842. arr = self.index.values
  843. new_index = Int64Index(arr, copy=True)
  844. self.assert_numpy_array_equal(new_index, self.index)
  845. val = arr[0] + 3000
  846. # this should not change index
  847. arr[0] = val
  848. self.assertNotEqual(new_index[0], val)
  849. def test_constructor_corner(self):
  850. arr = np.array([1, 2, 3, 4], dtype=object)
  851. index = Int64Index(arr)
  852. self.assertEqual(index.values.dtype, np.int64)
  853. self.assertTrue(index.equals(arr))
  854. # preventing casting
  855. arr = np.array([1, '2', 3, '4'], dtype=object)
  856. with tm.assertRaisesRegexp(TypeError, 'casting'):
  857. Int64Index(arr)
  858. arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1]
  859. with tm.assertRaisesRegexp(TypeError, 'casting'):
  860. Int64Index(arr_with_floats)
  861. def test_hash_error(self):
  862. with tm.assertRaisesRegexp(TypeError,
  863. "unhashable type: %r" %
  864. type(self.index).__name__):
  865. hash(self.index)
  866. def test_copy(self):
  867. i = Int64Index([], name='Foo')
  868. i_copy = i.copy()
  869. self.assertEqual(i_copy.name, 'Foo')
  870. def test_view(self):
  871. i = Int64Index([], name='Foo')
  872. i_view = i.view()
  873. self.assertEqual(i_view.name, 'Foo')
  874. def test_coerce_list(self):
  875. # coerce things
  876. arr = Index([1, 2, 3, 4])
  877. tm.assert_isinstance(arr, Int64Index)
  878. # but not if explicit dtype passed
  879. arr = Index([1, 2, 3, 4], dtype=object)
  880. tm.assert_isinstance(arr, Index)
  881. def test_dtype(self):
  882. self.assertEqual(self.index.dtype, np.int64)
  883. def test_is_monotonic(self):
  884. self.assertTrue(self.index.is_monotonic)
  885. index = Int64Index([4, 3, 2, 1])
  886. self.assertFalse(index.is_monotonic)
  887. def test_equals(self):
  888. same_values = Index(self.index, dtype=object)
  889. self.assertTrue(self.index.equals(same_values))
  890. self.assertTrue(same_values.equals(self.index))
  891. def test_identical(self):
  892. i = Index(self.index.copy())
  893. self.assertTrue(i.identical(self.index))
  894. same_values_different_type = Index(i, dtype=object)
  895. self.assertFalse(i.identical(same_values_different_type))
  896. i = self.index.copy(dtype=object)
  897. i = i.rename('foo')
  898. same_values = Index(i, dtype=object)
  899. self.assertTrue(same_values.identical(self.index.copy(dtype=object)))
  900. self.assertFalse(i.identical(self.index))
  901. self.assertTrue(Index(same_values, name='foo', dtype=object
  902. ).identical(i))
  903. self.assertFalse(
  904. self.index.copy(dtype=object)
  905. .identical(self.index.copy(dtype='int64')))
  906. def test_get_indexer(self):
  907. target = Int64Index(np.arange(10))
  908. indexer = self.index.get_indexer(target)
  909. expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1])
  910. self.assert_numpy_array_equal(indexer, expected)
  911. def test_get_indexer_pad(self):
  912. target = Int64Index(np.arange(10))
  913. indexer = self.index.get_indexer(target, method='pad')
  914. expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
  915. self.assert_numpy_array_equal(indexer, expected)
  916. def test_get_indexer_backfill(self):
  917. target = Int64Index(np.arange(10))
  918. indexer = self.index.get_indexer(target, method='backfill')
  919. expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5])
  920. self.assert_numpy_array_equal(indexer, expected)
  921. def test_join_outer(self):
  922. other = Int64Index([7, 12, 25, 1, 2, 5])
  923. other_mono = Int64Index([1, 2, 5, 7, 12, 25])
  924. # not monotonic
  925. # guarantee of sortedness
  926. res, lidx, ridx = self.index.join(other, how='outer',
  927. return_indexers=True)
  928. noidx_res = self.index.join(other, how='outer')
  929. self.assertTrue(res.equals(noidx_res))
  930. eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25])
  931. elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1],
  932. dtype=np.int64)
  933. eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2],
  934. dtype=np.int64)
  935. tm.assert_isinstance(res, Int64Index)
  936. self.assertTrue(res.equals(eres))
  937. self.assert_numpy_array_equal(lidx, elidx)
  938. self.assert_numpy_array_equal(ridx, eridx)
  939. # monotonic
  940. res, lidx, ridx = self.index.join(other_mono, how='outer',
  941. return_indexers=True)
  942. noidx_res = self.index.join(other_mono, how='outer')
  943. self.assertTrue(res.equals(noidx_res))
  944. eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5],
  945. dtype=np.int64)
  946. tm.assert_isinstance(res, Int64Index)
  947. self.assertTrue(res.equals(eres))
  948. self.assert_numpy_array_equal(lidx, elidx)
  949. self.assert_numpy_array_equal(ridx, eridx)
  950. def test_join_inner(self):
  951. other = Int64Index([7, 12, 25, 1, 2, 5])
  952. other_mono = Int64Index([1, 2, 5, 7, 12, 25])
  953. # not monotonic
  954. res, lidx, ridx = self.index.join(other, how='inner',
  955. return_indexers=True)
  956. # no guarantee of sortedness, so sort for comparison purposes
  957. ind = res.argsort()
  958. res = res.take(ind)
  959. lidx = lidx.take(ind)
  960. ridx = ridx.take(ind)
  961. eres = Int64Index([2, 12])
  962. elidx = np.array([1, 6])
  963. eridx = np.array([4, 1])
  964. tm.assert_isinstance(res, Int64Index)
  965. self.assertTrue(res.equals(eres))
  966. self.assert_numpy_array_equal(lidx, elidx)
  967. self.assert_numpy_array_equal(ridx, eridx)
  968. # monotonic
  969. res, lidx, ridx = self.index.join(other_mono, how='inner',
  970. return_indexers=True)
  971. res2 = self.index.intersection(other_mono)
  972. self.assertTrue(res.equals(res2))
  973. eridx = np.array([1, 4])
  974. tm.assert_isinstance(res, Int64Index)
  975. self.assertTrue(res.equals(eres))
  976. self.assert_numpy_array_equal(lidx, elidx)
  977. self.assert_numpy_array_equal(ridx, eridx)
  978. def test_join_left(self):
  979. other = Int64Index([7, 12, 25, 1, 2, 5])
  980. other_mono = Int64Index([1, 2, 5, 7, 12, 25])
  981. # not monotonic
  982. res, lidx, ridx = self.index.join(other, how='left',
  983. return_indexers=True)
  984. eres = self.index
  985. eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1],
  986. dtype=np.int64)
  987. tm.assert_isinstance(res, Int64Index)
  988. self.assertTrue(res.equals(eres))
  989. self.assertIsNone(lidx)
  990. self.assert_numpy_array_equal(ridx, eridx)
  991. # monotonic
  992. res, lidx, ridx = self.index.join(other_mono, how='left',
  993. return_indexers=True)
  994. eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1],
  995. dtype=np.int64)
  996. tm.assert_isinstance(res, Int64Index)
  997. self.assertTrue(res.equals(eres))
  998. self.assertIsNone(lidx)
  999. self.assert_numpy_array_equal(ridx, eridx)
  1000. # non-unique
  1001. """
  1002. idx = Index([1,1,2,5])
  1003. idx2 = Index([1,2,5,7,9])
  1004. res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True)
  1005. eres = idx2
  1006. eridx = np.array([0, 2, 3, -1, -1])
  1007. elidx = np.array([0, 1, 2, 3, 4])
  1008. self.assertTrue(res.equals(eres))
  1009. self.assert_numpy_array_equal(lidx, elidx)
  1010. self.assert_numpy_array_equal(ridx, eridx)
  1011. """
  1012. def test_join_right(self):
  1013. other = Int64Index([7, 12, 25, 1, 2, 5])
  1014. other_mono = Int64Index([1, 2, 5, 7, 12, 25])
  1015. # not monotonic
  1016. res, lidx, ridx = self.index.join(other, how='right',
  1017. return_indexers=True)
  1018. eres = other
  1019. elidx = np.array([-1, 6, -1, -1, 1, -1],
  1020. dtype=np.int64)
  1021. tm.assert_isinstance(other, Int64Index)
  1022. self.assertTrue(res.equals(eres))
  1023. self.assert_numpy_array_equal(lidx, elidx)
  1024. self.assertIsNone(ridx)
  1025. # monotonic
  1026. res, lidx, ridx = self.index.join(other_mono, how='right',
  1027. return_indexers=True)
  1028. eres = other_mono
  1029. elidx = np.array([-1, 1, -1, -1, 6, -1],
  1030. dtype=np.int64)
  1031. tm.assert_isinstance(other, Int64Index)
  1032. self.assertTrue(res.equals(eres))
  1033. self.assert_numpy_array_equal(lidx, elidx)
  1034. self.assertIsNone(ridx)
  1035. # non-unique
  1036. """
  1037. idx = Index([1,1,2,5])
  1038. idx2 = Index([1,2,5,7,9])
  1039. res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True)
  1040. eres = idx2
  1041. elidx = np.array([0, 2, 3, -1, -1])
  1042. eridx = np.array([0, 1, 2, 3, 4])
  1043. self.assertTrue(res.equals(eres))
  1044. self.assert_numpy_array_equal(lidx, elidx)
  1045. self.assert_numpy_array_equal(ridx, eridx)
  1046. idx = Index([1,1,2,5])
  1047. idx2 = Index([1,2,5,9,7])
  1048. res = idx.join(idx2, how='right', return_indexers=False)
  1049. eres = idx2
  1050. self.assert(res.equals(eres))
  1051. """
  1052. def test_join_non_int_index(self):
  1053. other = Index([3, 6, 7, 8, 10], dtype=object)
  1054. outer = self.index.join(other, how='outer')
  1055. outer2 = other.join(self.index, how='outer')
  1056. expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14,
  1057. 16, 18], dtype=object)
  1058. self.assertTrue(outer.equals(outer2))
  1059. self.assertTrue(outer.equals(expected))
  1060. inner = self.index.join(other, how='inner')
  1061. inner2 = other.join(self.index, how='inner')
  1062. expected = Index([6, 8, 10], dtype=object)
  1063. self.assertTrue(inner.equals(inner2))
  1064. self.assertTrue(inner.equals(expected))
  1065. left = self.index.join(other, how='left')
  1066. self.assertTrue(left.equals(self.index))
  1067. left2 = other.join(self.index, how='left')
  1068. self.assertTrue(left2.equals(other))
  1069. right = self.index.join(other, how='right')
  1070. self.assertTrue(right.equals(other))
  1071. right2 = other.join(self.index, how='right')
  1072. self.assertTrue(right2.equals(self.index))
  1073. def test_join_non_unique(self):
  1074. left = Index([4, 4, 3, 3])
  1075. joined, lidx, ridx = left.join(left, return_indexers=True)
  1076. exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4])
  1077. self.assertTrue(joined.equals(exp_joined))
  1078. exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int64)
  1079. self.assert_numpy_array_equal(lidx, exp_lidx)
  1080. exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64)
  1081. self.assert_numpy_array_equal(ridx, exp_ridx)
  1082. def test_join_self(self):
  1083. kinds = 'outer', 'inner', 'left', 'right'
  1084. for kind in kinds:
  1085. joined = self.index.join(self.index, how=kind)
  1086. self.assertIs(self.index, joined)
  1087. def test_intersection(self):
  1088. other = Index([1, 2, 3, 4, 5])
  1089. result = self.index.intersection(other)
  1090. expected = np.sort(np.intersect1d(self.index.values, other.values))
  1091. self.assert_numpy_array_equal(result, expected)
  1092. result = other.intersection(self.index)
  1093. expected = np.sort(np.asarray(np.intersect1d(self.index.values,
  1094. other.values)))
  1095. self.assert_numpy_array_equal(result, expected)
  1096. def test_intersect_str_dates(self):
  1097. dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
  1098. i1 = Index(dt_dates, dtype=object)
  1099. i2 = Index(['aa'], dtype=object)
  1100. res = i2.intersection(i1)
  1101. self.assertEqual(len(res), 0)
  1102. def test_union_noncomparable(self):
  1103. from datetime import datetime, timedelta
  1104. # corner case, non-Int64Index
  1105. now = datetime.now()
  1106. other = Index([now + timedelta(i) for i in range(4)], dtype=object)
  1107. result = self.index.union(other)
  1108. expected = np.concatenate((self.index, other))
  1109. self.assert_numpy_array_equal(result, expected)
  1110. result = other.union(self.index)
  1111. expected = np.concatenate((other, self.index))
  1112. self.assert_numpy_array_equal(result, expected)
  1113. def test_cant_or_shouldnt_cast(self):
  1114. # can't
  1115. data = ['foo', 'bar', 'baz']
  1116. self.assertRaises(TypeError, Int64Index, data)
  1117. # shouldn't
  1118. data = ['0', '1', '2']
  1119. self.assertRaises(TypeError, Int64Index, data)
  1120. def test_view_Index(self):
  1121. self.index.view(Index)
  1122. def test_prevent_casting(self):
  1123. result = self.index.astype('O')
  1124. self.assertEqual(result.dtype, np.object_)
  1125. def test_take_preserve_name(self):
  1126. index = Int64Index([1, 2, 3, 4], name='foo')
  1127. taken = index.take([3, 0, 1])
  1128. self.assertEqual(index.name, taken.name)
  1129. def test_int_name_format(self):
  1130. from pandas import Series, DataFrame
  1131. index = Index(['a', 'b', 'c'], name=0)
  1132. s = Series(lrange(3), index)
  1133. df = DataFrame(lrange(3), index=index)
  1134. repr(s)
  1135. repr(df)
  1136. def test_print_unicode_columns(self):
  1137. df = pd.DataFrame(
  1138. {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]})
  1139. repr(df.columns) # should not raise UnicodeDecodeError
  1140. def test_repr_summary(self):
  1141. with cf.option_context('display.max_seq_items', 10):
  1142. r = repr(pd.Index(np.arange(1000)))
  1143. self.assertTrue(len(r) < 100)
  1144. self.assertTrue("..." in r)
  1145. def test_repr_roundtrip(self):
  1146. tm.assert_index_equal(eval(repr(self.index)), self.index)
  1147. def test_unicode_string_with_unicode(self):
  1148. idx = Index(lrange(1000))
  1149. if compat.PY3:
  1150. str(idx)
  1151. else:
  1152. compat.text_type(idx)
  1153. def test_bytestring_with_unicode(self):
  1154. idx = Index(lrange(1000))
  1155. if compat.PY3:
  1156. bytes(idx)
  1157. else:
  1158. str(idx)
  1159. def test_slice_keep_name(self):
  1160. idx = Int64Index([1, 2], name='asdf')
  1161. self.assertEqual(idx.name, idx[1:].name)
  1162. class TestMultiIndex(tm.TestCase):
  1163. _multiprocess_can_split_ = True
  1164. def setUp(self):
  1165. major_axis = Index(['foo', 'bar', 'baz', 'qux'])
  1166. minor_axis = Index(['one', 'two'])
  1167. major_labels = np.array([0, 0, 1, 2, 3, 3])
  1168. minor_labels = np.array([0, 1, 0, 1, 0, 1])
  1169. self.index_names = ['first', 'second']
  1170. self.index = MultiIndex(levels=[major_axis, minor_axis],
  1171. labels=[major_labels, minor_labels],
  1172. names=self.index_names, verify_integrity=False)
  1173. def test_hash_error(self):
  1174. with tm.assertRaisesRegexp(TypeError,
  1175. "unhashable type: %r" %
  1176. type(self.index).__name__):
  1177. hash(self.index)
  1178. def test_set_names_and_rename(self):
  1179. # so long as these are synonyms, we don't need to test set_names
  1180. self.assertEqual(self.index.rename, self.index.set_names)
  1181. new_names = [name + "SUFFIX" for name in self.index_names]
  1182. ind = self.index.set_names(new_names)
  1183. self.assertEqual(self.index.names, self.index_names)
  1184. self.assertEqual(ind.names, new_names)
  1185. with assertRaisesRegexp(ValueError, "^Length"):
  1186. ind.set_names(new_names + new_names)
  1187. new_names2 = [name + "SUFFIX2" for name in new_names]
  1188. res = ind.set_names(new_names2, inplace=True)
  1189. self.assertIsNone(res)
  1190. self.assertEqual(ind.names, new_names2)
  1191. def test_set_levels_and_set_labels(self):
  1192. # side note - you probably wouldn't want to use levels and labels
  1193. # directly like this - but it is possible.
  1194. levels, labels = self.index.levels, self.index.labels
  1195. new_levels = [[lev + 'a' for lev in level] for level in levels]
  1196. major_labels, minor_labels = labels
  1197. major_labels = [(x + 1) % 3 for x in major_labels]
  1198. minor_labels = [(x + 1) % 1 for x in minor_labels]
  1199. new_labels = [major_labels, minor_labels]
  1200. def assert_matching(actual, expected):
  1201. # avoid specifying internal representation
  1202. # as much as possible
  1203. self.assertEqual(len(actual), len(expected))
  1204. for act, exp in zip(actual, expected):
  1205. act = np.asarray(act)
  1206. exp = np.asarray(exp)
  1207. assert_almost_equal(act, exp)
  1208. # level changing [w/o mutation]
  1209. ind2 = self.index.set_levels(new_levels)
  1210. assert_matching(ind2.levels, new_levels)
  1211. assert_matching(self.index.levels, levels)
  1212. # level changing [w/ mutation]
  1213. ind2 = self.index.copy()
  1214. inplace_return = ind2.set_levels(new_levels, inplace=True)
  1215. self.assertIsNone(inplace_return)
  1216. assert_matching(ind2.levels, new_levels)
  1217. # label changing [w/o mutation]
  1218. ind2 = self.index.set_labels(new_labels)
  1219. assert_matching(ind2.labels, new_labels)
  1220. assert_matching(self.index.labels, labels)
  1221. # label changing [w/ mutation]
  1222. ind2 = self.index.copy()
  1223. inplace_return = ind2.set_labels(new_labels, inplace=True)
  1224. self.assertIsNone(inplace_return)
  1225. assert_matching(ind2.labels, new_labels)
  1226. def test_set_levels_labels_names_bad_input(self):
  1227. levels, labels = self.index.levels, self.index.labels
  1228. names = self.index.names
  1229. with tm.assertRaisesRegexp(ValueError, 'Length of levels'):
  1230. self.index.set_levels([levels[0]])
  1231. with tm.assertRaisesRegexp(ValueError, 'Length of labels'):
  1232. self.index.set_labels([labels[0]])
  1233. with tm.assertRaisesRegexp(ValueError, 'Length of names'):
  1234. self.index.set_names([names[0]])
  1235. # shouldn't scalar data error, instead should demand list-like
  1236. with tm.assertRaisesRegexp(TypeError, 'list of lists-like'):
  1237. self.index.set_levels(levels[0])
  1238. # shouldn't scalar data error, instead should demand list-like
  1239. with tm.assertRaisesRegexp(TypeError, 'list of lists-like'):
  1240. self.index.set_labels(labels[0])
  1241. # shouldn't scalar data error, instead should demand list-like
  1242. with tm.assertRaisesRegexp(TypeError, 'list-like'):
  1243. self.index.set_names(names[0])
  1244. def test_metadata_immutable(self):
  1245. levels, labels = self.index.levels, self.index.labels
  1246. # shouldn't be able to set at either the top level or base level
  1247. mutable_regex = re.compile('does not support mutable operations')
  1248. with assertRaisesRegexp(TypeError, mutable_regex):
  1249. levels[0] = levels[0]
  1250. with assertRaisesRegexp(TypeError, mutable_regex):
  1251. levels[0][0] = levels[0][0]
  1252. # ditto for labels
  1253. with assertRaisesRegexp(TypeError, mutable_regex):
  1254. labels[0] = labels[0]
  1255. with assertRaisesRegexp(TypeError, mutable_regex):
  1256. labels[0][0] = labels[0][0]
  1257. # and for names
  1258. names = self.index.names
  1259. with assertRaisesRegexp(TypeError, mutable_regex):
  1260. names[0] = names[0]
  1261. def test_inplace_mutation_resets_values(self):
  1262. levels = [['a', 'b', 'c'], [4]]
  1263. levels2 = [[1, 2, 3], ['a']]
  1264. labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
  1265. mi1 = MultiIndex(levels=levels, labels=labels)
  1266. mi2 = MultiIndex(levels=levels2, labels=labels)
  1267. vals = mi1.values.copy()
  1268. vals2 = mi2.values.copy()
  1269. self.assertIsNotNone(mi1._tuples)
  1270. # make sure level setting works
  1271. new_vals = mi1.set_levels(levels2).values
  1272. assert_almost_equal(vals2, new_vals)
  1273. # non-inplace doesn't kill _tuples [implementation detail]
  1274. assert_almost_equal(mi1._tuples, vals)
  1275. # and values is still same too
  1276. assert_almost_equal(mi1.values, vals)
  1277. # inplace should kill _tuples
  1278. mi1.set_levels(levels2, inplace=True)
  1279. assert_almost_equal(mi1.values, vals2)
  1280. # make sure label setting works too
  1281. labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
  1282. exp_values = np.array([(long(1), 'a')] * 6, dtype=object)
  1283. new_values = mi2.set_labels(labels2).values
  1284. # not inplace shouldn't change
  1285. assert_almost_equal(mi2._tuples, vals2)
  1286. # should have correct values
  1287. assert_almost_equal(exp_values, new_values)
  1288. # and again setting inplace should kill _tuples, etc
  1289. mi2.set_labels(labels2, inplace=True)
  1290. assert_almost_equal(mi2.values, new_values)
  1291. def test_copy_in_constructor(self):
  1292. levels = np.array(["a", "b", "c"])
  1293. labels = np.array([1, 1, 2, 0, 0, 1, 1])
  1294. val = labels[0]
  1295. mi = MultiIndex(levels=[levels, levels], labels=[labels, labels],
  1296. copy=True)
  1297. self.assertEqual(mi.labels[0][0], val)
  1298. labels[0] = 15
  1299. self.assertEqual(mi.labels[0][0], val)
  1300. val = levels[0]
  1301. levels[0] = "PANDA"
  1302. self.assertEqual(mi.levels[0][0], val)
  1303. def test_set_value_keeps_names(self):
  1304. # motivating example from #3742
  1305. lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe']
  1306. lev2 = ['1', '2', '3'] * 2
  1307. idx = pd.MultiIndex.from_arrays(
  1308. [lev1, lev2],
  1309. names=['Name', 'Number'])
  1310. df = pd.DataFrame(
  1311. np.random.randn(6, 4),
  1312. columns=['one', 'two', 'three', 'four'],
  1313. index=idx)
  1314. df = df.sortlevel()
  1315. self.assertIsNone(df.is_copy)
  1316. self.assertEqual(df.index.names, ('Name', 'Number'))
  1317. df = df.set_value(('grethe', '4'), 'one', 99.34)
  1318. self.assertIsNone(df.is_copy)
  1319. self.assertEqual(df.index.names, ('Name', 'Number'))
  1320. def test_names(self):
  1321. # names are assigned in __init__
  1322. names = self.index_names
  1323. level_names = [level.name for level in self.index.levels]
  1324. self.assertEqual(names, level_names)
  1325. # setting bad names on existing
  1326. index = self.index
  1327. assertRaisesRegexp(ValueError, "^Length of names", setattr, index,
  1328. "names", list(index.names) + ["third"])
  1329. assertRaisesRegexp(ValueError, "^Length of names", setattr, index,
  1330. "names", [])
  1331. # initializing with bad names (should always be equivalent)
  1332. major_axis, minor_axis = self.index.levels
  1333. major_labels, minor_labels = self.index.labels
  1334. assertRaisesRegexp(ValueError, "^Length of names", MultiIndex,
  1335. levels=[major_axis, minor_axis],
  1336. labels=[major_labels, minor_labels],
  1337. names=['first'])
  1338. assertRaisesRegexp(ValueError, "^Length of names", MultiIndex,
  1339. levels=[major_axis, minor_axis],
  1340. labels=[major_labels, minor_labels],
  1341. names=['first', 'second', 'third'])
  1342. # names are assigned
  1343. index.names = ["a", "b"]
  1344. ind_names = list(index.names)
  1345. level_names = [level.name for level in index.levels]
  1346. self.assertEqual(ind_names, level_names)
  1347. def test_reference_duplicate_name(self):
  1348. idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['x', 'x'])
  1349. self.assertTrue(idx._reference_duplicate_name('x'))
  1350. idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['x', 'y'])
  1351. self.assertFalse(idx._reference_duplicate_name('x'))
  1352. def test_astype(self):
  1353. expected = self.index.copy()
  1354. actual = self.index.astype('O')
  1355. assert_copy(actual.levels, expected.levels)
  1356. assert_copy(actual.labels, expected.labels)
  1357. self.check_level_names(actual, expected.names)
  1358. with assertRaisesRegexp(TypeError, "^Setting.*dtype.*object"):
  1359. self.index.astype(np.dtype(int))
  1360. def test_constructor_single_level(self):
  1361. single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
  1362. labels=[[0, 1, 2, 3]],
  1363. names=['first'])
  1364. tm.assert_isinstance(single_level, Index)
  1365. self.assertNotIsInstance(single_level, MultiIndex)
  1366. self.assertEqual(single_level.name, 'first')
  1367. single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
  1368. labels=[[0, 1, 2, 3]])
  1369. self.assertIsNone(single_level.name)
  1370. def test_constructor_no_levels(self):
  1371. assertRaisesRegexp(ValueError, "non-zero number of levels/labels",
  1372. MultiIndex, levels=[], labels=[])
  1373. both_re = re.compile('Must pass both levels and labels')
  1374. with tm.assertRaisesRegexp(TypeError, both_re):
  1375. MultiIndex(levels=[])
  1376. with tm.assertRaisesRegexp(TypeError, both_re):
  1377. MultiIndex(labels=[])
  1378. def test_constructor_mismatched_label_levels(self):
  1379. labels = [np.array([1]), np.array([2]), np.array([3])]
  1380. levels = ["a"]
  1381. assertRaisesRegexp(ValueError, "Length of levels and labels must be"
  1382. " the same", MultiIndex, levels=levels,
  1383. labels=labels)
  1384. length_error = re.compile('>= length of level')
  1385. label_error = re.compile(r'Unequal label lengths: \[4, 2\]')
  1386. # important to check that it's looking at the right thing.
  1387. with tm.assertRaisesRegexp(ValueError, length_error):
  1388. MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]])
  1389. with tm.assertRaisesRegexp(ValueError, label_error):
  1390. MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]])
  1391. # external API
  1392. with tm.assertRaisesRegexp(ValueError, length_error):
  1393. self.index.copy().set_levels([['a'], ['b']])
  1394. with tm.assertRaisesRegexp(ValueError, label_error):
  1395. self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
  1396. # deprecated properties
  1397. with warnings.catch_warnings():
  1398. warnings.simplefilter('ignore')
  1399. with tm.assertRaisesRegexp(ValueError, length_error):
  1400. self.index.copy().levels = [['a'], ['b']]
  1401. with tm.assertRaisesRegexp(ValueError, label_error):
  1402. self.index.copy().labels = [[0, 0, 0, 0], [0, 0]]
  1403. def assert_multiindex_copied(self, copy, original):
  1404. # levels shoudl be (at least, shallow copied)
  1405. assert_copy(copy.levels, original.levels)
  1406. assert_almost_equal(copy.labels, original.labels)
  1407. # labels doesn't matter which way copied
  1408. assert_almost_equal(copy.labels, original.labels)
  1409. self.assertIsNot(copy.labels, original.labels)
  1410. # names doesn't matter which way copied
  1411. self.assertEqual(copy.names, original.names)
  1412. self.assertIsNot(copy.names, original.names)
  1413. # sort order should be copied
  1414. self.assertEqual(copy.sortorder, original.sortorder)
  1415. def test_copy(self):
  1416. i_copy = self.index.copy()
  1417. self.assert_multiindex_copied(i_copy, self.index)
  1418. def test_shallow_copy(self):
  1419. i_copy = self.index._shallow_copy()
  1420. self.assert_multiindex_copied(i_copy, self.index)
  1421. def test_view(self):
  1422. i_view = self.index.view()
  1423. self.assert_multiindex_copied(i_view, self.index)
  1424. def check_level_names(self, index, names):
  1425. self.assertEqual([level.name for level in index.levels], list(names))
  1426. def test_changing_names(self):
  1427. # names should be applied to levels
  1428. level_names = [level.name for level in self.index.levels]
  1429. self.check_level_names(self.index, self.index.names)
  1430. view = self.index.view()
  1431. copy = self.index.copy()
  1432. shallow_copy = self.index._shallow_copy()
  1433. # changing names should change level names on object
  1434. new_names = [name + "a" for name in self.index.names]
  1435. self.index.names = new_names
  1436. self.check_level_names(self.index, new_names)
  1437. # but not on copies
  1438. self.check_level_names(view, level_names)
  1439. self.check_level_names(copy, level_names)
  1440. self.check_level_names(shallow_copy, level_names)
  1441. # and copies shouldn't change original
  1442. shallow_copy.names = [name + "c" for name in shallow_copy.names]
  1443. self.check_level_names(self.index, new_names)
  1444. def test_duplicate_names(self):
  1445. self.index.names = ['foo', 'foo']
  1446. assertRaisesRegexp(KeyError, 'Level foo not found',
  1447. self.index._get_level_number, 'foo')
  1448. def test_get_level_number_integer(self):
  1449. self.index.names = [1, 0]
  1450. self.assertEqual(self.index._get_level_number(1), 0)
  1451. self.assertEqual(self.index._get_level_number(0), 1)
  1452. self.assertRaises(IndexError, self.index._get_level_number, 2)
  1453. assertRaisesRegexp(KeyError, 'Level fourth not found',
  1454. self.index._get_level_number, 'fourth')
  1455. def test_from_arrays(self):
  1456. arrays = []
  1457. for lev, lab in zip(self.index.levels, self.index.labels):
  1458. arrays.append(np.asarray(lev).take(lab))
  1459. result = MultiIndex.from_arrays(arrays)
  1460. self.assertEqual(list(result), list(self.index))
  1461. def test_from_product(self):
  1462. first = ['foo', 'bar', 'buz']
  1463. second = ['a', 'b', 'c']
  1464. names = ['first', 'second']
  1465. result = MultiIndex.from_product([first, second], names=names)
  1466. tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'),
  1467. ('bar', 'a'), ('bar', 'b'), ('bar', 'c'),
  1468. ('buz', 'a'), ('buz', 'b'), ('buz', 'c')]
  1469. expected = MultiIndex.from_tuples(tuples, names=names)
  1470. assert_array_equal(result, expected)
  1471. self.assertEqual(result.names, names)
  1472. def test_from_product_datetimeindex(self):
  1473. dt_index = pd.date_range('2000-01-01', periods=2)
  1474. mi = pd.MultiIndex.from_product([[1, 2], dt_index])
  1475. etalon = pd.lib.list_to_object_array([(1, pd.Timestamp('2000-01-01')),
  1476. (1, pd.Timestamp('2000-01-02')),
  1477. (2, pd.Timestamp('2000-01-01')),
  1478. (2, pd.Timestamp('2000-01-02'))])
  1479. assert_array_equal(mi.values, etalon)
  1480. def test_append(self):
  1481. result = self.index[:3].append(self.index[3:])
  1482. self.assertTrue(result.equals(self.index))
  1483. foos = [self.index[:1], self.index[1:3], self.index[3:]]
  1484. result = foos[0].append(foos[1:])
  1485. self.assertTrue(result.equals(self.index))
  1486. # empty
  1487. result = self.index.append([])
  1488. self.assertTrue(result.equals(self.index))
  1489. def test_get_level_values(self):
  1490. result = self.index.get_level_values(0)
  1491. expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux']
  1492. self.assert_numpy_array_equal(result, expected)
  1493. self.assertEqual(result.name, 'first')
  1494. result = self.index.get_level_values('first')
  1495. expected = self.index.get_level_values(0)
  1496. self.assert_numpy_array_equal(result, expected)
  1497. def test_get_level_values_na(self):
  1498. arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
  1499. index = pd.MultiIndex.from_arrays(arrays)
  1500. values = index.get_level_values(1)
  1501. expected = [1, np.nan, 2]
  1502. assert_array_equal(values.values.astype(float), expected)
  1503. arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
  1504. index = pd.MultiIndex.from_arrays(arrays)
  1505. values = index.get_level_values(1)
  1506. expected = [np.nan, np.nan, 2]
  1507. assert_array_equal(values.values.astype(float), expected)
  1508. arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
  1509. index = pd.MultiIndex.from_arrays(arrays)
  1510. values = index.get_level_values(0)
  1511. expected = [np.nan, np.nan, np.nan]
  1512. assert_array_equal(values.values.astype(float), expected)
  1513. values = index.get_level_values(1)
  1514. expected = ['a', np.nan, 1]
  1515. assert_array_equal(values.values, expected)
  1516. if not _np_version_under1p7:
  1517. arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
  1518. index = pd.MultiIndex.from_arrays(arrays)
  1519. values = index.get_level_values(1)
  1520. expected = pd.DatetimeIndex([0, 1, pd.NaT])
  1521. assert_array_equal(values.values, expected.values)
  1522. arrays = [[], []]
  1523. index = pd.MultiIndex.from_arrays(arrays)
  1524. values = index.get_level_values(0)
  1525. self.assertEqual(values.shape, (0,))
  1526. def test_reorder_levels(self):
  1527. # this blows up
  1528. assertRaisesRegexp(IndexError, '^Too many levels',
  1529. self.index.reorder_levels, [2, 1, 0])
  1530. def test_nlevels(self):
  1531. self.assertEqual(self.index.nlevels, 2)
  1532. def test_iter(self):
  1533. result = list(self.index)
  1534. expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
  1535. ('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
  1536. self.assertEqual(result, expected)
  1537. def test_pickle(self):
  1538. pickled = pickle.dumps(self.index)
  1539. unpickled = pickle.loads(pickled)
  1540. self.assertTrue(self.index.equals(unpickled))
  1541. def test_legacy_pickle(self):
  1542. if compat.PY3:
  1543. raise nose.SkipTest("doesn't work on Python 3")
  1544. def curpath():
  1545. pth, _ = os.path.split(os.path.abspath(__file__))
  1546. return pth
  1547. ppath = os.path.join(curpath(), 'data/multiindex_v1.pickle')
  1548. obj = pickle.load(open(ppath, 'r'))
  1549. self.assertTrue(obj._is_v1)
  1550. obj2 = MultiIndex.from_tuples(obj.values)
  1551. self.assertTrue(obj.equals(obj2))
  1552. res = obj.get_indexer(obj)
  1553. exp = np.arange(len(obj))
  1554. assert_almost_equal(res, exp)
  1555. res = obj.get_indexer(obj2[::-1])
  1556. exp = obj.get_indexer(obj[::-1])
  1557. exp2 = obj2.get_indexer(obj2[::-1])
  1558. assert_almost_equal(res, exp)
  1559. assert_almost_equal(exp, exp2)
  1560. def test_legacy_v2_unpickle(self):
  1561. # 0.7.3 -> 0.8.0 format manage
  1562. pth, _ = os.path.split(os.path.abspath(__file__))
  1563. filepath = os.path.join(pth, 'data', 'mindex_073.pickle')
  1564. obj = pd.read_pickle(filepath)
  1565. obj2 = MultiIndex.from_tuples(obj.values)
  1566. self.assertTrue(obj.equals(obj2))
  1567. res = obj.get_indexer(obj)
  1568. exp = np.arange(len(obj))
  1569. assert_almost_equal(res, exp)
  1570. res = obj.get_indexer(obj2[::-1])
  1571. exp = obj.get_indexer(obj[::-1])
  1572. exp2 = obj2.get_indexer(obj2[::-1])
  1573. assert_almost_equal(res, exp)
  1574. assert_almost_equal(exp, exp2)
  1575. def test_from_tuples_index_values(self):
  1576. result = MultiIndex.from_tuples(self.index)
  1577. self.assertTrue((result.values == self.index.values).all())
  1578. def test_contains(self):
  1579. self.assertIn(('foo', 'two'), self.index)
  1580. self.assertNotIn(('bar', 'two'), self.index)
  1581. self.assertNotIn(None, self.index)
  1582. def test_is_all_dates(self):
  1583. self.assertFalse(self.index.is_all_dates)
  1584. def test_is_numeric(self):
  1585. # MultiIndex is never numeric
  1586. self.assertFalse(self.index.is_numeric())
  1587. def test_getitem(self):
  1588. # scalar
  1589. self.assertEqual(self.index[2], ('bar', 'one'))
  1590. # slice
  1591. result = self.index[2:5]
  1592. expected = self.index[[2, 3, 4]]
  1593. self.assertTrue(result.equals(expected))
  1594. # boolean
  1595. result = self.index[[True, False, True, False, True, True]]
  1596. result2 = self.index[np.array([True, False, True, False, True, True])]
  1597. expected = self.index[[0, 2, 4, 5]]
  1598. self.assertTrue(result.equals(expected))
  1599. self.assertTrue(result2.equals(expected))
  1600. def test_getitem_group_select(self):
  1601. sorted_idx, _ = self.index.sortlevel(0)
  1602. self.assertEqual(sorted_idx.get_loc('baz'), slice(3, 4))
  1603. self.assertEqual(sorted_idx.get_loc('foo'), slice(0, 2))
  1604. def test_get_loc(self):
  1605. self.assertEqual(self.index.get_loc(('foo', 'two')), 1)
  1606. self.assertEqual(self.index.get_loc(('baz', 'two')), 3)
  1607. self.assertRaises(KeyError, self.index.get_loc, ('bar', 'two'))
  1608. self.assertRaises(KeyError, self.index.get_loc, 'quux')
  1609. # 3 levels
  1610. index = MultiIndex(levels=[Index(lrange(4)),
  1611. Index(lrange(4)),
  1612. Index(lrange(4))],
  1613. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  1614. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  1615. np.array([1, 0, 1, 1, 0, 0, 1, 0])])
  1616. self.assertRaises(KeyError, index.get_loc, (1, 1))
  1617. self.assertEqual(index.get_loc((2, 0)), slice(3, 5))
  1618. def test_get_loc_duplicates(self):
  1619. index = Index([2, 2, 2, 2])
  1620. result = index.get_loc(2)
  1621. expected = slice(0, 4)
  1622. self.assertEqual(result, expected)
  1623. # self.assertRaises(Exception, index.get_loc, 2)
  1624. index = Index(['c', 'a', 'a', 'b', 'b'])
  1625. rs = index.get_loc('c')
  1626. xp = 0
  1627. assert(rs == xp)
  1628. def test_get_loc_level(self):
  1629. index = MultiIndex(levels=[Index(lrange(4)),
  1630. Index(lrange(4)),
  1631. Index(lrange(4))],
  1632. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  1633. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  1634. np.array([1, 0, 1, 1, 0, 0, 1, 0])])
  1635. loc, new_index = index.get_loc_level((0, 1))
  1636. expected = slice(1, 2)
  1637. exp_index = index[expected].droplevel(0).droplevel(0)
  1638. self.assertEqual(loc, expected)
  1639. self.assertTrue(new_index.equals(exp_index))
  1640. loc, new_index = index.get_loc_level((0, 1, 0))
  1641. expected = 1
  1642. self.assertEqual(loc, expected)
  1643. self.assertIsNone(new_index)
  1644. self.assertRaises(KeyError, index.get_loc_level, (2, 2))
  1645. index = MultiIndex(levels=[[2000], lrange(4)],
  1646. labels=[np.array([0, 0, 0, 0]),
  1647. np.array([0, 1, 2, 3])])
  1648. result, new_index = index.get_loc_level((2000, slice(None, None)))
  1649. expected = slice(None, None)
  1650. self.assertEqual(result, expected)
  1651. self.assertTrue(new_index.equals(index.droplevel(0)))
  1652. def test_slice_locs(self):
  1653. df = tm.makeTimeDataFrame()
  1654. stacked = df.stack()
  1655. idx = stacked.index
  1656. slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
  1657. sliced = stacked[slob]
  1658. expected = df[5:16].stack()
  1659. tm.assert_almost_equal(sliced.values, expected.values)
  1660. slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30),
  1661. df.index[15] - timedelta(seconds=30)))
  1662. sliced = stacked[slob]
  1663. expected = df[6:15].stack()
  1664. tm.assert_almost_equal(sliced.values, expected.values)
  1665. def test_slice_locs_with_type_mismatch(self):
  1666. df = tm.makeTimeDataFrame()
  1667. stacked = df.stack()
  1668. idx = stacked.index
  1669. assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs,
  1670. (1, 3))
  1671. assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs,
  1672. df.index[5] + timedelta(seconds=30), (5, 2))
  1673. df = tm.makeCustomDataframe(5, 5)
  1674. stacked = df.stack()
  1675. idx = stacked.index
  1676. with assertRaisesRegexp(TypeError, '^Level type mismatch'):
  1677. idx.slice_locs(timedelta(seconds=30))
  1678. # TODO: Try creating a UnicodeDecodeError in exception message
  1679. with assertRaisesRegexp(TypeError, '^Level type mismatch'):
  1680. idx.slice_locs(df.index[1], (16, "a"))
  1681. def test_slice_locs_not_sorted(self):
  1682. index = MultiIndex(levels=[Index(lrange(4)),
  1683. Index(lrange(4)),
  1684. Index(lrange(4))],
  1685. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  1686. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  1687. np.array([1, 0, 1, 1, 0, 0, 1, 0])])
  1688. assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex"
  1689. " lexsort depth", index.slice_locs, (1, 0, 1),
  1690. (2, 1, 0))
  1691. # works
  1692. sorted_index, _ = index.sortlevel(0)
  1693. # should there be a test case here???
  1694. sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
  1695. def test_slice_locs_partial(self):
  1696. sorted_idx, _ = self.index.sortlevel(0)
  1697. result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one'))
  1698. self.assertEqual(result, (1, 5))
  1699. result = sorted_idx.slice_locs(None, ('qux', 'one'))
  1700. self.assertEqual(result, (0, 5))
  1701. result = sorted_idx.slice_locs(('foo', 'two'), None)
  1702. self.assertEqual(result, (1, len(sorted_idx)))
  1703. result = sorted_idx.slice_locs('bar', 'baz')
  1704. self.assertEqual(result, (2, 4))
  1705. def test_slice_locs_not_contained(self):
  1706. # some searchsorted action
  1707. index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
  1708. labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
  1709. [0, 1, 2, 1, 2, 2, 0, 1, 2]],
  1710. sortorder=0)
  1711. result = index.slice_locs((1, 0), (5, 2))
  1712. self.assertEqual(result, (3, 6))
  1713. result = index.slice_locs(1, 5)
  1714. self.assertEqual(result, (3, 6))
  1715. result = index.slice_locs((2, 2), (5, 2))
  1716. self.assertEqual(result, (3, 6))
  1717. result = index.slice_locs(2, 5)
  1718. self.assertEqual(result, (3, 6))
  1719. result = index.slice_locs((1, 0), (6, 3))
  1720. self.assertEqual(result, (3, 8))
  1721. result = index.slice_locs(-1, 10)
  1722. self.assertEqual(result, (0, len(index)))
  1723. def test_consistency(self):
  1724. # need to construct an overflow
  1725. major_axis = lrange(70000)
  1726. minor_axis = lrange(10)
  1727. major_labels = np.arange(70000)
  1728. minor_labels = np.repeat(lrange(10), 7000)
  1729. # the fact that is works means it's consistent
  1730. index = MultiIndex(levels=[major_axis, minor_axis],
  1731. labels=[major_labels, minor_labels])
  1732. # inconsistent
  1733. major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
  1734. minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
  1735. index = MultiIndex(levels=[major_axis, minor_axis],
  1736. labels=[major_labels, minor_labels])
  1737. self.assertFalse(index.is_unique)
  1738. def test_truncate(self):
  1739. major_axis = Index(lrange(4))
  1740. minor_axis = Index(lrange(2))
  1741. major_labels = np.array([0, 0, 1, 2, 3, 3])
  1742. minor_labels = np.array([0, 1, 0, 1, 0, 1])
  1743. index = MultiIndex(levels=[major_axis, minor_axis],
  1744. labels=[major_labels, minor_labels])
  1745. result = index.truncate(before=1)
  1746. self.assertNotIn('foo', result.levels[0])
  1747. self.assertIn(1, result.levels[0])
  1748. result = index.truncate(after=1)
  1749. self.assertNotIn(2, result.levels[0])
  1750. self.assertIn(1, result.levels[0])
  1751. result = index.truncate(before=1, after=2)
  1752. self.assertEqual(len(result.levels[0]), 2)
  1753. # after < before
  1754. self.assertRaises(ValueError, index.truncate, 3, 1)
  1755. def test_get_indexer(self):
  1756. major_axis = Index(lrange(4))
  1757. minor_axis = Index(lrange(2))
  1758. major_labels = np.array([0, 0, 1, 2, 2, 3, 3])
  1759. minor_labels = np.array([0, 1, 0, 0, 1, 0, 1])
  1760. index = MultiIndex(levels=[major_axis, minor_axis],
  1761. labels=[major_labels, minor_labels])
  1762. idx1 = index[:5]
  1763. idx2 = index[[1, 3, 5]]
  1764. r1 = idx1.get_indexer(idx2)
  1765. assert_almost_equal(r1, [1, 3, -1])
  1766. r1 = idx2.get_indexer(idx1, method='pad')
  1767. assert_almost_equal(r1, [-1, 0, 0, 1, 1])
  1768. rffill1 = idx2.get_indexer(idx1, method='ffill')
  1769. assert_almost_equal(r1, rffill1)
  1770. r1 = idx2.get_indexer(idx1, method='backfill')
  1771. assert_almost_equal(r1, [0, 0, 1, 1, 2])
  1772. rbfill1 = idx2.get_indexer(idx1, method='bfill')
  1773. assert_almost_equal(r1, rbfill1)
  1774. # pass non-MultiIndex
  1775. r1 = idx1.get_indexer(idx2._tuple_index)
  1776. rexp1 = idx1.get_indexer(idx2)
  1777. assert_almost_equal(r1, rexp1)
  1778. r1 = idx1.get_indexer([1, 2, 3])
  1779. self.assertTrue((r1 == [-1, -1, -1]).all())
  1780. # create index with duplicates
  1781. idx1 = Index(lrange(10) + lrange(10))
  1782. idx2 = Index(lrange(20))
  1783. assertRaisesRegexp(InvalidIndexError, "Reindexing only valid with"
  1784. " uniquely valued Index objects",
  1785. idx1.get_indexer, idx2)
  1786. def test_format(self):
  1787. self.index.format()
  1788. self.index[:0].format()
  1789. def test_format_integer_names(self):
  1790. index = MultiIndex(levels=[[0, 1], [0, 1]],
  1791. labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
  1792. names=[0, 1])
  1793. index.format(names=True)
  1794. def test_format_sparse_display(self):
  1795. index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]],
  1796. labels=[[0, 0, 0, 1, 1, 1],
  1797. [0, 0, 1, 0, 0, 1],
  1798. [0, 1, 0, 0, 1, 0],
  1799. [0, 0, 0, 0, 0, 0]])
  1800. result = index.format()
  1801. self.assertEqual(result[3], '1 0 0 0')
  1802. def test_format_sparse_config(self):
  1803. import warnings
  1804. warn_filters = warnings.filters
  1805. warnings.filterwarnings('ignore',
  1806. category=FutureWarning,
  1807. module=".*format")
  1808. # GH1538
  1809. pd.set_option('display.multi_sparse', False)
  1810. result = self.index.format()
  1811. self.assertEqual(result[1], 'foo two')
  1812. self.reset_display_options()
  1813. warnings.filters = warn_filters
  1814. def test_to_hierarchical(self):
  1815. index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'),
  1816. (2, 'one'), (2, 'two')])
  1817. result = index.to_hierarchical(3)
  1818. expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
  1819. labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
  1820. [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
  1821. tm.assert_index_equal(result, expected)
  1822. self.assertEqual(result.names, index.names)
  1823. # K > 1
  1824. result = index.to_hierarchical(3, 2)
  1825. expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
  1826. labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
  1827. [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
  1828. tm.assert_index_equal(result, expected)
  1829. self.assertEqual(result.names, index.names)
  1830. # non-sorted
  1831. index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
  1832. (2, 'a'), (2, 'b')],
  1833. names=['N1', 'N2'])
  1834. result = index.to_hierarchical(2)
  1835. expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), (1, 'b'),
  1836. (2, 'a'), (2, 'a'), (2, 'b'), (2, 'b')],
  1837. names=['N1', 'N2'])
  1838. tm.assert_index_equal(result, expected)
  1839. self.assertEqual(result.names, index.names)
  1840. def test_bounds(self):
  1841. self.index._bounds
  1842. def test_equals(self):
  1843. self.assertTrue(self.index.equals(self.index))
  1844. self.assertTrue(self.index.equal_levels(self.index))
  1845. self.assertFalse(self.index.equals(self.index[:-1]))
  1846. self.assertTrue(self.index.equals(self.index._tuple_index))
  1847. # different number of levels
  1848. index = MultiIndex(levels=[Index(lrange(4)),
  1849. Index(lrange(4)),
  1850. Index(lrange(4))],
  1851. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  1852. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  1853. np.array([1, 0, 1, 1, 0, 0, 1, 0])])
  1854. index2 = MultiIndex(levels=index.levels[:-1],
  1855. labels=index.labels[:-1])
  1856. self.assertFalse(index.equals(index2))
  1857. self.assertFalse(index.equal_levels(index2))
  1858. # levels are different
  1859. major_axis = Index(lrange(4))
  1860. minor_axis = Index(lrange(2))
  1861. major_labels = np.array([0, 0, 1, 2, 2, 3])
  1862. minor_labels = np.array([0, 1, 0, 0, 1, 0])
  1863. index = MultiIndex(levels=[major_axis, minor_axis],
  1864. labels=[major_labels, minor_labels])
  1865. self.assertFalse(self.index.equals(index))
  1866. self.assertFalse(self.index.equal_levels(index))
  1867. # some of the labels are different
  1868. major_axis = Index(['foo', 'bar', 'baz', 'qux'])
  1869. minor_axis = Index(['one', 'two'])
  1870. major_labels = np.array([0, 0, 2, 2, 3, 3])
  1871. minor_labels = np.array([0, 1, 0, 1, 0, 1])
  1872. index = MultiIndex(levels=[major_axis, minor_axis],
  1873. labels=[major_labels, minor_labels])
  1874. self.assertFalse(self.index.equals(index))
  1875. def test_identical(self):
  1876. mi = self.index.copy()
  1877. mi2 = self.index.copy()
  1878. self.assertTrue(mi.identical(mi2))
  1879. mi = mi.set_names(['new1', 'new2'])
  1880. self.assertTrue(mi.equals(mi2))
  1881. self.assertFalse(mi.identical(mi2))
  1882. mi2 = mi2.set_names(['new1', 'new2'])
  1883. self.assertTrue(mi.identical(mi2))
  1884. mi3 = Index(mi.tolist(), names=mi.names)
  1885. mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
  1886. self.assertTrue(mi.identical(mi3))
  1887. self.assertFalse(mi.identical(mi4))
  1888. self.assertTrue(mi.equals(mi4))
  1889. def test_is_(self):
  1890. mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
  1891. self.assertTrue(mi.is_(mi))
  1892. self.assertTrue(mi.is_(mi.view()))
  1893. self.assertTrue(mi.is_(mi.view().view().view().view()))
  1894. mi2 = mi.view()
  1895. # names are metadata, they don't change id
  1896. mi2.names = ["A", "B"]
  1897. self.assertTrue(mi2.is_(mi))
  1898. self.assertTrue(mi.is_(mi2))
  1899. self.assertTrue(mi.is_(mi.set_names(["C", "D"])))
  1900. mi2 = mi.view()
  1901. mi2.set_names(["E", "F"], inplace=True)
  1902. self.assertTrue(mi.is_(mi2))
  1903. # levels are inherent properties, they change identity
  1904. mi3 = mi2.set_levels([lrange(10), lrange(10)])
  1905. self.assertFalse(mi3.is_(mi2))
  1906. # shouldn't change
  1907. self.assertTrue(mi2.is_(mi))
  1908. mi4 = mi3.view()
  1909. mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True)
  1910. self.assertFalse(mi4.is_(mi3))
  1911. mi5 = mi.view()
  1912. mi5.set_levels(mi5.levels, inplace=True)
  1913. self.assertFalse(mi5.is_(mi))
  1914. def test_union(self):
  1915. piece1 = self.index[:5][::-1]
  1916. piece2 = self.index[3:]
  1917. the_union = piece1 | piece2
  1918. tups = sorted(self.index._tuple_index)
  1919. expected = MultiIndex.from_tuples(tups)
  1920. self.assertTrue(the_union.equals(expected))
  1921. # corner case, pass self or empty thing:
  1922. the_union = self.index.union(self.index)
  1923. self.assertIs(the_union, self.index)
  1924. the_union = self.index.union(self.index[:0])
  1925. self.assertIs(the_union, self.index)
  1926. # won't work in python 3
  1927. # tuples = self.index._tuple_index
  1928. # result = self.index[:4] | tuples[4:]
  1929. # self.assertTrue(result.equals(tuples))
  1930. # not valid for python 3
  1931. # def test_union_with_regular_index(self):
  1932. # other = Index(['A', 'B', 'C'])
  1933. # result = other.union(self.index)
  1934. # self.assertIn(('foo', 'one'), result)
  1935. # self.assertIn('B', result)
  1936. # result2 = self.index.union(other)
  1937. # self.assertTrue(result.equals(result2))
  1938. def test_intersection(self):
  1939. piece1 = self.index[:5][::-1]
  1940. piece2 = self.index[3:]
  1941. the_int = piece1 & piece2
  1942. tups = sorted(self.index[3:5]._tuple_index)
  1943. expected = MultiIndex.from_tuples(tups)
  1944. self.assertTrue(the_int.equals(expected))
  1945. # corner case, pass self
  1946. the_int = self.index.intersection(self.index)
  1947. self.assertIs(the_int, self.index)
  1948. # empty intersection: disjoint
  1949. empty = self.index[:2] & self.index[2:]
  1950. expected = self.index[:0]
  1951. self.assertTrue(empty.equals(expected))
  1952. # can't do in python 3
  1953. # tuples = self.index._tuple_index
  1954. # result = self.index & tuples
  1955. # self.assertTrue(result.equals(tuples))
  1956. def test_diff(self):
  1957. first = self.index
  1958. result = first - self.index[-3:]
  1959. expected = MultiIndex.from_tuples(sorted(self.index[:-3].values),
  1960. sortorder=0,
  1961. names=self.index.names)
  1962. tm.assert_isinstance(result, MultiIndex)
  1963. self.assertTrue(result.equals(expected))
  1964. self.assertEqual(result.names, self.index.names)
  1965. # empty difference: reflexive
  1966. result = self.index - self.index
  1967. expected = self.index[:0]
  1968. self.assertTrue(result.equals(expected))
  1969. self.assertEqual(result.names, self.index.names)
  1970. # empty difference: superset
  1971. result = self.index[-3:] - self.index
  1972. expected = self.index[:0]
  1973. self.assertTrue(result.equals(expected))
  1974. self.assertEqual(result.names, self.index.names)
  1975. # empty difference: degenerate
  1976. result = self.index[:0] - self.index
  1977. expected = self.index[:0]
  1978. self.assertTrue(result.equals(expected))
  1979. self.assertEqual(result.names, self.index.names)
  1980. # names not the same
  1981. chunklet = self.index[-3:]
  1982. chunklet.names = ['foo', 'baz']
  1983. result = first - chunklet
  1984. self.assertEqual(result.names, (None, None))
  1985. # empty, but non-equal
  1986. result = self.index - self.index.sortlevel(1)[0]
  1987. self.assertEqual(len(result), 0)
  1988. # raise Exception called with non-MultiIndex
  1989. result = first.diff(first._tuple_index)
  1990. self.assertTrue(result.equals(first[:0]))
  1991. # name from empty array
  1992. result = first.diff([])
  1993. self.assertTrue(first.equals(result))
  1994. self.assertEqual(first.names, result.names)
  1995. # name from non-empty array
  1996. result = first.diff([('foo', 'one')])
  1997. expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'),
  1998. ('foo', 'two'), ('qux', 'one'),
  1999. ('qux', 'two')])
  2000. expected.names = first.names
  2001. self.assertEqual(first.names, result.names)
  2002. assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list"
  2003. " of tuples", first.diff, [1, 2, 3, 4, 5])
  2004. def test_from_tuples(self):
  2005. assertRaisesRegexp(TypeError, 'Cannot infer number of levels from'
  2006. ' empty list', MultiIndex.from_tuples, [])
  2007. idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
  2008. self.assertEqual(len(idx), 2)
  2009. def test_argsort(self):
  2010. result = self.index.argsort()
  2011. expected = self.index._tuple_index.argsort()
  2012. self.assert_numpy_array_equal(result, expected)
  2013. def test_sortlevel(self):
  2014. import random
  2015. tuples = list(self.index)
  2016. random.shuffle(tuples)
  2017. index = MultiIndex.from_tuples(tuples)
  2018. sorted_idx, _ = index.sortlevel(0)
  2019. expected = MultiIndex.from_tuples(sorted(tuples))
  2020. self.assertTrue(sorted_idx.equals(expected))
  2021. sorted_idx, _ = index.sortlevel(0, ascending=False)
  2022. self.assertTrue(sorted_idx.equals(expected[::-1]))
  2023. sorted_idx, _ = index.sortlevel(1)
  2024. by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
  2025. expected = MultiIndex.from_tuples(by1)
  2026. self.assertTrue(sorted_idx.equals(expected))
  2027. sorted_idx, _ = index.sortlevel(1, ascending=False)
  2028. self.assertTrue(sorted_idx.equals(expected[::-1]))
  2029. def test_sortlevel_not_sort_remaining(self):
  2030. mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
  2031. sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
  2032. self.assertTrue(sorted_idx.equals(mi))
  2033. def test_sortlevel_deterministic(self):
  2034. tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
  2035. ('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
  2036. index = MultiIndex.from_tuples(tuples)
  2037. sorted_idx, _ = index.sortlevel(0)
  2038. expected = MultiIndex.from_tuples(sorted(tuples))
  2039. self.assertTrue(sorted_idx.equals(expected))
  2040. sorted_idx, _ = index.sortlevel(0, ascending=False)
  2041. self.assertTrue(sorted_idx.equals(expected[::-1]))
  2042. sorted_idx, _ = index.sortlevel(1)
  2043. by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
  2044. expected = MultiIndex.from_tuples(by1)
  2045. self.assertTrue(sorted_idx.equals(expected))
  2046. sorted_idx, _ = index.sortlevel(1, ascending=False)
  2047. self.assertTrue(sorted_idx.equals(expected[::-1]))
  2048. def test_dims(self):
  2049. pass
  2050. def test_drop(self):
  2051. dropped = self.index.drop([('foo', 'two'), ('qux', 'one')])
  2052. index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')])
  2053. dropped2 = self.index.drop(index)
  2054. expected = self.index[[0, 2, 3, 5]]
  2055. self.assertTrue(dropped.equals(expected))
  2056. self.assertTrue(dropped2.equals(expected))
  2057. dropped = self.index.drop(['bar'])
  2058. expected = self.index[[0, 1, 3, 4, 5]]
  2059. self.assertTrue(dropped.equals(expected))
  2060. index = MultiIndex.from_tuples([('bar', 'two')])
  2061. self.assertRaises(KeyError, self.index.drop, [('bar', 'two')])
  2062. self.assertRaises(KeyError, self.index.drop, index)
  2063. # mixed partial / full drop
  2064. dropped = self.index.drop(['foo', ('qux', 'one')])
  2065. expected = self.index[[2, 3, 5]]
  2066. self.assertTrue(dropped.equals(expected))
  2067. def test_droplevel_with_names(self):
  2068. index = self.index[self.index.get_loc('foo')]
  2069. dropped = index.droplevel(0)
  2070. self.assertEqual(dropped.name, 'second')
  2071. index = MultiIndex(levels=[Index(lrange(4)),
  2072. Index(lrange(4)),
  2073. Index(lrange(4))],
  2074. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  2075. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  2076. np.array([1, 0, 1, 1, 0, 0, 1, 0])],
  2077. names=['one', 'two', 'three'])
  2078. dropped = index.droplevel(0)
  2079. self.assertEqual(dropped.names, ('two', 'three'))
  2080. dropped = index.droplevel('two')
  2081. expected = index.droplevel(1)
  2082. self.assertTrue(dropped.equals(expected))
  2083. def test_droplevel_multiple(self):
  2084. index = MultiIndex(levels=[Index(lrange(4)),
  2085. Index(lrange(4)),
  2086. Index(lrange(4))],
  2087. labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  2088. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  2089. np.array([1, 0, 1, 1, 0, 0, 1, 0])],
  2090. names=['one', 'two', 'three'])
  2091. dropped = index[:2].droplevel(['three', 'one'])
  2092. expected = index[:2].droplevel(2).droplevel(0)
  2093. self.assertTrue(dropped.equals(expected))
  2094. def test_insert(self):
  2095. # key contained in all levels
  2096. new_index = self.index.insert(0, ('bar', 'two'))
  2097. self.assertTrue(new_index.equal_levels(self.index))
  2098. self.assertEqual(new_index[0], ('bar', 'two'))
  2099. # key not contained in all levels
  2100. new_index = self.index.insert(0, ('abc', 'three'))
  2101. self.assert_numpy_array_equal(new_index.levels[0],
  2102. list(self.index.levels[0]) + ['abc'])
  2103. self.assert_numpy_array_equal(new_index.levels[1],
  2104. list(self.index.levels[1]) + ['three'])
  2105. self.assertEqual(new_index[0], ('abc', 'three'))
  2106. # key wrong length
  2107. assertRaisesRegexp(ValueError, "Item must have length equal to number"
  2108. " of levels", self.index.insert, 0, ('foo2',))
  2109. def test_take_preserve_name(self):
  2110. taken = self.index.take([3, 0, 1])
  2111. self.assertEqual(taken.names, self.index.names)
  2112. def test_join_level(self):
  2113. def _check_how(other, how):
  2114. join_index, lidx, ridx = other.join(self.index, how=how,
  2115. level='second',
  2116. return_indexers=True)
  2117. exp_level = other.join(self.index.levels[1], how=how)
  2118. self.assertTrue(join_index.levels[0].equals(self.index.levels[0]))
  2119. self.assertTrue(join_index.levels[1].equals(exp_level))
  2120. # pare down levels
  2121. mask = np.array(
  2122. [x[1] in exp_level for x in self.index], dtype=bool)
  2123. exp_values = self.index.values[mask]
  2124. self.assert_numpy_array_equal(join_index.values, exp_values)
  2125. if how in ('outer', 'inner'):
  2126. join_index2, ridx2, lidx2 = \
  2127. self.index.join(other, how=how, level='second',
  2128. return_indexers=True)
  2129. self.assertTrue(join_index.equals(join_index2))
  2130. self.assert_numpy_array_equal(lidx, lidx2)
  2131. self.assert_numpy_array_equal(ridx, ridx2)
  2132. self.assert_numpy_array_equal(join_index2.values, exp_values)
  2133. def _check_all(other):
  2134. _check_how(other, 'outer')
  2135. _check_how(other, 'inner')
  2136. _check_how(other, 'left')
  2137. _check_how(other, 'right')
  2138. _check_all(Index(['three', 'one', 'two']))
  2139. _check_all(Index(['one']))
  2140. _check_all(Index(['one', 'three']))
  2141. # some corner cases
  2142. idx = Index(['three', 'one', 'two'])
  2143. result = idx.join(self.index, level='second')
  2144. tm.assert_isinstance(result, MultiIndex)
  2145. assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous",
  2146. self.index.join, self.index, level=1)
  2147. def test_join_self(self):
  2148. kinds = 'outer', 'inner', 'left', 'right'
  2149. for kind in kinds:
  2150. res = self.index
  2151. joined = res.join(res, how=kind)
  2152. self.assertIs(res, joined)
  2153. def test_reindex(self):
  2154. result, indexer = self.index.reindex(list(self.index[:4]))
  2155. tm.assert_isinstance(result, MultiIndex)
  2156. self.check_level_names(result, self.index[:4].names)
  2157. result, indexer = self.index.reindex(list(self.index))
  2158. tm.assert_isinstance(result, MultiIndex)
  2159. self.assertIsNone(indexer)
  2160. self.check_level_names(result, self.index.names)
  def test_reindex_level(self):
      # Level-based reindexing is compared against the equivalent
      # level-based joins, in both directions (MultiIndex -> flat Index and
      # flat Index -> MultiIndex).
      single = Index(['one'])

      mi_target, mi_indexer = self.index.reindex(single, level='second')
      flat_target, flat_indexer = single.reindex(self.index, level='second')

      expected_right = self.index.join(single, level='second', how='right')
      expected_left = self.index.join(single, level='second', how='left')

      self.assertTrue(mi_target.equals(expected_right))
      self.assert_numpy_array_equal(mi_indexer, np.array([0, 2, 4]))

      self.assertTrue(flat_target.equals(expected_left))
      self.assert_numpy_array_equal(flat_indexer,
                                    np.array([0, -1, 0, -1, 0, -1]))

      # Passing a fill method together with ``level`` must be rejected.
      fill_error = "Fill method not supported"
      assertRaisesRegexp(TypeError, fill_error,
                         self.index.reindex, self.index,
                         method='pad', level='second')
      assertRaisesRegexp(TypeError, fill_error,
                         single.reindex, single,
                         method='bfill', level='first')
  def test_has_duplicates(self):
      # The fixture index itself is expected to be duplicate-free ...
      self.assertFalse(self.index.has_duplicates)

      # ... while appending it to itself repeats every entry.
      doubled = self.index.append(self.index)
      self.assertTrue(doubled.has_duplicates)

      # Hand-built labels: the pair (0, 0) appears at positions 0 and 3.
      outer_labels = [0, 0, 0, 0, 1, 1, 1]
      inner_labels = [0, 1, 2, 0, 0, 1, 2]
      repeated = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                            labels=[outer_labels, inner_labels])
      self.assertTrue(repeated.has_duplicates)
  def test_tolist(self):
      # ``tolist()`` must equal a plain list built from ``values``.
      as_list = self.index.tolist()
      self.assertEqual(as_list, list(self.index.values))
  def test_repr_with_unicode_data(self):
      # With the display encoding set to UTF-8, the repr of an index that
      # holds a non-ASCII character must not contain a "\u" escape.
      with cf.option_context("display.encoding", 'UTF-8'):
          frame = pd.DataFrame({"a": [u("\u05d0"), 2, 3],
                                "b": [4, 5, 6],
                                "c": [7, 8, 9]})
          index = frame.set_index(["a", "b"]).index
          rendered = repr(index)
          # we don't want unicode-escaped
          self.assertFalse("\\u" in rendered)
  def test_repr_roundtrip(self):
      # Evaluating the fixture's own repr should rebuild an equal index.
      # (eval is only ever fed the repr of a test-owned object here.)
      rebuilt = eval(repr(self.index))
      tm.assert_index_equal(rebuilt, self.index)
  def test_unicode_string_with_unicode(self):
      # Smoke test: converting an index that holds a non-ASCII character to
      # the interpreter's text type must not raise.
      frame = pd.DataFrame({"a": [u("\u05d0"), 2, 3],
                            "b": [4, 5, 6],
                            "c": [7, 8, 9]})
      idx = frame.set_index(["a", "b"]).index

      # text type is ``str`` on Python 3 and ``compat.text_type`` otherwise
      to_text = str if compat.PY3 else compat.text_type
      to_text(idx)
  def test_bytestring_with_unicode(self):
      # Smoke test: converting an index that holds a non-ASCII character to
      # the interpreter's byte-string type must not raise.
      frame = pd.DataFrame({"a": [u("\u05d0"), 2, 3],
                            "b": [4, 5, 6],
                            "c": [7, 8, 9]})
      idx = frame.set_index(["a", "b"]).index

      # byte-string type is ``bytes`` on Python 3 and ``str`` otherwise
      to_bytes = bytes if compat.PY3 else str
      to_bytes(idx)
  def test_slice_keep_name(self):
      # Slicing a named MultiIndex must keep the level names.
      tuples = [('a', 'b'), (1, 2), ('c', 'd')]
      named = MultiIndex.from_tuples(tuples, names=['x', 'y'])
      self.assertEqual(named[1:].names, named.names)
  def test_isnull_behavior(self):
      # should not segfault GH5123
      # NOTE: if MI representation changes, may make sense to allow
      # isnull(MI)
      multi = self.index
      with tm.assertRaises(NotImplementedError):
          # calling isnull on the MultiIndex fixture is expected to raise
          pd.isnull(multi)
  def test_level_setting_resets_attributes(self):
      # Replacing the levels in place must be reflected by ``is_monotonic``
      # afterwards: the index starts out monotonic and must stop being so
      # once the new, unordered levels are installed.
      #
      # TestCase assertions are used instead of bare ``assert`` statements
      # so the checks still run under ``python -O`` and match the rest of
      # this class.
      ind = MultiIndex.from_arrays([
          ['A', 'A', 'B', 'B', 'B'],
          [1, 2, 1, 2, 3]])
      # Read the property before mutating so a stale cached value (if any)
      # would be exposed by the second check.
      self.assertTrue(ind.is_monotonic)
      ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
                     inplace=True)
      # if this fails, probably didn't reset the cache correctly.
      self.assertFalse(ind.is_monotonic)
  def test_get_combined_index():
      # Combining an empty list of indexes should give something equal to
      # an empty Index.
      from pandas.core.index import _get_combined_index

      combined = _get_combined_index([])
      assert combined.equals(Index([]))
  if __name__ == '__main__':
      # Script entry point: hand this file to nose with the project's usual
      # debugging flags.
      nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
      nose.runmodule(argv=nose_argv, exit=False)