
/pandas/tools/tests/test_concat.py

http://github.com/wesm/pandas
Possible License(s): BSD-3-Clause, Apache-2.0
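# Tests for pd.concat, DataFrame.append and pandas.types.concat.union_categoricals.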
import nose
import numpy as np
from numpy.random import randn
from datetime import datetime
from pandas.compat import StringIO
import pandas as pd
from pandas import (DataFrame, concat,
                    read_csv, isnull, Series, date_range,
                    Index, Panel, MultiIndex, Timestamp,
                    DatetimeIndex, Categorical)
from pandas.types.concat import union_categoricals
from pandas.util import testing as tm
from pandas.util.testing import (assert_frame_equal,
                                 makeCustomDataframe as mkdf,
                                 assert_almost_equal)


class ConcatenateBase(tm.TestCase):

    _multiprocess_can_split_ = True

    def setUp(self):
        self.frame = DataFrame(tm.getSeriesData())
        self.mixed_frame = self.frame.copy()
        self.mixed_frame['foo'] = 'bar'
class TestAppend(ConcatenateBase):

    def test_append(self):
        begin_index = self.frame.index[:5]
        end_index = self.frame.index[5:]

        begin_frame = self.frame.reindex(begin_index)
        end_frame = self.frame.reindex(end_index)

        appended = begin_frame.append(end_frame)
        assert_almost_equal(appended['A'], self.frame['A'])

        del end_frame['A']
        partial_appended = begin_frame.append(end_frame)
        self.assertIn('A', partial_appended)

        partial_appended = end_frame.append(begin_frame)
        self.assertIn('A', partial_appended)

        # mixed type handling
        appended = self.mixed_frame[:5].append(self.mixed_frame[5:])
        assert_frame_equal(appended, self.mixed_frame)

        # what to test here
        mixed_appended = self.mixed_frame[:5].append(self.frame[5:])
        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:])

        # all equal except 'foo' column
        assert_frame_equal(
            mixed_appended.reindex(columns=['A', 'B', 'C', 'D']),
            mixed_appended2.reindex(columns=['A', 'B', 'C', 'D']))

        # append empty
        empty = DataFrame({})

        appended = self.frame.append(empty)
        assert_frame_equal(self.frame, appended)
        self.assertIsNot(appended, self.frame)

        appended = empty.append(self.frame)
        assert_frame_equal(self.frame, appended)
        self.assertIsNot(appended, self.frame)

        # overlap
        self.assertRaises(ValueError, self.frame.append, self.frame,
                          verify_integrity=True)

        # new columns
        # GH 6129
        df = DataFrame({'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}})
        row = Series([5, 6, 7], index=['a', 'b', 'c'], name='z')
        expected = DataFrame({'a': {'x': 1, 'y': 2, 'z': 5}, 'b': {
            'x': 3, 'y': 4, 'z': 6}, 'c': {'z': 7}})
        result = df.append(row)
        assert_frame_equal(result, expected)

    def test_append_length0_frame(self):
        df = DataFrame(columns=['A', 'B', 'C'])
        df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
        df5 = df.append(df3)

        expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
        assert_frame_equal(df5, expected)

    def test_append_records(self):
        arr1 = np.zeros((2,), dtype=('i4,f4,a10'))
        arr1[:] = [(1, 2., 'Hello'), (2, 3., "World")]

        arr2 = np.zeros((3,), dtype=('i4,f4,a10'))
        arr2[:] = [(3, 4., 'foo'),
                   (5, 6., "bar"),
                   (7., 8., 'baz')]

        df1 = DataFrame(arr1)
        df2 = DataFrame(arr2)

        result = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate((arr1, arr2)))
        assert_frame_equal(result, expected)

    def test_append_different_columns(self):
        df = DataFrame({'bools': np.random.randn(10) > 0,
                        'ints': np.random.randint(0, 10, 10),
                        'floats': np.random.randn(10),
                        'strings': ['foo', 'bar'] * 5})

        a = df[:5].ix[:, ['bools', 'ints', 'floats']]
        b = df[5:].ix[:, ['strings', 'ints', 'floats']]

        appended = a.append(b)
        self.assertTrue(isnull(appended['strings'][0:4]).all())
        self.assertTrue(isnull(appended['bools'][5:]).all())

    def test_append_many(self):
        chunks = [self.frame[:5], self.frame[5:10],
                  self.frame[10:15], self.frame[15:]]

        result = chunks[0].append(chunks[1:])
        tm.assert_frame_equal(result, self.frame)

        chunks[-1] = chunks[-1].copy()
        chunks[-1]['foo'] = 'bar'
        result = chunks[0].append(chunks[1:])
        tm.assert_frame_equal(result.ix[:, self.frame.columns], self.frame)
        self.assertTrue((result['foo'][15:] == 'bar').all())
        self.assertTrue(result['foo'][:15].isnull().all())

    def test_append_preserve_index_name(self):
        # #980
        df1 = DataFrame(data=None, columns=['A', 'B', 'C'])
        df1 = df1.set_index(['A'])
        df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]],
                        columns=['A', 'B', 'C'])
        df2 = df2.set_index(['A'])

        result = df1.append(df2)
        self.assertEqual(result.index.name, 'A')

    def test_append_dtype_coerce(self):

        # GH 4993
        # appending with datetime will incorrectly convert datetime64
        import datetime as dt
        from pandas import NaT

        df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
                                            dt.datetime(2013, 1, 2, 0, 0)],
                        columns=['start_time'])
        df2 = DataFrame(index=[4, 5], data=[[dt.datetime(2013, 1, 3, 0, 0),
                                             dt.datetime(2013, 1, 3, 6, 10)],
                                            [dt.datetime(2013, 1, 4, 0, 0),
                                             dt.datetime(2013, 1, 4, 7, 10)]],
                        columns=['start_time', 'end_time'])

        expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10),
                                   dt.datetime(2013, 1, 4, 7, 10)],
                                  name='end_time'),
                           Series([dt.datetime(2013, 1, 1, 0, 0),
                                   dt.datetime(2013, 1, 2, 0, 0),
                                   dt.datetime(2013, 1, 3, 0, 0),
                                   dt.datetime(2013, 1, 4, 0, 0)],
                                  name='start_time')], axis=1)
        result = df1.append(df2, ignore_index=True)
        assert_frame_equal(result, expected)

    def test_append_missing_column_proper_upcast(self):
        df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
        df2 = DataFrame({'B': np.array([True, False, True, False],
                                       dtype=bool)})

        appended = df1.append(df2, ignore_index=True)
        self.assertEqual(appended['A'].dtype, 'f8')
        self.assertEqual(appended['B'].dtype, 'O')
class TestConcatenate(ConcatenateBase):

    def test_concat_copy(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
        df3 = DataFrame({5: 'foo'}, index=range(4))

        # these are actual copies
        result = concat([df, df2, df3], axis=1, copy=True)
        for b in result._data.blocks:
            self.assertIsNone(b.values.base)

        # these are the same
        result = concat([df, df2, df3], axis=1, copy=False)
        for b in result._data.blocks:
            if b.is_float:
                self.assertTrue(
                    b.values.base is df._data.blocks[0].values.base)
            elif b.is_integer:
                self.assertTrue(
                    b.values.base is df2._data.blocks[0].values.base)
            elif b.is_object:
                self.assertIsNotNone(b.values.base)

        # float block was consolidated
        df4 = DataFrame(np.random.randn(4, 1))
        result = concat([df, df2, df3, df4], axis=1, copy=False)
        for b in result._data.blocks:
            if b.is_float:
                self.assertIsNone(b.values.base)
            elif b.is_integer:
                self.assertTrue(
                    b.values.base is df2._data.blocks[0].values.base)
            elif b.is_object:
                self.assertIsNotNone(b.values.base)
    def test_concat_with_group_keys(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        # axis=0
        df = DataFrame(np.random.randn(3, 4))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1])
        exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
                                            [0, 1, 2, 0, 1, 2, 3]])
        expected = DataFrame(np.r_[df.values, df2.values],
                             index=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1])
        exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                             [0, 1, 2, 0, 1, 2]])
        expected = DataFrame(np.r_[df.values, df.values],
                             index=exp_index2)
        tm.assert_frame_equal(result, expected)

        # axis=1
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df2.values],
                             columns=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df.values],
                             columns=exp_index2)
        tm.assert_frame_equal(result, expected)
    def test_concat_keys_specific_levels(self):
        df = DataFrame(np.random.randn(10, 4))
        pieces = [df.ix[:, [0, 1]], df.ix[:, [2]], df.ix[:, [3]]]
        level = ['three', 'two', 'one', 'zero']
        result = concat(pieces, axis=1, keys=['one', 'two', 'three'],
                        levels=[level],
                        names=['group_key'])

        self.assert_index_equal(result.columns.levels[0],
                                Index(level, name='group_key'))
        self.assertEqual(result.columns.names[0], 'group_key')

    def test_concat_dataframe_keys_bug(self):
        t1 = DataFrame({
            'value': Series([1, 2, 3], index=Index(['a', 'b', 'c'],
                                                   name='id'))})
        t2 = DataFrame({
            'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})

        # it works
        result = concat([t1, t2], axis=1, keys=['t1', 't2'])
        self.assertEqual(list(result.columns), [('t1', 'value'),
                                                ('t2', 'value')])

    def test_concat_series_partial_columns_names(self):
        # GH10698
        foo = Series([1, 2], name='foo')
        bar = Series([1, 2])
        baz = Series([4, 5])

        result = concat([foo, bar, baz], axis=1)
        expected = DataFrame({'foo': [1, 2], 0: [1, 2], 1: [
            4, 5]}, columns=['foo', 0, 1])
        tm.assert_frame_equal(result, expected)

        result = concat([foo, bar, baz], axis=1, keys=[
                        'red', 'blue', 'yellow'])
        expected = DataFrame({'red': [1, 2], 'blue': [1, 2], 'yellow': [
            4, 5]}, columns=['red', 'blue', 'yellow'])
        tm.assert_frame_equal(result, expected)

        result = concat([foo, bar, baz], axis=1, ignore_index=True)
        expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
        tm.assert_frame_equal(result, expected)
    def test_concat_dict(self):
        frames = {'foo': DataFrame(np.random.randn(4, 3)),
                  'bar': DataFrame(np.random.randn(4, 3)),
                  'baz': DataFrame(np.random.randn(4, 3)),
                  'qux': DataFrame(np.random.randn(4, 3))}

        sorted_keys = sorted(frames)

        result = concat(frames)
        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys)
        tm.assert_frame_equal(result, expected)

        result = concat(frames, axis=1)
        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys,
                          axis=1)
        tm.assert_frame_equal(result, expected)

        keys = ['baz', 'foo', 'bar']
        result = concat(frames, keys=keys)
        expected = concat([frames[k] for k in keys], keys=keys)
        tm.assert_frame_equal(result, expected)

    def test_concat_ignore_index(self):
        frame1 = DataFrame({"test1": ["a", "b", "c"],
                            "test2": [1, 2, 3],
                            "test3": [4.5, 3.2, 1.2]})
        frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
        frame1.index = Index(["x", "y", "z"])
        frame2.index = Index(["x", "y", "q"])

        v1 = concat([frame1, frame2], axis=1, ignore_index=True)

        nan = np.nan
        expected = DataFrame([[nan, nan, nan, 4.3],
                              ['a', 1, 4.5, 5.2],
                              ['b', 2, 3.2, 2.2],
                              ['c', 3, 1.2, nan]],
                             index=Index(["q", "x", "y", "z"]))

        tm.assert_frame_equal(v1, expected)
    def test_concat_multiindex_with_keys(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
        result = concat([frame, frame], keys=[0, 1], names=['iteration'])

        self.assertEqual(result.index.names, ('iteration',) + index.names)
        tm.assert_frame_equal(result.ix[0], frame)
        tm.assert_frame_equal(result.ix[1], frame)
        self.assertEqual(result.index.nlevels, 3)

    def test_concat_multiindex_with_tz(self):
        # GH 6606
        df = DataFrame({'dt': [datetime(2014, 1, 1),
                               datetime(2014, 1, 2),
                               datetime(2014, 1, 3)],
                        'b': ['A', 'B', 'C'],
                        'c': [1, 2, 3], 'd': [4, 5, 6]})
        df['dt'] = df['dt'].apply(lambda d: Timestamp(d, tz='US/Pacific'))
        df = df.set_index(['dt', 'b'])

        exp_idx1 = DatetimeIndex(['2014-01-01', '2014-01-02',
                                  '2014-01-03'] * 2,
                                 tz='US/Pacific', name='dt')
        exp_idx2 = Index(['A', 'B', 'C'] * 2, name='b')
        exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
        expected = DataFrame({'c': [1, 2, 3] * 2, 'd': [4, 5, 6] * 2},
                             index=exp_idx, columns=['c', 'd'])

        result = concat([df, df])
        tm.assert_frame_equal(result, expected)
    def test_concat_keys_and_levels(self):
        df = DataFrame(np.random.randn(1, 3))
        df2 = DataFrame(np.random.randn(1, 4))

        levels = [['foo', 'baz'], ['one', 'two']]
        names = ['first', 'second']
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        levels=levels,
                        names=names)
        expected = concat([df, df2, df, df2])
        exp_index = MultiIndex(levels=levels + [[0]],
                               labels=[[0, 0, 1, 1], [0, 1, 0, 1],
                                       [0, 0, 0, 0]],
                               names=names + [None])
        expected.index = exp_index

        assert_frame_equal(result, expected)

        # no names
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        levels=levels)
        self.assertEqual(result.index.names, (None,) * 3)

        # no levels
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        names=['first', 'second'])
        self.assertEqual(result.index.names, ('first', 'second') + (None,))
        self.assert_index_equal(result.index.levels[0],
                                Index(['baz', 'foo'], name='first'))

    def test_concat_keys_levels_no_overlap(self):
        # GH #1406
        df = DataFrame(np.random.randn(1, 3), index=['a'])
        df2 = DataFrame(np.random.randn(1, 4), index=['b'])

        self.assertRaises(ValueError, concat, [df, df],
                          keys=['one', 'two'], levels=[['foo', 'bar', 'baz']])

        self.assertRaises(ValueError, concat, [df, df2],
                          keys=['one', 'two'], levels=[['foo', 'bar', 'baz']])

    def test_concat_rename_index(self):
        a = DataFrame(np.random.rand(3, 3),
                      columns=list('ABC'),
                      index=Index(list('abc'), name='index_a'))
        b = DataFrame(np.random.rand(3, 3),
                      columns=list('ABC'),
                      index=Index(list('abc'), name='index_b'))

        result = concat([a, b], keys=['key0', 'key1'],
                        names=['lvl0', 'lvl1'])

        exp = concat([a, b], keys=['key0', 'key1'], names=['lvl0'])
        names = list(exp.index.names)
        names[1] = 'lvl1'
        exp.index.set_names(names, inplace=True)

        tm.assert_frame_equal(result, exp)
        self.assertEqual(result.index.names, exp.index.names)
    def test_crossed_dtypes_weird_corner(self):
        columns = ['A', 'B', 'C', 'D']
        df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='f8'),
                         'B': np.array([1, 2, 3, 4], dtype='i8'),
                         'C': np.array([1, 2, 3, 4], dtype='f8'),
                         'D': np.array([1, 2, 3, 4], dtype='i8')},
                        columns=columns)

        df2 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8'),
                         'B': np.array([1, 2, 3, 4], dtype='f8'),
                         'C': np.array([1, 2, 3, 4], dtype='i8'),
                         'D': np.array([1, 2, 3, 4], dtype='f8')},
                        columns=columns)

        appended = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate([df1.values, df2.values], axis=0),
                             columns=columns)
        tm.assert_frame_equal(appended, expected)

        df = DataFrame(np.random.randn(1, 3), index=['a'])
        df2 = DataFrame(np.random.randn(1, 4), index=['b'])
        result = concat(
            [df, df2], keys=['one', 'two'], names=['first', 'second'])
        self.assertEqual(result.index.names, ('first', 'second'))

    def test_dups_index(self):
        # GH 4771

        # single dtypes
        df = DataFrame(np.random.randint(0, 10, size=40).reshape(
            10, 4), columns=['A', 'A', 'C', 'C'])

        result = concat([df, df], axis=1)
        assert_frame_equal(result.iloc[:, :4], df)
        assert_frame_equal(result.iloc[:, 4:], df)

        result = concat([df, df], axis=0)
        assert_frame_equal(result.iloc[:10], df)
        assert_frame_equal(result.iloc[10:], df)

        # multi dtypes
        df = concat([DataFrame(np.random.randn(10, 4),
                               columns=['A', 'A', 'B', 'B']),
                     DataFrame(np.random.randint(0, 10, size=20)
                               .reshape(10, 2),
                               columns=['A', 'C'])],
                    axis=1)

        result = concat([df, df], axis=1)
        assert_frame_equal(result.iloc[:, :6], df)
        assert_frame_equal(result.iloc[:, 6:], df)

        result = concat([df, df], axis=0)
        assert_frame_equal(result.iloc[:10], df)
        assert_frame_equal(result.iloc[10:], df)

        # append
        result = df.iloc[0:8, :].append(df.iloc[8:])
        assert_frame_equal(result, df)

        result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
        assert_frame_equal(result, df)

        expected = concat([df, df], axis=0)
        result = df.append(df)
        assert_frame_equal(result, expected)
    def test_with_mixed_tuples(self):
        # 10697
        # columns have mixed tuples, so handle properly
        df1 = DataFrame({u'A': 'foo', (u'B', 1): 'bar'}, index=range(2))
        df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))

        # it works
        concat([df1, df2])

    def test_handle_empty_objects(self):
        df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))

        baz = df[:5].copy()
        baz['foo'] = 'bar'
        empty = df[5:5]

        frames = [baz, empty, empty, df[5:]]
        concatted = concat(frames, axis=0)

        expected = df.ix[:, ['a', 'b', 'c', 'd', 'foo']]
        expected['foo'] = expected['foo'].astype('O')
        expected.loc[0:4, 'foo'] = 'bar'

        tm.assert_frame_equal(concatted, expected)

        # empty as first element with time series
        # GH3259
        df = DataFrame(dict(A=range(10000)), index=date_range(
            '20130101', periods=10000, freq='s'))
        empty = DataFrame()
        result = concat([df, empty], axis=1)
        assert_frame_equal(result, df)
        result = concat([empty, df], axis=1)
        assert_frame_equal(result, df)

        result = concat([df, empty])
        assert_frame_equal(result, df)
        result = concat([empty, df])
        assert_frame_equal(result, df)
    def test_concat_mixed_objs(self):

        # concat mixed series/frames
        # GH 2385

        # axis 1
        index = date_range('01-Jan-2013', periods=10, freq='H')
        arr = np.arange(10, dtype='int64')
        s1 = Series(arr, index=index)
        s2 = Series(arr, index=index)
        df = DataFrame(arr.reshape(-1, 1), index=index)

        expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                             index=index, columns=[0, 0])
        result = concat([df, df], axis=1)
        assert_frame_equal(result, expected)

        expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                             index=index, columns=[0, 1])
        result = concat([s1, s2], axis=1)
        assert_frame_equal(result, expected)

        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index, columns=[0, 1, 2])
        result = concat([s1, s2, s1], axis=1)
        assert_frame_equal(result, expected)

        expected = DataFrame(np.repeat(arr, 5).reshape(-1, 5),
                             index=index, columns=[0, 0, 1, 2, 3])
        result = concat([s1, df, s2, s2, s1], axis=1)
        assert_frame_equal(result, expected)

        # with names
        s1.name = 'foo'
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index, columns=['foo', 0, 0])
        result = concat([s1, df, s2], axis=1)
        assert_frame_equal(result, expected)

        s2.name = 'bar'
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index, columns=['foo', 0, 'bar'])
        result = concat([s1, df, s2], axis=1)
        assert_frame_equal(result, expected)

        # ignore index
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index, columns=[0, 1, 2])
        result = concat([s1, df, s2], axis=1, ignore_index=True)
        assert_frame_equal(result, expected)

        # axis 0
        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1),
                             index=index.tolist() * 3, columns=[0])
        result = concat([s1, df, s2])
        assert_frame_equal(result, expected)

        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
        result = concat([s1, df, s2], ignore_index=True)
        assert_frame_equal(result, expected)

        # invalid concatenation of mixed dims
        panel = tm.makePanel()
        self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1))
    def test_empty_dtype_coerce(self):

        # xref to #12411
        # xref to #12045
        # xref to #11594
        # see below

        # 10571
        df1 = DataFrame(data=[[1, None], [2, None]], columns=['a', 'b'])
        df2 = DataFrame(data=[[3, None], [4, None]], columns=['a', 'b'])
        result = concat([df1, df2])
        expected = df1.dtypes
        tm.assert_series_equal(result.dtypes, expected)

    def test_dtype_coerceion(self):

        # 12411
        df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
                                 pd.NaT]})

        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)

        # 12045
        import datetime
        df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
                                 datetime.datetime(1012, 1, 2)]})
        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)

        # 11594
        df = DataFrame({'text': ['some words'] + [None] * 9})
        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)
    def test_panel_concat_other_axes(self):
        panel = tm.makePanel()

        p1 = panel.ix[:, :5, :]
        p2 = panel.ix[:, 5:, :]

        result = concat([p1, p2], axis=1)
        tm.assert_panel_equal(result, panel)

        p1 = panel.ix[:, :, :2]
        p2 = panel.ix[:, :, 2:]

        result = concat([p1, p2], axis=2)
        tm.assert_panel_equal(result, panel)

        # if things are a bit misbehaved
        p1 = panel.ix[:2, :, :2]
        p2 = panel.ix[:, :, 2:]
        p1['ItemC'] = 'baz'

        result = concat([p1, p2], axis=2)

        expected = panel.copy()
        expected['ItemC'] = expected['ItemC'].astype('O')
        expected.ix['ItemC', :, :2] = 'baz'
        tm.assert_panel_equal(result, expected)

    def test_panel_concat_buglet(self):
        # #2257
        def make_panel():
            index = 5
            cols = 3

            def df():
                return DataFrame(np.random.randn(index, cols),
                                 index=["I%s" % i for i in range(index)],
                                 columns=["C%s" % i for i in range(cols)])
            return Panel(dict([("Item%s" % x, df())
                               for x in ['A', 'B', 'C']]))

        panel1 = make_panel()
        panel2 = make_panel()

        panel2 = panel2.rename_axis(dict([(x, "%s_1" % x)
                                          for x in panel2.major_axis]),
                                    axis=1)

        panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1)
        panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)

        # it works!
        concat([panel1, panel3], axis=1, verify_integrity=True)

    def test_panel4d_concat(self):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            p4d = tm.makePanel4D()

            p1 = p4d.ix[:, :, :5, :]
            p2 = p4d.ix[:, :, 5:, :]

            result = concat([p1, p2], axis=2)
            tm.assert_panel4d_equal(result, p4d)

            p1 = p4d.ix[:, :, :, :2]
            p2 = p4d.ix[:, :, :, 2:]

            result = concat([p1, p2], axis=3)
            tm.assert_panel4d_equal(result, p4d)

    def test_panel4d_concat_mixed_type(self):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            p4d = tm.makePanel4D()

            # if things are a bit misbehaved
            p1 = p4d.ix[:, :2, :, :2]
            p2 = p4d.ix[:, :, :, 2:]
            p1['L5'] = 'baz'

            result = concat([p1, p2], axis=3)

            p2['L5'] = np.nan
            expected = concat([p1, p2], axis=3)
            expected = expected.ix[result.labels]

            tm.assert_panel4d_equal(result, expected)
    def test_concat_series(self):

        ts = tm.makeTimeSeries()
        ts.name = 'foo'

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        self.assertEqual(result.name, ts.name)

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]'))

        exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]),
                      np.arange(len(ts))]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index],
                               labels=exp_labels)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)

    def test_concat_series_axis1(self):
        ts = tm.makeTimeSeries()

        pieces = [ts[:-2], ts[2:], ts[2:-2]]

        result = concat(pieces, axis=1)
        expected = DataFrame(pieces).T
        assert_frame_equal(result, expected)

        result = concat(pieces, keys=['A', 'B', 'C'], axis=1)
        expected = DataFrame(pieces, index=['A', 'B', 'C']).T
        assert_frame_equal(result, expected)

        # preserve series names, #2489
        s = Series(randn(5), name='A')
        s2 = Series(randn(5), name='B')

        result = concat([s, s2], axis=1)
        expected = DataFrame({'A': s, 'B': s2})
        assert_frame_equal(result, expected)

        s2.name = None
        result = concat([s, s2], axis=1)
        self.assertTrue(np.array_equal(
            result.columns, Index(['A', 0], dtype='object')))

        # must reindex, #2603
        s = Series(randn(3), index=['c', 'a', 'b'], name='A')
        s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
        result = concat([s, s2], axis=1)
        expected = DataFrame({'A': s, 'B': s2})
        assert_frame_equal(result, expected)
    def test_concat_single_with_key(self):
        df = DataFrame(np.random.randn(10, 4))

        result = concat([df], keys=['foo'])
        expected = concat([df, df], keys=['foo', 'bar'])
        tm.assert_frame_equal(result, expected[:10])

    def test_concat_exclude_none(self):
        df = DataFrame(np.random.randn(10, 4))

        pieces = [df[:5], None, None, df[5:]]
        result = concat(pieces)
        tm.assert_frame_equal(result, df)
        self.assertRaises(ValueError, concat, [None, None])

    def test_concat_datetime64_block(self):
        from pandas.tseries.index import date_range

        rng = date_range('1/1/2000', periods=10)

        df = DataFrame({'time': rng})

        result = concat([df, df])
        self.assertTrue((result.iloc[:10]['time'] == rng).all())
        self.assertTrue((result.iloc[10:]['time'] == rng).all())

    def test_concat_timedelta64_block(self):
        from pandas import to_timedelta

        rng = to_timedelta(np.arange(10), unit='s')

        df = DataFrame({'time': rng})

        result = concat([df, df])
        self.assertTrue((result.iloc[:10]['time'] == rng).all())
        self.assertTrue((result.iloc[10:]['time'] == rng).all())

    def test_concat_keys_with_none(self):
        # #1649
        df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])

        result = concat(dict(a=None, b=df0, c=df0[:2], d=df0[:1], e=df0))
        expected = concat(dict(b=df0, c=df0[:2], d=df0[:1], e=df0))
        tm.assert_frame_equal(result, expected)

        result = concat([None, df0, df0[:2], df0[:1], df0],
                        keys=['a', 'b', 'c', 'd', 'e'])
        expected = concat([df0, df0[:2], df0[:1], df0],
                          keys=['b', 'c', 'd', 'e'])
        tm.assert_frame_equal(result, expected)
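    # union_categoricals tests (GH 13361, GH 13759, GH 13846)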
    def test_union_categorical(self):
        # GH 13361
        data = [
            (list('abc'), list('abd'), list('abcabd')),
            ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
            ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),

            (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'],
             ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']),

            (pd.date_range('2014-01-01', '2014-01-05'),
             pd.date_range('2014-01-06', '2014-01-07'),
             pd.date_range('2014-01-01', '2014-01-07')),

            (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'),
             pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'),
             pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')),

            (pd.period_range('2014-01-01', '2014-01-05'),
             pd.period_range('2014-01-06', '2014-01-07'),
             pd.period_range('2014-01-01', '2014-01-07')),
        ]

        for a, b, combined in data:
            result = union_categoricals([Categorical(a), Categorical(b)])
            expected = Categorical(combined)
            tm.assert_categorical_equal(result, expected,
                                        check_category_order=True)

        # new categories ordered by appearance
        s = Categorical(['x', 'y', 'z'])
        s2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([s, s2])
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['x', 'y', 'z', 'a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        s = Categorical([0, 1.2, 2], ordered=True)
        s2 = Categorical([0, 1.2, 2], ordered=True)
        result = union_categoricals([s, s2])
        expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
        tm.assert_categorical_equal(result, expected)

        # must exactly match types
        s = Categorical([0, 1.2, 2])
        s2 = Categorical([2, 3, 4])
        msg = 'dtype of categories must be the same'
        with tm.assertRaisesRegexp(TypeError, msg):
            union_categoricals([s, s2])

        msg = 'No Categoricals to union'
        with tm.assertRaisesRegexp(ValueError, msg):
            union_categoricals([])

    def test_union_categoricals_nan(self):
        # GH 13759
        res = union_categoricals([pd.Categorical([1, 2, np.nan]),
                                  pd.Categorical([3, 2, np.nan])])
        exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical(['A', 'B']),
                                  pd.Categorical(['B', 'B', np.nan])])
        exp = Categorical(['A', 'B', 'B', 'B', np.nan])
        tm.assert_categorical_equal(res, exp)

        val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'),
                pd.NaT]
        val2 = [pd.NaT, pd.Timestamp('2011-01-01'),
                pd.Timestamp('2011-02-01')]

        res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)])
        exp = Categorical(val1 + val2,
                          categories=[pd.Timestamp('2011-01-01'),
                                      pd.Timestamp('2011-03-01'),
                                      pd.Timestamp('2011-02-01')])
        tm.assert_categorical_equal(res, exp)

        # all NaN
        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
                                  pd.Categorical(['X'])])
        exp = Categorical([np.nan, np.nan, 'X'])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
                                  pd.Categorical([np.nan, np.nan])])
        exp = Categorical([np.nan, np.nan, np.nan, np.nan])
        tm.assert_categorical_equal(res, exp)
    def test_union_categoricals_empty(self):
        # GH 13759
        res = union_categoricals([pd.Categorical([]),
                                  pd.Categorical([])])
        exp = Categorical([])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical([]),
                                  pd.Categorical([1.0])])
        exp = Categorical([1.0])
        tm.assert_categorical_equal(res, exp)

        # to make dtype equal
        nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
        res = union_categoricals([nanc,
                                  pd.Categorical([])])
        tm.assert_categorical_equal(res, nanc)

    def test_union_categorical_same_category(self):
        # check fastpath
        c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4])
        res = union_categoricals([c1, c2])
        exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan],
                          categories=[1, 2, 3, 4])
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z'])
        c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z'])
        res = union_categoricals([c1, c2])
        exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'],
                          categories=['x', 'y', 'z'])
        tm.assert_categorical_equal(res, exp)

    def test_union_categoricals_ordered(self):
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], ordered=False)

        msg = 'Categorical.ordered must be the same'
        with tm.assertRaisesRegexp(TypeError, msg):
            union_categoricals([c1, c2])

        res = union_categoricals([c1, c1])
        exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

        res = union_categoricals([c1, c2])
        exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

        msg = "to union ordered Categoricals, all categories must be the same"
        with tm.assertRaisesRegexp(TypeError, msg):
            union_categoricals([c1, c2])
    def test_union_categoricals_sort(self):
        # GH 13846
        c1 = Categorical(['x', 'y', 'z'])
        c2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['a', 'b', 'c', 'x', 'y', 'z'])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
        c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b'])
        c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath - skip resort
        c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
        c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['x', np.nan])
        c2 = Categorical([np.nan, 'b'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['x', np.nan, np.nan, 'b'],
                               categories=['b', 'x'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical([np.nan, np.nan], categories=[])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
        c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
        with tm.assertRaises(TypeError):
            union_categoricals([c1, c2], sort_categories=True)

    def test_union_categoricals_sort_false(self):
        # GH 13846
        c1 = Categorical(['x', 'y', 'z'])
        c2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['x', 'y', 'z', 'a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
        c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['b', 'a', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath - skip resort
        c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
        c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['x', np.nan])
        c2 = Categorical([np.nan, 'b'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', np.nan, np.nan, 'b'],
                               categories=['x', 'b'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([np.nan, np.nan], categories=[])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
        c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['b', 'a', 'a', 'c'],
                               categories=['b', 'a', 'c'], ordered=True)
        tm.assert_categorical_equal(result, expected)
    def test_concat_bug_1719(self):
        ts1 = tm.makeTimeSeries()
        ts2 = tm.makeTimeSeries()[::2]

        # to join with union
        # these two are of different length!
        left = concat([ts1, ts2], join='outer', axis=1)
        right = concat([ts2, ts1], join='outer', axis=1)

        self.assertEqual(len(left), len(right))

    def test_concat_bug_2972(self):
        ts0 = Series(np.zeros(5))
        ts1 = Series(np.ones(5))
        ts0.name = ts1.name = 'same name'
        result = concat([ts0, ts1], axis=1)

        expected = DataFrame({0: ts0, 1: ts1})
        expected.columns = ['same name', 'same name']
        assert_frame_equal(result, expected)

    def test_concat_bug_3602(self):

        # GH 3602, duplicate columns
        df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'stringvar': [
            'rrr', 'rrr', 'rrr', 'rrr'], 'prc': [6, 6, 6, 6]})
        df2 = DataFrame({'misc': [1, 2, 3, 4], 'prc': [
            6, 6, 6, 6], 'C': [9, 10, 11, 12]})
        expected = DataFrame([[0, 6, 'rrr', 9, 1, 6],
                              [0, 6, 'rrr', 10, 2, 6],
                              [0, 6, 'rrr', 11, 3, 6],
                              [0, 6, 'rrr', 12, 4, 6]])
        expected.columns = ['firmNo', 'prc', 'stringvar', 'C', 'misc', 'prc']

        result = concat([df1, df2], axis=1)
        assert_frame_equal(result, expected)

    def test_concat_series_axis1_same_names_ignore_index(self):
        dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1]
        s1 = Series(randn(len(dates)), index=dates, name='value')
        s2 = Series(randn(len(dates)), index=dates, name='value')

        result = concat([s1, s2], axis=1, ignore_index=True)
        self.assertTrue(np.array_equal(result.columns, [0, 1]))
    def test_concat_iterables(self):
        from collections import deque, Iterable

        # GH8645 check concat works with tuples, list, generators, and weird
        # stuff like deque and custom iterables
        df1 = DataFrame([1, 2, 3])
        df2 = DataFrame([4, 5, 6])
        expected = DataFrame([1, 2, 3, 4, 5, 6])
        assert_frame_equal(concat((df1, df2), ignore_index=True), expected)
        assert_frame_equal(concat([df1, df2], ignore_index=True), expected)
        assert_frame_equal(concat((df for df in (df1, df2)),
                                  ignore_index=True), expected)
        assert_frame_equal(
            concat(deque((df1, df2)), ignore_index=True), expected)

        class CustomIterator1(object):

            def __len__(self):
                return 2

            def __getitem__(self, index):
                try:
                    return {0: df1, 1: df2}[index]
                except KeyError:
                    raise IndexError
        assert_frame_equal(pd.concat(CustomIterator1(),
                                     ignore_index=True), expected)

        class CustomIterator2(Iterable):

            def __iter__(self):
                yield df1
                yield df2
        assert_frame_equal(pd.concat(CustomIterator2(),
                                     ignore_index=True), expected)

    def test_concat_invalid(self):

        # trying to concat a ndframe with a non-ndframe
        df1 = mkdf(10, 2)
        for obj in [1, dict(), [1, 2], (1, 2)]:
            self.assertRaises(TypeError, lambda x: concat([df1, obj]))

    def test_concat_invalid_first_argument(self):
        df1 = mkdf(10, 2)
        df2 = mkdf(10, 2)
        self.assertRaises(TypeError, concat, df1, df2)

        # generator ok though
        concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))

        # text reader ok
        # GH6583
        data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

        reader = read_csv(StringIO(data), chunksize=1)
        result = concat(reader, ignore_index=True)
        expected = read_csv(StringIO(data))
        assert_frame_equal(result, expected)
    def test_concat_NaT_series(self):
        # GH 11693
        # test for merging NaT series with datetime series.
        x = Series(date_range('20151124 08:00', '20151124 09:00',
                              freq='1h', tz='US/Eastern'))
        y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series([x[0], x[1], pd.NaT, pd.NaT])

        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # all NaT with tz
        expected = Series(pd.NaT, index=range(4),
                          dtype='datetime64[ns, US/Eastern]')
        result = pd.concat([y, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # without tz
        x = pd.Series(pd.date_range('20151124 08:00',
                                    '20151124 09:00', freq='1h'))
        y = pd.Series(pd.date_range('20151124 10:00',
                                    '20151124 11:00', freq='1h'))
        y[:] = pd.NaT
        expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
        result = pd.concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # all NaT without tz
        x[:] = pd.NaT
        expected = pd.Series(pd.NaT, index=range(4),
                             dtype='datetime64[ns]')
        result = pd.concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
    def test_concat_tz_frame(self):
        df2 = DataFrame(dict(A=pd.Timestamp('20130102', tz='US/Eastern'),
                             B=pd.Timestamp('20130603', tz='CET')),
                        index=range(5))

        # concat
        df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
        assert_frame_equal(df2, df3)

    def test_concat_tz_series(self):
        # GH 11755
        # tz and no tz
        x = Series(date_range('20151124 08:00',
                              '20151124 09:00',
                              freq='1h', tz='UTC'))
        y = Series(date_range('2012-01-01', '2012-01-02'))
        expected = Series([x[0], x[1], y[0], y[1]],
                          dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # GH 11887
        # concat tz and object
        x = Series(date_range('20151124 08:00',
                              '20151124 09:00',
                              freq='1h', tz='UTC'))
        y = Series(['a', 'b'])
        expected = Series([x[0], x[1], y[0], y[1]],
                          dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # 12217
        # 12306 fixed I think

        # Concat'ing two UTC times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize('UTC')

        second = pd.DataFrame([[datetime(2016, 1, 2)]])
        second[0] = second[0].dt.tz_localize('UTC')

        result = pd.concat([first, second])
        self.assertEqual(result[0].dtype, 'datetime64[ns, UTC]')

        # Concat'ing two London times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize('Europe/London')

        second = pd.DataFrame([[datetime(2016, 1, 2)]])
        second[0] = second[0].dt.tz_localize('Europe/London')

        result = pd.concat([first, second])
        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')

        # Concat'ing 2+1 London times
        first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
        first[0] = first[0].dt.tz_localize('Europe/London')

        second = pd.DataFrame([[datetime(2016, 1, 3)]])
        second[0] = second[0].dt.tz_localize('Europe/London')

        result = pd.concat([first, second])
        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')

        # Concat'ing 1+2 London times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize('Europe/London')

        second = pd.DataFrame([[datetime(2016, 1, 2)],
                               [datetime(2016, 1, 3)]])
        second[0] = second[0].dt.tz_localize('Europe/London')

        result = pd.concat([first, second])
        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
    def test_concat_tz_series_with_datetimelike(self):
        # GH 12620
        # tz and timedelta
        x = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
             pd.Timestamp('2011-02-01', tz='US/Eastern')]
        y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')]
        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))

        # tz and period
        y = [pd.Period('2011-03', freq='M'), pd.Period('2011-04', freq='M')]
        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))

    def test_concat_tz_series_tzlocal(self):
        # GH 13583
        tm._skip_if_no_dateutil()
        import dateutil
        x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()),
             pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())]
        y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()),
             pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())]
        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y))
        self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]')

    def test_concat_period_series(self):
        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'object')

        # different freq
        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'object')

        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'object')

        # non-period
        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'object')

        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        y = Series(['A', 'B'])
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        self.assertEqual(result.dtype, 'object')
    def test_concat_empty_series(self):
        # GH 11082
        s1 = pd.Series([1, 2, 3], name='x')
        s2 = pd.Series(name='y')
        res = pd.concat([s1, s2], axis=1)
        exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
        tm.assert_frame_equal(res, exp)

        s1 = pd.Series([1, 2, 3], name='x')
        s2 = pd.Series(name='y')
        res = pd.concat([s1, s2], axis=0)
        # name will be reset
        exp = pd.Series([1, 2, 3])
        tm.assert_series_equal(res, exp)

        # empty Series with no name
        s1 = pd.Series([1, 2, 3], name='x')
        s2 = pd.Series(name=None)
        res = pd.concat([s1, s2], axis=1)
        exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
                           columns=['x', 0])
        tm.assert_frame_equal(res, exp)

    def test_default_index(self):
        # is_series and ignore_index
        s1 = pd.Series([1, 2, 3], name='x')
        s2 = pd.Series([4, 5, 6], name='y')
        res = pd.concat([s1, s2], axis=1, ignore_index=True)
        self.assertIsInstance(res.columns, pd.RangeIndex)
        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
        # use check_index_type=True to check that the result has a
        # RangeIndex (default index)
        tm.assert_frame_equal(res, exp, check_index_type=True,
                              check_column_type=True)

        # is_series and all inputs have no names
        s1 = pd.Series([1, 2, 3])
        s2 = pd.Series([4, 5, 6])
        res = pd.concat([s1, s2], axis=1, ignore_index=False)
        self.assertIsInstance(res.columns, pd.RangeIndex)
        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
        exp.columns = pd.RangeIndex(2)
        tm.assert_frame_equal(res, exp, check_index_type=True,
                              check_column_type=True)

        # is_dataframe and ignore_index
        df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
        df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})

        res = pd.concat([df1, df2], axis=0, ignore_index=True)
        exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
                           columns=['A', 'B'])
        tm.assert_frame_equal(res, exp, check_index_type=True,
                              check_column_type=True)

        res = pd.concat([df1, df2], axis=1, ignore_index=True)
        exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
        tm.assert_frame_equal(res, exp, check_index_type=True,
                              check_column_type=True)

    def test_concat_multiindex_rangeindex(self):
        # GH13542
        # when multi-index levels are RangeIndex objects
        # there is a bug in concat with objects of len 1
        df = DataFrame(np.random.randn(9, 2))
        df.index = MultiIndex(levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
                              labels=[np.repeat(np.arange(3), 3),
                                      np.tile(np.arange(3), 3)])

        res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
        exp = df.iloc[[2, 3, 4, 5], :]
        tm.assert_frame_equal(res, exp)
if __name__ == '__main__':
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)