PageRenderTime 415ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/tests/series/test_combine_concat.py

https://github.com/neurodebian/pandas
Python | 311 lines | 228 code | 63 blank | 20 comment | 18 complexity | be58fce583536f9022a45bdab39943e4 MD5 | raw file
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. import pytest
  4. from datetime import datetime
  5. from numpy import nan
  6. import numpy as np
  7. import pandas as pd
  8. from pandas import Series, DataFrame, date_range, DatetimeIndex
  9. from pandas import compat
  10. from pandas.util.testing import assert_series_equal
  11. import pandas.util.testing as tm
  12. from .common import TestData
  13. class TestSeriesCombine(TestData):
  14. def test_append(self):
  15. appendedSeries = self.series.append(self.objSeries)
  16. for idx, value in compat.iteritems(appendedSeries):
  17. if idx in self.series.index:
  18. assert value == self.series[idx]
  19. elif idx in self.objSeries.index:
  20. assert value == self.objSeries[idx]
  21. else:
  22. self.fail("orphaned index!")
  23. pytest.raises(ValueError, self.ts.append, self.ts,
  24. verify_integrity=True)
  25. def test_append_many(self):
  26. pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]]
  27. result = pieces[0].append(pieces[1:])
  28. assert_series_equal(result, self.ts)
  29. def test_append_duplicates(self):
  30. # GH 13677
  31. s1 = pd.Series([1, 2, 3])
  32. s2 = pd.Series([4, 5, 6])
  33. exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
  34. tm.assert_series_equal(s1.append(s2), exp)
  35. tm.assert_series_equal(pd.concat([s1, s2]), exp)
  36. # the result must have RangeIndex
  37. exp = pd.Series([1, 2, 3, 4, 5, 6])
  38. tm.assert_series_equal(s1.append(s2, ignore_index=True),
  39. exp, check_index_type=True)
  40. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True),
  41. exp, check_index_type=True)
  42. msg = 'Indexes have overlapping values:'
  43. with tm.assert_raises_regex(ValueError, msg):
  44. s1.append(s2, verify_integrity=True)
  45. with tm.assert_raises_regex(ValueError, msg):
  46. pd.concat([s1, s2], verify_integrity=True)
  47. def test_combine_first(self):
  48. values = tm.makeIntIndex(20).values.astype(float)
  49. series = Series(values, index=tm.makeIntIndex(20))
  50. series_copy = series * 2
  51. series_copy[::2] = np.NaN
  52. # nothing used from the input
  53. combined = series.combine_first(series_copy)
  54. tm.assert_series_equal(combined, series)
  55. # Holes filled from input
  56. combined = series_copy.combine_first(series)
  57. assert np.isfinite(combined).all()
  58. tm.assert_series_equal(combined[::2], series[::2])
  59. tm.assert_series_equal(combined[1::2], series_copy[1::2])
  60. # mixed types
  61. index = tm.makeStringIndex(20)
  62. floats = Series(tm.randn(20), index=index)
  63. strings = Series(tm.makeStringIndex(10), index=index[::2])
  64. combined = strings.combine_first(floats)
  65. tm.assert_series_equal(strings, combined.loc[index[::2]])
  66. tm.assert_series_equal(floats[1::2].astype(object),
  67. combined.loc[index[1::2]])
  68. # corner case
  69. s = Series([1., 2, 3], index=[0, 1, 2])
  70. result = s.combine_first(Series([], index=[]))
  71. assert_series_equal(s, result)
  72. def test_update(self):
  73. s = Series([1.5, nan, 3., 4., nan])
  74. s2 = Series([nan, 3.5, nan, 5.])
  75. s.update(s2)
  76. expected = Series([1.5, 3.5, 3., 5., np.nan])
  77. assert_series_equal(s, expected)
  78. # GH 3217
  79. df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
  80. df['c'] = np.nan
  81. # this will fail as long as series is a sub-class of ndarray
  82. # df['c'].update(Series(['foo'],index=[0])) #####
  83. def test_concat_empty_series_dtypes_roundtrips(self):
  84. # round-tripping with self & like self
  85. dtypes = map(np.dtype, ['float64', 'int8', 'uint8', 'bool', 'm8[ns]',
  86. 'M8[ns]'])
  87. for dtype in dtypes:
  88. assert pd.concat([Series(dtype=dtype)]).dtype == dtype
  89. assert pd.concat([Series(dtype=dtype),
  90. Series(dtype=dtype)]).dtype == dtype
  91. def int_result_type(dtype, dtype2):
  92. typs = set([dtype.kind, dtype2.kind])
  93. if not len(typs - set(['i', 'u', 'b'])) and (dtype.kind == 'i' or
  94. dtype2.kind == 'i'):
  95. return 'i'
  96. elif not len(typs - set(['u', 'b'])) and (dtype.kind == 'u' or
  97. dtype2.kind == 'u'):
  98. return 'u'
  99. return None
  100. def float_result_type(dtype, dtype2):
  101. typs = set([dtype.kind, dtype2.kind])
  102. if not len(typs - set(['f', 'i', 'u'])) and (dtype.kind == 'f' or
  103. dtype2.kind == 'f'):
  104. return 'f'
  105. return None
  106. def get_result_type(dtype, dtype2):
  107. result = float_result_type(dtype, dtype2)
  108. if result is not None:
  109. return result
  110. result = int_result_type(dtype, dtype2)
  111. if result is not None:
  112. return result
  113. return 'O'
  114. for dtype in dtypes:
  115. for dtype2 in dtypes:
  116. if dtype == dtype2:
  117. continue
  118. expected = get_result_type(dtype, dtype2)
  119. result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)
  120. ]).dtype
  121. assert result.kind == expected
  122. def test_concat_empty_series_dtypes(self):
  123. # booleans
  124. assert pd.concat([Series(dtype=np.bool_),
  125. Series(dtype=np.int32)]).dtype == np.int32
  126. assert pd.concat([Series(dtype=np.bool_),
  127. Series(dtype=np.float32)]).dtype == np.object_
  128. # datetime-like
  129. assert pd.concat([Series(dtype='m8[ns]'),
  130. Series(dtype=np.bool)]).dtype == np.object_
  131. assert pd.concat([Series(dtype='m8[ns]'),
  132. Series(dtype=np.int64)]).dtype == np.object_
  133. assert pd.concat([Series(dtype='M8[ns]'),
  134. Series(dtype=np.bool)]).dtype == np.object_
  135. assert pd.concat([Series(dtype='M8[ns]'),
  136. Series(dtype=np.int64)]).dtype == np.object_
  137. assert pd.concat([Series(dtype='M8[ns]'),
  138. Series(dtype=np.bool_),
  139. Series(dtype=np.int64)]).dtype == np.object_
  140. # categorical
  141. assert pd.concat([Series(dtype='category'),
  142. Series(dtype='category')]).dtype == 'category'
  143. assert pd.concat([Series(dtype='category'),
  144. Series(dtype='float64')]).dtype == 'float64'
  145. assert pd.concat([Series(dtype='category'),
  146. Series(dtype='object')]).dtype == 'object'
  147. # sparse
  148. result = pd.concat([Series(dtype='float64').to_sparse(), Series(
  149. dtype='float64').to_sparse()])
  150. assert result.dtype == np.float64
  151. assert result.ftype == 'float64:sparse'
  152. result = pd.concat([Series(dtype='float64').to_sparse(), Series(
  153. dtype='float64')])
  154. assert result.dtype == np.float64
  155. assert result.ftype == 'float64:sparse'
  156. result = pd.concat([Series(dtype='float64').to_sparse(), Series(
  157. dtype='object')])
  158. assert result.dtype == np.object_
  159. assert result.ftype == 'object:dense'
  160. def test_combine_first_dt64(self):
  161. from pandas.core.tools.datetimes import to_datetime
  162. s0 = to_datetime(Series(["2010", np.NaN]))
  163. s1 = to_datetime(Series([np.NaN, "2011"]))
  164. rs = s0.combine_first(s1)
  165. xp = to_datetime(Series(['2010', '2011']))
  166. assert_series_equal(rs, xp)
  167. s0 = to_datetime(Series(["2010", np.NaN]))
  168. s1 = Series([np.NaN, "2011"])
  169. rs = s0.combine_first(s1)
  170. xp = Series([datetime(2010, 1, 1), '2011'])
  171. assert_series_equal(rs, xp)
  172. class TestTimeseries(object):
  173. def test_append_concat(self):
  174. rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
  175. ts = Series(np.random.randn(len(rng)), rng)
  176. df = DataFrame(np.random.randn(len(rng), 4), index=rng)
  177. result = ts.append(ts)
  178. result_df = df.append(df)
  179. ex_index = DatetimeIndex(np.tile(rng.values, 2))
  180. tm.assert_index_equal(result.index, ex_index)
  181. tm.assert_index_equal(result_df.index, ex_index)
  182. appended = rng.append(rng)
  183. tm.assert_index_equal(appended, ex_index)
  184. appended = rng.append([rng, rng])
  185. ex_index = DatetimeIndex(np.tile(rng.values, 3))
  186. tm.assert_index_equal(appended, ex_index)
  187. # different index names
  188. rng1 = rng.copy()
  189. rng2 = rng.copy()
  190. rng1.name = 'foo'
  191. rng2.name = 'bar'
  192. assert rng1.append(rng1).name == 'foo'
  193. assert rng1.append(rng2).name is None
  194. def test_append_concat_tz(self):
  195. # see gh-2938
  196. rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
  197. tz='US/Eastern')
  198. rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
  199. tz='US/Eastern')
  200. rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
  201. tz='US/Eastern')
  202. ts = Series(np.random.randn(len(rng)), rng)
  203. df = DataFrame(np.random.randn(len(rng), 4), index=rng)
  204. ts2 = Series(np.random.randn(len(rng2)), rng2)
  205. df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
  206. result = ts.append(ts2)
  207. result_df = df.append(df2)
  208. tm.assert_index_equal(result.index, rng3)
  209. tm.assert_index_equal(result_df.index, rng3)
  210. appended = rng.append(rng2)
  211. tm.assert_index_equal(appended, rng3)
  212. def test_append_concat_tz_explicit_pytz(self):
  213. # see gh-2938
  214. from pytz import timezone as timezone
  215. rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
  216. tz=timezone('US/Eastern'))
  217. rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
  218. tz=timezone('US/Eastern'))
  219. rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
  220. tz=timezone('US/Eastern'))
  221. ts = Series(np.random.randn(len(rng)), rng)
  222. df = DataFrame(np.random.randn(len(rng), 4), index=rng)
  223. ts2 = Series(np.random.randn(len(rng2)), rng2)
  224. df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
  225. result = ts.append(ts2)
  226. result_df = df.append(df2)
  227. tm.assert_index_equal(result.index, rng3)
  228. tm.assert_index_equal(result_df.index, rng3)
  229. appended = rng.append(rng2)
  230. tm.assert_index_equal(appended, rng3)
  231. def test_append_concat_tz_dateutil(self):
  232. # see gh-2938
  233. rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
  234. tz='dateutil/US/Eastern')
  235. rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
  236. tz='dateutil/US/Eastern')
  237. rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
  238. tz='dateutil/US/Eastern')
  239. ts = Series(np.random.randn(len(rng)), rng)
  240. df = DataFrame(np.random.randn(len(rng), 4), index=rng)
  241. ts2 = Series(np.random.randn(len(rng2)), rng2)
  242. df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
  243. result = ts.append(ts2)
  244. result_df = df.append(df2)
  245. tm.assert_index_equal(result.index, rng3)
  246. tm.assert_index_equal(result_df.index, rng3)
  247. appended = rng.append(rng2)
  248. tm.assert_index_equal(appended, rng3)