/pandas/tests/series/test_internals.py

http://github.com/wesm/pandas · Python · 343 lines · 259 code · 56 blank · 28 comment · 20 complexity · 361c125f49f07f6a5b5510552a8ba38b MD5 · raw file

  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from datetime import datetime
  4. import numpy as np
  5. import pytest
  6. import pandas as pd
  7. from pandas import NaT, Series, Timestamp
  8. from pandas.core.internals.blocks import IntBlock
  9. import pandas.util.testing as tm
  10. from pandas.util.testing import assert_series_equal
  11. class TestSeriesInternals(object):
  12. def test_convert_objects(self):
  13. s = Series([1., 2, 3], index=['a', 'b', 'c'])
  14. with tm.assert_produces_warning(FutureWarning):
  15. result = s.convert_objects(convert_dates=False,
  16. convert_numeric=True)
  17. assert_series_equal(result, s)
  18. # force numeric conversion
  19. r = s.copy().astype('O')
  20. r['a'] = '1'
  21. with tm.assert_produces_warning(FutureWarning):
  22. result = r.convert_objects(convert_dates=False,
  23. convert_numeric=True)
  24. assert_series_equal(result, s)
  25. r = s.copy().astype('O')
  26. r['a'] = '1.'
  27. with tm.assert_produces_warning(FutureWarning):
  28. result = r.convert_objects(convert_dates=False,
  29. convert_numeric=True)
  30. assert_series_equal(result, s)
  31. r = s.copy().astype('O')
  32. r['a'] = 'garbled'
  33. expected = s.copy()
  34. expected['a'] = np.nan
  35. with tm.assert_produces_warning(FutureWarning):
  36. result = r.convert_objects(convert_dates=False,
  37. convert_numeric=True)
  38. assert_series_equal(result, expected)
  39. # GH 4119, not converting a mixed type (e.g.floats and object)
  40. s = Series([1, 'na', 3, 4])
  41. with tm.assert_produces_warning(FutureWarning):
  42. result = s.convert_objects(convert_numeric=True)
  43. expected = Series([1, np.nan, 3, 4])
  44. assert_series_equal(result, expected)
  45. s = Series([1, '', 3, 4])
  46. with tm.assert_produces_warning(FutureWarning):
  47. result = s.convert_objects(convert_numeric=True)
  48. expected = Series([1, np.nan, 3, 4])
  49. assert_series_equal(result, expected)
  50. # dates
  51. s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
  52. datetime(2001, 1, 3, 0, 0)])
  53. s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
  54. datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
  55. Timestamp('20010104'), '20010105'],
  56. dtype='O')
  57. with tm.assert_produces_warning(FutureWarning):
  58. result = s.convert_objects(convert_dates=True,
  59. convert_numeric=False)
  60. expected = Series([Timestamp('20010101'), Timestamp('20010102'),
  61. Timestamp('20010103')], dtype='M8[ns]')
  62. assert_series_equal(result, expected)
  63. with tm.assert_produces_warning(FutureWarning):
  64. result = s.convert_objects(convert_dates='coerce',
  65. convert_numeric=False)
  66. with tm.assert_produces_warning(FutureWarning):
  67. result = s.convert_objects(convert_dates='coerce',
  68. convert_numeric=True)
  69. assert_series_equal(result, expected)
  70. expected = Series([Timestamp('20010101'), Timestamp('20010102'),
  71. Timestamp('20010103'),
  72. NaT, NaT, NaT, Timestamp('20010104'),
  73. Timestamp('20010105')], dtype='M8[ns]')
  74. with tm.assert_produces_warning(FutureWarning):
  75. result = s2.convert_objects(convert_dates='coerce',
  76. convert_numeric=False)
  77. assert_series_equal(result, expected)
  78. with tm.assert_produces_warning(FutureWarning):
  79. result = s2.convert_objects(convert_dates='coerce',
  80. convert_numeric=True)
  81. assert_series_equal(result, expected)
  82. # preserver all-nans (if convert_dates='coerce')
  83. s = Series(['foo', 'bar', 1, 1.0], dtype='O')
  84. with tm.assert_produces_warning(FutureWarning):
  85. result = s.convert_objects(convert_dates='coerce',
  86. convert_numeric=False)
  87. expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
  88. assert_series_equal(result, expected)
  89. # preserver if non-object
  90. s = Series([1], dtype='float32')
  91. with tm.assert_produces_warning(FutureWarning):
  92. result = s.convert_objects(convert_dates='coerce',
  93. convert_numeric=False)
  94. assert_series_equal(result, s)
  95. # r = s.copy()
  96. # r[0] = np.nan
  97. # result = r.convert_objects(convert_dates=True,convert_numeric=False)
  98. # assert result.dtype == 'M8[ns]'
  99. # dateutil parses some single letters into today's value as a date
  100. for x in 'abcdefghijklmnopqrstuvwxyz':
  101. s = Series([x])
  102. with tm.assert_produces_warning(FutureWarning):
  103. result = s.convert_objects(convert_dates='coerce')
  104. assert_series_equal(result, s)
  105. s = Series([x.upper()])
  106. with tm.assert_produces_warning(FutureWarning):
  107. result = s.convert_objects(convert_dates='coerce')
  108. assert_series_equal(result, s)
  109. def test_convert_objects_preserve_bool(self):
  110. s = Series([1, True, 3, 5], dtype=object)
  111. with tm.assert_produces_warning(FutureWarning):
  112. r = s.convert_objects(convert_numeric=True)
  113. e = Series([1, 1, 3, 5], dtype='i8')
  114. tm.assert_series_equal(r, e)
  115. def test_convert_objects_preserve_all_bool(self):
  116. s = Series([False, True, False, False], dtype=object)
  117. with tm.assert_produces_warning(FutureWarning):
  118. r = s.convert_objects(convert_numeric=True)
  119. e = Series([False, True, False, False], dtype=bool)
  120. tm.assert_series_equal(r, e)
  121. # GH 10265
  122. def test_convert(self):
  123. # Tests: All to nans, coerce, true
  124. # Test coercion returns correct type
  125. s = Series(['a', 'b', 'c'])
  126. results = s._convert(datetime=True, coerce=True)
  127. expected = Series([NaT] * 3)
  128. assert_series_equal(results, expected)
  129. results = s._convert(numeric=True, coerce=True)
  130. expected = Series([np.nan] * 3)
  131. assert_series_equal(results, expected)
  132. expected = Series([NaT] * 3, dtype=np.dtype('m8[ns]'))
  133. results = s._convert(timedelta=True, coerce=True)
  134. assert_series_equal(results, expected)
  135. dt = datetime(2001, 1, 1, 0, 0)
  136. td = dt - datetime(2000, 1, 1, 0, 0)
  137. # Test coercion with mixed types
  138. s = Series(['a', '3.1415', dt, td])
  139. results = s._convert(datetime=True, coerce=True)
  140. expected = Series([NaT, NaT, dt, NaT])
  141. assert_series_equal(results, expected)
  142. results = s._convert(numeric=True, coerce=True)
  143. expected = Series([np.nan, 3.1415, np.nan, np.nan])
  144. assert_series_equal(results, expected)
  145. results = s._convert(timedelta=True, coerce=True)
  146. expected = Series([NaT, NaT, NaT, td],
  147. dtype=np.dtype('m8[ns]'))
  148. assert_series_equal(results, expected)
  149. # Test standard conversion returns original
  150. results = s._convert(datetime=True)
  151. assert_series_equal(results, s)
  152. results = s._convert(numeric=True)
  153. expected = Series([np.nan, 3.1415, np.nan, np.nan])
  154. assert_series_equal(results, expected)
  155. results = s._convert(timedelta=True)
  156. assert_series_equal(results, s)
  157. # test pass-through and non-conversion when other types selected
  158. s = Series(['1.0', '2.0', '3.0'])
  159. results = s._convert(datetime=True, numeric=True, timedelta=True)
  160. expected = Series([1.0, 2.0, 3.0])
  161. assert_series_equal(results, expected)
  162. results = s._convert(True, False, True)
  163. assert_series_equal(results, s)
  164. s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)],
  165. dtype='O')
  166. results = s._convert(datetime=True, numeric=True, timedelta=True)
  167. expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0,
  168. 0)])
  169. assert_series_equal(results, expected)
  170. results = s._convert(datetime=False, numeric=True, timedelta=True)
  171. assert_series_equal(results, s)
  172. td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
  173. s = Series([td, td], dtype='O')
  174. results = s._convert(datetime=True, numeric=True, timedelta=True)
  175. expected = Series([td, td])
  176. assert_series_equal(results, expected)
  177. results = s._convert(True, True, False)
  178. assert_series_equal(results, s)
  179. s = Series([1., 2, 3], index=['a', 'b', 'c'])
  180. result = s._convert(numeric=True)
  181. assert_series_equal(result, s)
  182. # force numeric conversion
  183. r = s.copy().astype('O')
  184. r['a'] = '1'
  185. result = r._convert(numeric=True)
  186. assert_series_equal(result, s)
  187. r = s.copy().astype('O')
  188. r['a'] = '1.'
  189. result = r._convert(numeric=True)
  190. assert_series_equal(result, s)
  191. r = s.copy().astype('O')
  192. r['a'] = 'garbled'
  193. result = r._convert(numeric=True)
  194. expected = s.copy()
  195. expected['a'] = np.nan
  196. assert_series_equal(result, expected)
  197. # GH 4119, not converting a mixed type (e.g.floats and object)
  198. s = Series([1, 'na', 3, 4])
  199. result = s._convert(datetime=True, numeric=True)
  200. expected = Series([1, np.nan, 3, 4])
  201. assert_series_equal(result, expected)
  202. s = Series([1, '', 3, 4])
  203. result = s._convert(datetime=True, numeric=True)
  204. assert_series_equal(result, expected)
  205. # dates
  206. s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
  207. datetime(2001, 1, 3, 0, 0)])
  208. s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
  209. datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
  210. Timestamp('20010104'), '20010105'], dtype='O')
  211. result = s._convert(datetime=True)
  212. expected = Series([Timestamp('20010101'), Timestamp('20010102'),
  213. Timestamp('20010103')], dtype='M8[ns]')
  214. assert_series_equal(result, expected)
  215. result = s._convert(datetime=True, coerce=True)
  216. assert_series_equal(result, expected)
  217. expected = Series([Timestamp('20010101'), Timestamp('20010102'),
  218. Timestamp('20010103'), NaT, NaT, NaT,
  219. Timestamp('20010104'), Timestamp('20010105')],
  220. dtype='M8[ns]')
  221. result = s2._convert(datetime=True, numeric=False, timedelta=False,
  222. coerce=True)
  223. assert_series_equal(result, expected)
  224. result = s2._convert(datetime=True, coerce=True)
  225. assert_series_equal(result, expected)
  226. s = Series(['foo', 'bar', 1, 1.0], dtype='O')
  227. result = s._convert(datetime=True, coerce=True)
  228. expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
  229. assert_series_equal(result, expected)
  230. # preserver if non-object
  231. s = Series([1], dtype='float32')
  232. result = s._convert(datetime=True, coerce=True)
  233. assert_series_equal(result, s)
  234. # r = s.copy()
  235. # r[0] = np.nan
  236. # result = r._convert(convert_dates=True,convert_numeric=False)
  237. # assert result.dtype == 'M8[ns]'
  238. # dateutil parses some single letters into today's value as a date
  239. expected = Series([NaT])
  240. for x in 'abcdefghijklmnopqrstuvwxyz':
  241. s = Series([x])
  242. result = s._convert(datetime=True, coerce=True)
  243. assert_series_equal(result, expected)
  244. s = Series([x.upper()])
  245. result = s._convert(datetime=True, coerce=True)
  246. assert_series_equal(result, expected)
  247. def test_convert_no_arg_error(self):
  248. s = Series(['1.0', '2'])
  249. msg = r"At least one of datetime, numeric or timedelta must be True\."
  250. with pytest.raises(ValueError, match=msg):
  251. s._convert()
  252. def test_convert_preserve_bool(self):
  253. s = Series([1, True, 3, 5], dtype=object)
  254. r = s._convert(datetime=True, numeric=True)
  255. e = Series([1, 1, 3, 5], dtype='i8')
  256. tm.assert_series_equal(r, e)
  257. def test_convert_preserve_all_bool(self):
  258. s = Series([False, True, False, False], dtype=object)
  259. r = s._convert(datetime=True, numeric=True)
  260. e = Series([False, True, False, False], dtype=bool)
  261. tm.assert_series_equal(r, e)
  262. def test_constructor_no_pandas_array(self):
  263. ser = pd.Series([1, 2, 3])
  264. result = pd.Series(ser.array)
  265. tm.assert_series_equal(ser, result)
  266. assert isinstance(result._data.blocks[0], IntBlock)
  267. def test_from_array(self):
  268. result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
  269. assert result._data.blocks[0].is_extension is False
  270. result = pd.Series(pd.array(['2015'], dtype='datetime64[ns]'))
  271. assert result._data.blocks[0].is_extension is False
  272. def test_from_list_dtype(self):
  273. result = pd.Series(['1H', '2H'], dtype='timedelta64[ns]')
  274. assert result._data.blocks[0].is_extension is False
  275. result = pd.Series(['2015'], dtype='datetime64[ns]')
  276. assert result._data.blocks[0].is_extension is False
  277. def test_hasnans_unchached_for_series():
  278. # GH#19700
  279. idx = pd.Index([0, 1])
  280. assert idx.hasnans is False
  281. assert 'hasnans' in idx._cache
  282. ser = idx.to_series()
  283. assert ser.hasnans is False
  284. assert not hasattr(ser, '_cache')
  285. ser.iloc[-1] = np.nan
  286. assert ser.hasnans is True
  287. assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__