/pandas/tests/series/test_internals.py
http://github.com/wesm/pandas · Python · 343 lines · 259 code · 56 blank · 28 comment · 20 complexity · 361c125f49f07f6a5b5510552a8ba38b MD5 · raw file
- # coding=utf-8
- # pylint: disable-msg=E1101,W0612
- from datetime import datetime
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import NaT, Series, Timestamp
- from pandas.core.internals.blocks import IntBlock
- import pandas.util.testing as tm
- from pandas.util.testing import assert_series_equal
- class TestSeriesInternals(object):
- def test_convert_objects(self):
- s = Series([1., 2, 3], index=['a', 'b', 'c'])
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates=False,
- convert_numeric=True)
- assert_series_equal(result, s)
- # force numeric conversion
- r = s.copy().astype('O')
- r['a'] = '1'
- with tm.assert_produces_warning(FutureWarning):
- result = r.convert_objects(convert_dates=False,
- convert_numeric=True)
- assert_series_equal(result, s)
- r = s.copy().astype('O')
- r['a'] = '1.'
- with tm.assert_produces_warning(FutureWarning):
- result = r.convert_objects(convert_dates=False,
- convert_numeric=True)
- assert_series_equal(result, s)
- r = s.copy().astype('O')
- r['a'] = 'garbled'
- expected = s.copy()
- expected['a'] = np.nan
- with tm.assert_produces_warning(FutureWarning):
- result = r.convert_objects(convert_dates=False,
- convert_numeric=True)
- assert_series_equal(result, expected)
- # GH 4119, not converting a mixed type (e.g.floats and object)
- s = Series([1, 'na', 3, 4])
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_numeric=True)
- expected = Series([1, np.nan, 3, 4])
- assert_series_equal(result, expected)
- s = Series([1, '', 3, 4])
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_numeric=True)
- expected = Series([1, np.nan, 3, 4])
- assert_series_equal(result, expected)
- # dates
- s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
- datetime(2001, 1, 3, 0, 0)])
- s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
- datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
- Timestamp('20010104'), '20010105'],
- dtype='O')
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates=True,
- convert_numeric=False)
- expected = Series([Timestamp('20010101'), Timestamp('20010102'),
- Timestamp('20010103')], dtype='M8[ns]')
- assert_series_equal(result, expected)
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce',
- convert_numeric=False)
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce',
- convert_numeric=True)
- assert_series_equal(result, expected)
- expected = Series([Timestamp('20010101'), Timestamp('20010102'),
- Timestamp('20010103'),
- NaT, NaT, NaT, Timestamp('20010104'),
- Timestamp('20010105')], dtype='M8[ns]')
- with tm.assert_produces_warning(FutureWarning):
- result = s2.convert_objects(convert_dates='coerce',
- convert_numeric=False)
- assert_series_equal(result, expected)
- with tm.assert_produces_warning(FutureWarning):
- result = s2.convert_objects(convert_dates='coerce',
- convert_numeric=True)
- assert_series_equal(result, expected)
- # preserver all-nans (if convert_dates='coerce')
- s = Series(['foo', 'bar', 1, 1.0], dtype='O')
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce',
- convert_numeric=False)
- expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
- assert_series_equal(result, expected)
- # preserver if non-object
- s = Series([1], dtype='float32')
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce',
- convert_numeric=False)
- assert_series_equal(result, s)
- # r = s.copy()
- # r[0] = np.nan
- # result = r.convert_objects(convert_dates=True,convert_numeric=False)
- # assert result.dtype == 'M8[ns]'
- # dateutil parses some single letters into today's value as a date
- for x in 'abcdefghijklmnopqrstuvwxyz':
- s = Series([x])
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce')
- assert_series_equal(result, s)
- s = Series([x.upper()])
- with tm.assert_produces_warning(FutureWarning):
- result = s.convert_objects(convert_dates='coerce')
- assert_series_equal(result, s)
- def test_convert_objects_preserve_bool(self):
- s = Series([1, True, 3, 5], dtype=object)
- with tm.assert_produces_warning(FutureWarning):
- r = s.convert_objects(convert_numeric=True)
- e = Series([1, 1, 3, 5], dtype='i8')
- tm.assert_series_equal(r, e)
- def test_convert_objects_preserve_all_bool(self):
- s = Series([False, True, False, False], dtype=object)
- with tm.assert_produces_warning(FutureWarning):
- r = s.convert_objects(convert_numeric=True)
- e = Series([False, True, False, False], dtype=bool)
- tm.assert_series_equal(r, e)
- # GH 10265
- def test_convert(self):
- # Tests: All to nans, coerce, true
- # Test coercion returns correct type
- s = Series(['a', 'b', 'c'])
- results = s._convert(datetime=True, coerce=True)
- expected = Series([NaT] * 3)
- assert_series_equal(results, expected)
- results = s._convert(numeric=True, coerce=True)
- expected = Series([np.nan] * 3)
- assert_series_equal(results, expected)
- expected = Series([NaT] * 3, dtype=np.dtype('m8[ns]'))
- results = s._convert(timedelta=True, coerce=True)
- assert_series_equal(results, expected)
- dt = datetime(2001, 1, 1, 0, 0)
- td = dt - datetime(2000, 1, 1, 0, 0)
- # Test coercion with mixed types
- s = Series(['a', '3.1415', dt, td])
- results = s._convert(datetime=True, coerce=True)
- expected = Series([NaT, NaT, dt, NaT])
- assert_series_equal(results, expected)
- results = s._convert(numeric=True, coerce=True)
- expected = Series([np.nan, 3.1415, np.nan, np.nan])
- assert_series_equal(results, expected)
- results = s._convert(timedelta=True, coerce=True)
- expected = Series([NaT, NaT, NaT, td],
- dtype=np.dtype('m8[ns]'))
- assert_series_equal(results, expected)
- # Test standard conversion returns original
- results = s._convert(datetime=True)
- assert_series_equal(results, s)
- results = s._convert(numeric=True)
- expected = Series([np.nan, 3.1415, np.nan, np.nan])
- assert_series_equal(results, expected)
- results = s._convert(timedelta=True)
- assert_series_equal(results, s)
- # test pass-through and non-conversion when other types selected
- s = Series(['1.0', '2.0', '3.0'])
- results = s._convert(datetime=True, numeric=True, timedelta=True)
- expected = Series([1.0, 2.0, 3.0])
- assert_series_equal(results, expected)
- results = s._convert(True, False, True)
- assert_series_equal(results, s)
- s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)],
- dtype='O')
- results = s._convert(datetime=True, numeric=True, timedelta=True)
- expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0,
- 0)])
- assert_series_equal(results, expected)
- results = s._convert(datetime=False, numeric=True, timedelta=True)
- assert_series_equal(results, s)
- td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
- s = Series([td, td], dtype='O')
- results = s._convert(datetime=True, numeric=True, timedelta=True)
- expected = Series([td, td])
- assert_series_equal(results, expected)
- results = s._convert(True, True, False)
- assert_series_equal(results, s)
- s = Series([1., 2, 3], index=['a', 'b', 'c'])
- result = s._convert(numeric=True)
- assert_series_equal(result, s)
- # force numeric conversion
- r = s.copy().astype('O')
- r['a'] = '1'
- result = r._convert(numeric=True)
- assert_series_equal(result, s)
- r = s.copy().astype('O')
- r['a'] = '1.'
- result = r._convert(numeric=True)
- assert_series_equal(result, s)
- r = s.copy().astype('O')
- r['a'] = 'garbled'
- result = r._convert(numeric=True)
- expected = s.copy()
- expected['a'] = np.nan
- assert_series_equal(result, expected)
- # GH 4119, not converting a mixed type (e.g.floats and object)
- s = Series([1, 'na', 3, 4])
- result = s._convert(datetime=True, numeric=True)
- expected = Series([1, np.nan, 3, 4])
- assert_series_equal(result, expected)
- s = Series([1, '', 3, 4])
- result = s._convert(datetime=True, numeric=True)
- assert_series_equal(result, expected)
- # dates
- s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
- datetime(2001, 1, 3, 0, 0)])
- s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
- datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
- Timestamp('20010104'), '20010105'], dtype='O')
- result = s._convert(datetime=True)
- expected = Series([Timestamp('20010101'), Timestamp('20010102'),
- Timestamp('20010103')], dtype='M8[ns]')
- assert_series_equal(result, expected)
- result = s._convert(datetime=True, coerce=True)
- assert_series_equal(result, expected)
- expected = Series([Timestamp('20010101'), Timestamp('20010102'),
- Timestamp('20010103'), NaT, NaT, NaT,
- Timestamp('20010104'), Timestamp('20010105')],
- dtype='M8[ns]')
- result = s2._convert(datetime=True, numeric=False, timedelta=False,
- coerce=True)
- assert_series_equal(result, expected)
- result = s2._convert(datetime=True, coerce=True)
- assert_series_equal(result, expected)
- s = Series(['foo', 'bar', 1, 1.0], dtype='O')
- result = s._convert(datetime=True, coerce=True)
- expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
- assert_series_equal(result, expected)
- # preserver if non-object
- s = Series([1], dtype='float32')
- result = s._convert(datetime=True, coerce=True)
- assert_series_equal(result, s)
- # r = s.copy()
- # r[0] = np.nan
- # result = r._convert(convert_dates=True,convert_numeric=False)
- # assert result.dtype == 'M8[ns]'
- # dateutil parses some single letters into today's value as a date
- expected = Series([NaT])
- for x in 'abcdefghijklmnopqrstuvwxyz':
- s = Series([x])
- result = s._convert(datetime=True, coerce=True)
- assert_series_equal(result, expected)
- s = Series([x.upper()])
- result = s._convert(datetime=True, coerce=True)
- assert_series_equal(result, expected)
- def test_convert_no_arg_error(self):
- s = Series(['1.0', '2'])
- msg = r"At least one of datetime, numeric or timedelta must be True\."
- with pytest.raises(ValueError, match=msg):
- s._convert()
- def test_convert_preserve_bool(self):
- s = Series([1, True, 3, 5], dtype=object)
- r = s._convert(datetime=True, numeric=True)
- e = Series([1, 1, 3, 5], dtype='i8')
- tm.assert_series_equal(r, e)
- def test_convert_preserve_all_bool(self):
- s = Series([False, True, False, False], dtype=object)
- r = s._convert(datetime=True, numeric=True)
- e = Series([False, True, False, False], dtype=bool)
- tm.assert_series_equal(r, e)
- def test_constructor_no_pandas_array(self):
- ser = pd.Series([1, 2, 3])
- result = pd.Series(ser.array)
- tm.assert_series_equal(ser, result)
- assert isinstance(result._data.blocks[0], IntBlock)
- def test_from_array(self):
- result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
- assert result._data.blocks[0].is_extension is False
- result = pd.Series(pd.array(['2015'], dtype='datetime64[ns]'))
- assert result._data.blocks[0].is_extension is False
- def test_from_list_dtype(self):
- result = pd.Series(['1H', '2H'], dtype='timedelta64[ns]')
- assert result._data.blocks[0].is_extension is False
- result = pd.Series(['2015'], dtype='datetime64[ns]')
- assert result._data.blocks[0].is_extension is False
- def test_hasnans_unchached_for_series():
- # GH#19700
- idx = pd.Index([0, 1])
- assert idx.hasnans is False
- assert 'hasnans' in idx._cache
- ser = idx.to_series()
- assert ser.hasnans is False
- assert not hasattr(ser, '_cache')
- ser.iloc[-1] = np.nan
- assert ser.hasnans is True
- assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__