PageRenderTime 58ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/test_base.py

http://github.com/wesm/pandas
Python | 1351 lines | 993 code | 249 blank | 109 comment | 163 complexity | 0eda49e602f20ac187160ab5c75c2a57 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime, timedelta
  4. import re
  5. import sys
  6. import numpy as np
  7. import pytest
  8. from pandas._libs.tslib import iNaT
  9. import pandas.compat as compat
  10. from pandas.compat import PYPY, StringIO, long
  11. from pandas.compat.numpy import np_array_datetime64_compat
  12. from pandas.core.dtypes.common import (
  13. is_datetime64_dtype, is_datetime64tz_dtype, is_object_dtype,
  14. is_timedelta64_dtype, needs_i8_conversion)
  15. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  16. import pandas as pd
  17. from pandas import (
  18. CategoricalIndex, DataFrame, DatetimeIndex, Index, Interval, IntervalIndex,
  19. Panel, PeriodIndex, Series, Timedelta, TimedeltaIndex, Timestamp)
  20. from pandas.core.accessor import PandasDelegate
  21. from pandas.core.arrays import DatetimeArray, PandasArray, TimedeltaArray
  22. from pandas.core.base import NoNewAttributesMixin, PandasObject
  23. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  24. import pandas.util.testing as tm
  25. class CheckStringMixin(object):
  26. def test_string_methods_dont_fail(self):
  27. repr(self.container)
  28. str(self.container)
  29. bytes(self.container)
  30. if not compat.PY3:
  31. unicode(self.container) # noqa
  32. def test_tricky_container(self):
  33. if not hasattr(self, 'unicode_container'):
  34. pytest.skip('Need unicode_container to test with this')
  35. repr(self.unicode_container)
  36. str(self.unicode_container)
  37. bytes(self.unicode_container)
  38. if not compat.PY3:
  39. unicode(self.unicode_container) # noqa
  40. class CheckImmutable(object):
  41. mutable_regex = re.compile('does not support mutable operations')
  42. def check_mutable_error(self, *args, **kwargs):
  43. # Pass whatever function you normally would to pytest.raises
  44. # (after the Exception kind).
  45. with pytest.raises(TypeError):
  46. self.mutable_regex(*args, **kwargs)
  47. def test_no_mutable_funcs(self):
  48. def setitem():
  49. self.container[0] = 5
  50. self.check_mutable_error(setitem)
  51. def setslice():
  52. self.container[1:2] = 3
  53. self.check_mutable_error(setslice)
  54. def delitem():
  55. del self.container[0]
  56. self.check_mutable_error(delitem)
  57. def delslice():
  58. del self.container[0:3]
  59. self.check_mutable_error(delslice)
  60. mutable_methods = getattr(self, "mutable_methods", [])
  61. for meth in mutable_methods:
  62. self.check_mutable_error(getattr(self.container, meth))
  63. def test_slicing_maintains_type(self):
  64. result = self.container[1:2]
  65. expected = self.lst[1:2]
  66. self.check_result(result, expected)
  67. def check_result(self, result, expected, klass=None):
  68. klass = klass or self.klass
  69. assert isinstance(result, klass)
  70. assert result == expected
  71. class TestPandasDelegate(object):
  72. class Delegator(object):
  73. _properties = ['foo']
  74. _methods = ['bar']
  75. def _set_foo(self, value):
  76. self.foo = value
  77. def _get_foo(self):
  78. return self.foo
  79. foo = property(_get_foo, _set_foo, doc="foo property")
  80. def bar(self, *args, **kwargs):
  81. """ a test bar method """
  82. pass
  83. class Delegate(PandasDelegate, PandasObject):
  84. def __init__(self, obj):
  85. self.obj = obj
  86. def setup_method(self, method):
  87. pass
  88. def test_invalid_delegation(self):
  89. # these show that in order for the delegation to work
  90. # the _delegate_* methods need to be overridden to not raise
  91. # a TypeError
  92. self.Delegate._add_delegate_accessors(
  93. delegate=self.Delegator,
  94. accessors=self.Delegator._properties,
  95. typ='property'
  96. )
  97. self.Delegate._add_delegate_accessors(
  98. delegate=self.Delegator,
  99. accessors=self.Delegator._methods,
  100. typ='method'
  101. )
  102. delegate = self.Delegate(self.Delegator())
  103. with pytest.raises(TypeError):
  104. delegate.foo
  105. with pytest.raises(TypeError):
  106. delegate.foo = 5
  107. with pytest.raises(TypeError):
  108. delegate.foo()
  109. @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
  110. def test_memory_usage(self):
  111. # Delegate does not implement memory_usage.
  112. # Check that we fall back to in-built `__sizeof__`
  113. # GH 12924
  114. delegate = self.Delegate(self.Delegator())
  115. sys.getsizeof(delegate)
  116. class Ops(object):
  117. def _allow_na_ops(self, obj):
  118. """Whether to skip test cases including NaN"""
  119. if (isinstance(obj, Index) and
  120. (obj.is_boolean() or not obj._can_hold_na)):
  121. # don't test boolean / int64 index
  122. return False
  123. return True
  124. def setup_method(self, method):
  125. self.bool_index = tm.makeBoolIndex(10, name='a')
  126. self.int_index = tm.makeIntIndex(10, name='a')
  127. self.float_index = tm.makeFloatIndex(10, name='a')
  128. self.dt_index = tm.makeDateIndex(10, name='a')
  129. self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(
  130. tz='US/Eastern')
  131. self.period_index = tm.makePeriodIndex(10, name='a')
  132. self.string_index = tm.makeStringIndex(10, name='a')
  133. self.unicode_index = tm.makeUnicodeIndex(10, name='a')
  134. arr = np.random.randn(10)
  135. self.bool_series = Series(arr, index=self.bool_index, name='a')
  136. self.int_series = Series(arr, index=self.int_index, name='a')
  137. self.float_series = Series(arr, index=self.float_index, name='a')
  138. self.dt_series = Series(arr, index=self.dt_index, name='a')
  139. self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
  140. self.period_series = Series(arr, index=self.period_index, name='a')
  141. self.string_series = Series(arr, index=self.string_index, name='a')
  142. self.unicode_series = Series(arr, index=self.unicode_index, name='a')
  143. types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string',
  144. 'unicode']
  145. self.indexes = [getattr(self, '{}_index'.format(t)) for t in types]
  146. self.series = [getattr(self, '{}_series'.format(t)) for t in types]
  147. self.objs = self.indexes + self.series
  148. def check_ops_properties(self, props, filter=None, ignore_failures=False):
  149. for op in props:
  150. for o in self.is_valid_objs:
  151. # if a filter, skip if it doesn't match
  152. if filter is not None:
  153. filt = o.index if isinstance(o, Series) else o
  154. if not filter(filt):
  155. continue
  156. try:
  157. if isinstance(o, Series):
  158. expected = Series(
  159. getattr(o.index, op), index=o.index, name='a')
  160. else:
  161. expected = getattr(o, op)
  162. except (AttributeError):
  163. if ignore_failures:
  164. continue
  165. result = getattr(o, op)
  166. # these couuld be series, arrays or scalars
  167. if isinstance(result, Series) and isinstance(expected, Series):
  168. tm.assert_series_equal(result, expected)
  169. elif isinstance(result, Index) and isinstance(expected, Index):
  170. tm.assert_index_equal(result, expected)
  171. elif isinstance(result, np.ndarray) and isinstance(expected,
  172. np.ndarray):
  173. tm.assert_numpy_array_equal(result, expected)
  174. else:
  175. assert result == expected
  176. # freq raises AttributeError on an Int64Index because its not
  177. # defined we mostly care about Series here anyhow
  178. if not ignore_failures:
  179. for o in self.not_valid_objs:
  180. # an object that is datetimelike will raise a TypeError,
  181. # otherwise an AttributeError
  182. err = AttributeError
  183. if issubclass(type(o), DatetimeIndexOpsMixin):
  184. err = TypeError
  185. with pytest.raises(err):
  186. getattr(o, op)
  187. @pytest.mark.parametrize('klass', [Series, DataFrame, Panel])
  188. def test_binary_ops_docs(self, klass):
  189. op_map = {'add': '+',
  190. 'sub': '-',
  191. 'mul': '*',
  192. 'mod': '%',
  193. 'pow': '**',
  194. 'truediv': '/',
  195. 'floordiv': '//'}
  196. for op_name in op_map:
  197. operand1 = klass.__name__.lower()
  198. operand2 = 'other'
  199. op = op_map[op_name]
  200. expected_str = ' '.join([operand1, op, operand2])
  201. assert expected_str in getattr(klass, op_name).__doc__
  202. # reverse version of the binary ops
  203. expected_str = ' '.join([operand2, op, operand1])
  204. assert expected_str in getattr(klass, 'r' + op_name).__doc__
  205. class TestIndexOps(Ops):
  206. def setup_method(self, method):
  207. super(TestIndexOps, self).setup_method(method)
  208. self.is_valid_objs = self.objs
  209. self.not_valid_objs = []
  210. def test_none_comparison(self):
  211. # bug brought up by #1079
  212. # changed from TypeError in 0.17.0
  213. for o in self.is_valid_objs:
  214. if isinstance(o, Series):
  215. o[0] = np.nan
  216. # noinspection PyComparisonWithNone
  217. result = o == None # noqa
  218. assert not result.iat[0]
  219. assert not result.iat[1]
  220. # noinspection PyComparisonWithNone
  221. result = o != None # noqa
  222. assert result.iat[0]
  223. assert result.iat[1]
  224. result = None == o # noqa
  225. assert not result.iat[0]
  226. assert not result.iat[1]
  227. result = None != o # noqa
  228. assert result.iat[0]
  229. assert result.iat[1]
  230. if (is_datetime64_dtype(o) or is_datetime64tz_dtype(o)):
  231. # Following DatetimeIndex (and Timestamp) convention,
  232. # inequality comparisons with Series[datetime64] raise
  233. with pytest.raises(TypeError):
  234. None > o
  235. with pytest.raises(TypeError):
  236. o > None
  237. else:
  238. result = None > o
  239. assert not result.iat[0]
  240. assert not result.iat[1]
  241. result = o < None
  242. assert not result.iat[0]
  243. assert not result.iat[1]
  244. def test_ndarray_compat_properties(self):
  245. for o in self.objs:
  246. # Check that we work.
  247. for p in ['shape', 'dtype', 'T', 'nbytes']:
  248. assert getattr(o, p, None) is not None
  249. # deprecated properties
  250. for p in ['flags', 'strides', 'itemsize']:
  251. with tm.assert_produces_warning(FutureWarning):
  252. assert getattr(o, p, None) is not None
  253. with tm.assert_produces_warning(FutureWarning):
  254. assert hasattr(o, 'base')
  255. # If we have a datetime-like dtype then needs a view to work
  256. # but the user is responsible for that
  257. try:
  258. with tm.assert_produces_warning(FutureWarning):
  259. assert o.data is not None
  260. except ValueError:
  261. pass
  262. with pytest.raises(ValueError):
  263. o.item() # len > 1
  264. assert o.ndim == 1
  265. assert o.size == len(o)
  266. assert Index([1]).item() == 1
  267. assert Series([1]).item() == 1
  268. def test_value_counts_unique_nunique(self):
  269. for orig in self.objs:
  270. o = orig.copy()
  271. klass = type(o)
  272. values = o._values
  273. if isinstance(values, Index):
  274. # reset name not to affect latter process
  275. values.name = None
  276. # create repeated values, 'n'th element is repeated by n+1 times
  277. # skip boolean, because it only has 2 values at most
  278. if isinstance(o, Index) and o.is_boolean():
  279. continue
  280. elif isinstance(o, Index):
  281. expected_index = Index(o[::-1])
  282. expected_index.name = None
  283. o = o.repeat(range(1, len(o) + 1))
  284. o.name = 'a'
  285. else:
  286. expected_index = Index(values[::-1])
  287. idx = o.index.repeat(range(1, len(o) + 1))
  288. # take-based repeat
  289. indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1))
  290. rep = values.take(indices)
  291. o = klass(rep, index=idx, name='a')
  292. # check values has the same dtype as the original
  293. assert o.dtype == orig.dtype
  294. expected_s = Series(range(10, 0, -1), index=expected_index,
  295. dtype='int64', name='a')
  296. result = o.value_counts()
  297. tm.assert_series_equal(result, expected_s)
  298. assert result.index.name is None
  299. assert result.name == 'a'
  300. result = o.unique()
  301. if isinstance(o, Index):
  302. assert isinstance(result, o.__class__)
  303. tm.assert_index_equal(result, orig)
  304. elif is_datetime64tz_dtype(o):
  305. # datetimetz Series returns array of Timestamp
  306. assert result[0] == orig[0]
  307. for r in result:
  308. assert isinstance(r, Timestamp)
  309. tm.assert_numpy_array_equal(
  310. result.astype(object),
  311. orig._values.astype(object))
  312. else:
  313. tm.assert_numpy_array_equal(result, orig.values)
  314. assert o.nunique() == len(np.unique(o.values))
  315. @pytest.mark.parametrize('null_obj', [np.nan, None])
  316. def test_value_counts_unique_nunique_null(self, null_obj):
  317. for orig in self.objs:
  318. o = orig.copy()
  319. klass = type(o)
  320. values = o._ndarray_values
  321. if not self._allow_na_ops(o):
  322. continue
  323. # special assign to the numpy array
  324. if is_datetime64tz_dtype(o):
  325. if isinstance(o, DatetimeIndex):
  326. v = o.asi8
  327. v[0:2] = iNaT
  328. values = o._shallow_copy(v)
  329. else:
  330. o = o.copy()
  331. o[0:2] = iNaT
  332. values = o._values
  333. elif needs_i8_conversion(o):
  334. values[0:2] = iNaT
  335. values = o._shallow_copy(values)
  336. else:
  337. values[0:2] = null_obj
  338. # check values has the same dtype as the original
  339. assert values.dtype == o.dtype
  340. # create repeated values, 'n'th element is repeated by n+1
  341. # times
  342. if isinstance(o, (DatetimeIndex, PeriodIndex)):
  343. expected_index = o.copy()
  344. expected_index.name = None
  345. # attach name to klass
  346. o = klass(values.repeat(range(1, len(o) + 1)))
  347. o.name = 'a'
  348. else:
  349. if isinstance(o, DatetimeIndex):
  350. expected_index = orig._values._shallow_copy(values)
  351. else:
  352. expected_index = Index(values)
  353. expected_index.name = None
  354. o = o.repeat(range(1, len(o) + 1))
  355. o.name = 'a'
  356. # check values has the same dtype as the original
  357. assert o.dtype == orig.dtype
  358. # check values correctly have NaN
  359. nanloc = np.zeros(len(o), dtype=np.bool)
  360. nanloc[:3] = True
  361. if isinstance(o, Index):
  362. tm.assert_numpy_array_equal(pd.isna(o), nanloc)
  363. else:
  364. exp = Series(nanloc, o.index, name='a')
  365. tm.assert_series_equal(pd.isna(o), exp)
  366. expected_s_na = Series(list(range(10, 2, -1)) + [3],
  367. index=expected_index[9:0:-1],
  368. dtype='int64', name='a')
  369. expected_s = Series(list(range(10, 2, -1)),
  370. index=expected_index[9:1:-1],
  371. dtype='int64', name='a')
  372. result_s_na = o.value_counts(dropna=False)
  373. tm.assert_series_equal(result_s_na, expected_s_na)
  374. assert result_s_na.index.name is None
  375. assert result_s_na.name == 'a'
  376. result_s = o.value_counts()
  377. tm.assert_series_equal(o.value_counts(), expected_s)
  378. assert result_s.index.name is None
  379. assert result_s.name == 'a'
  380. result = o.unique()
  381. if isinstance(o, Index):
  382. tm.assert_index_equal(result,
  383. Index(values[1:], name='a'))
  384. elif is_datetime64tz_dtype(o):
  385. # unable to compare NaT / nan
  386. tm.assert_extension_array_equal(result[1:], values[2:])
  387. assert result[0] is pd.NaT
  388. else:
  389. tm.assert_numpy_array_equal(result[1:], values[2:])
  390. assert pd.isna(result[0])
  391. assert result.dtype == orig.dtype
  392. assert o.nunique() == 8
  393. assert o.nunique(dropna=False) == 9
  394. @pytest.mark.parametrize('klass', [Index, Series])
  395. def test_value_counts_inferred(self, klass):
  396. s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
  397. s = klass(s_values)
  398. expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
  399. tm.assert_series_equal(s.value_counts(), expected)
  400. if isinstance(s, Index):
  401. exp = Index(np.unique(np.array(s_values, dtype=np.object_)))
  402. tm.assert_index_equal(s.unique(), exp)
  403. else:
  404. exp = np.unique(np.array(s_values, dtype=np.object_))
  405. tm.assert_numpy_array_equal(s.unique(), exp)
  406. assert s.nunique() == 4
  407. # don't sort, have to sort after the fact as not sorting is
  408. # platform-dep
  409. hist = s.value_counts(sort=False).sort_values()
  410. expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
  411. tm.assert_series_equal(hist, expected)
  412. # sort ascending
  413. hist = s.value_counts(ascending=True)
  414. expected = Series([1, 2, 3, 4], index=list('cdab'))
  415. tm.assert_series_equal(hist, expected)
  416. # relative histogram.
  417. hist = s.value_counts(normalize=True)
  418. expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
  419. tm.assert_series_equal(hist, expected)
  420. @pytest.mark.parametrize('klass', [Index, Series])
  421. def test_value_counts_bins(self, klass):
  422. s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
  423. s = klass(s_values)
  424. # bins
  425. with pytest.raises(TypeError):
  426. s.value_counts(bins=1)
  427. s1 = Series([1, 1, 2, 3])
  428. res1 = s1.value_counts(bins=1)
  429. exp1 = Series({Interval(0.997, 3.0): 4})
  430. tm.assert_series_equal(res1, exp1)
  431. res1n = s1.value_counts(bins=1, normalize=True)
  432. exp1n = Series({Interval(0.997, 3.0): 1.0})
  433. tm.assert_series_equal(res1n, exp1n)
  434. if isinstance(s1, Index):
  435. tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
  436. else:
  437. exp = np.array([1, 2, 3], dtype=np.int64)
  438. tm.assert_numpy_array_equal(s1.unique(), exp)
  439. assert s1.nunique() == 3
  440. # these return the same
  441. res4 = s1.value_counts(bins=4, dropna=True)
  442. intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
  443. exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
  444. tm.assert_series_equal(res4, exp4)
  445. res4 = s1.value_counts(bins=4, dropna=False)
  446. intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
  447. exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
  448. tm.assert_series_equal(res4, exp4)
  449. res4n = s1.value_counts(bins=4, normalize=True)
  450. exp4n = Series([0.5, 0.25, 0.25, 0],
  451. index=intervals.take([0, 3, 1, 2]))
  452. tm.assert_series_equal(res4n, exp4n)
  453. # handle NA's properly
  454. s_values = ['a', 'b', 'b', 'b', np.nan, np.nan,
  455. 'd', 'd', 'a', 'a', 'b']
  456. s = klass(s_values)
  457. expected = Series([4, 3, 2], index=['b', 'a', 'd'])
  458. tm.assert_series_equal(s.value_counts(), expected)
  459. if isinstance(s, Index):
  460. exp = Index(['a', 'b', np.nan, 'd'])
  461. tm.assert_index_equal(s.unique(), exp)
  462. else:
  463. exp = np.array(['a', 'b', np.nan, 'd'], dtype=object)
  464. tm.assert_numpy_array_equal(s.unique(), exp)
  465. assert s.nunique() == 3
  466. s = klass({})
  467. expected = Series([], dtype=np.int64)
  468. tm.assert_series_equal(s.value_counts(), expected,
  469. check_index_type=False)
  470. # returned dtype differs depending on original
  471. if isinstance(s, Index):
  472. tm.assert_index_equal(s.unique(), Index([]), exact=False)
  473. else:
  474. tm.assert_numpy_array_equal(s.unique(), np.array([]),
  475. check_dtype=False)
  476. assert s.nunique() == 0
  477. @pytest.mark.parametrize('klass', [Index, Series])
  478. def test_value_counts_datetime64(self, klass):
  479. # GH 3002, datetime64[ns]
  480. # don't test names though
  481. txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
  482. 'xxyyzz20100101EGG', 'xxyyww20090101EGG',
  483. 'foofoo20080909PIE', 'foofoo20080909GUM'])
  484. f = StringIO(txt)
  485. df = pd.read_fwf(f, widths=[6, 8, 3],
  486. names=["person_id", "dt", "food"],
  487. parse_dates=["dt"])
  488. s = klass(df['dt'].copy())
  489. s.name = None
  490. idx = pd.to_datetime(['2010-01-01 00:00:00',
  491. '2008-09-09 00:00:00',
  492. '2009-01-01 00:00:00'])
  493. expected_s = Series([3, 2, 1], index=idx)
  494. tm.assert_series_equal(s.value_counts(), expected_s)
  495. expected = np_array_datetime64_compat(['2010-01-01 00:00:00',
  496. '2009-01-01 00:00:00',
  497. '2008-09-09 00:00:00'],
  498. dtype='datetime64[ns]')
  499. if isinstance(s, Index):
  500. tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
  501. else:
  502. tm.assert_numpy_array_equal(s.unique(), expected)
  503. assert s.nunique() == 3
  504. # with NaT
  505. s = df['dt'].copy()
  506. s = klass([v for v in s.values] + [pd.NaT])
  507. result = s.value_counts()
  508. assert result.index.dtype == 'datetime64[ns]'
  509. tm.assert_series_equal(result, expected_s)
  510. result = s.value_counts(dropna=False)
  511. expected_s[pd.NaT] = 1
  512. tm.assert_series_equal(result, expected_s)
  513. unique = s.unique()
  514. assert unique.dtype == 'datetime64[ns]'
  515. # numpy_array_equal cannot compare pd.NaT
  516. if isinstance(s, Index):
  517. exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
  518. tm.assert_index_equal(unique, exp_idx)
  519. else:
  520. tm.assert_numpy_array_equal(unique[:3], expected)
  521. assert pd.isna(unique[3])
  522. assert s.nunique() == 3
  523. assert s.nunique(dropna=False) == 4
  524. # timedelta64[ns]
  525. td = df.dt - df.dt + timedelta(1)
  526. td = klass(td, name='dt')
  527. result = td.value_counts()
  528. expected_s = Series([6], index=[Timedelta('1day')], name='dt')
  529. tm.assert_series_equal(result, expected_s)
  530. expected = TimedeltaIndex(['1 days'], name='dt')
  531. if isinstance(td, Index):
  532. tm.assert_index_equal(td.unique(), expected)
  533. else:
  534. tm.assert_numpy_array_equal(td.unique(), expected.values)
  535. td2 = timedelta(1) + (df.dt - df.dt)
  536. td2 = klass(td2, name='dt')
  537. result2 = td2.value_counts()
  538. tm.assert_series_equal(result2, expected_s)
  539. def test_factorize(self):
  540. for orig in self.objs:
  541. o = orig.copy()
  542. if isinstance(o, Index) and o.is_boolean():
  543. exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp)
  544. exp_uniques = o
  545. exp_uniques = Index([False, True])
  546. else:
  547. exp_arr = np.array(range(len(o)), dtype=np.intp)
  548. exp_uniques = o
  549. labels, uniques = o.factorize()
  550. tm.assert_numpy_array_equal(labels, exp_arr)
  551. if isinstance(o, Series):
  552. tm.assert_index_equal(uniques, Index(orig),
  553. check_names=False)
  554. else:
  555. # factorize explicitly resets name
  556. tm.assert_index_equal(uniques, exp_uniques,
  557. check_names=False)
  558. def test_factorize_repeated(self):
  559. for orig in self.objs:
  560. o = orig.copy()
  561. # don't test boolean
  562. if isinstance(o, Index) and o.is_boolean():
  563. continue
  564. # sort by value, and create duplicates
  565. if isinstance(o, Series):
  566. o = o.sort_values()
  567. n = o.iloc[5:].append(o)
  568. else:
  569. indexer = o.argsort()
  570. o = o.take(indexer)
  571. n = o[5:].append(o)
  572. exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
  573. dtype=np.intp)
  574. labels, uniques = n.factorize(sort=True)
  575. tm.assert_numpy_array_equal(labels, exp_arr)
  576. if isinstance(o, Series):
  577. tm.assert_index_equal(uniques, Index(orig).sort_values(),
  578. check_names=False)
  579. else:
  580. tm.assert_index_equal(uniques, o, check_names=False)
  581. exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4],
  582. np.intp)
  583. labels, uniques = n.factorize(sort=False)
  584. tm.assert_numpy_array_equal(labels, exp_arr)
  585. if isinstance(o, Series):
  586. expected = Index(o.iloc[5:10].append(o.iloc[:5]))
  587. tm.assert_index_equal(uniques, expected, check_names=False)
  588. else:
  589. expected = o[5:10].append(o[:5])
  590. tm.assert_index_equal(uniques, expected, check_names=False)
  591. def test_duplicated_drop_duplicates_index(self):
  592. # GH 4060
  593. for original in self.objs:
  594. if isinstance(original, Index):
  595. # special case
  596. if original.is_boolean():
  597. result = original.drop_duplicates()
  598. expected = Index([False, True], name='a')
  599. tm.assert_index_equal(result, expected)
  600. continue
  601. # original doesn't have duplicates
  602. expected = np.array([False] * len(original), dtype=bool)
  603. duplicated = original.duplicated()
  604. tm.assert_numpy_array_equal(duplicated, expected)
  605. assert duplicated.dtype == bool
  606. result = original.drop_duplicates()
  607. tm.assert_index_equal(result, original)
  608. assert result is not original
  609. # has_duplicates
  610. assert not original.has_duplicates
  611. # create repeated values, 3rd and 5th values are duplicated
  612. idx = original[list(range(len(original))) + [5, 3]]
  613. expected = np.array([False] * len(original) + [True, True],
  614. dtype=bool)
  615. duplicated = idx.duplicated()
  616. tm.assert_numpy_array_equal(duplicated, expected)
  617. assert duplicated.dtype == bool
  618. tm.assert_index_equal(idx.drop_duplicates(), original)
  619. base = [False] * len(idx)
  620. base[3] = True
  621. base[5] = True
  622. expected = np.array(base)
  623. duplicated = idx.duplicated(keep='last')
  624. tm.assert_numpy_array_equal(duplicated, expected)
  625. assert duplicated.dtype == bool
  626. result = idx.drop_duplicates(keep='last')
  627. tm.assert_index_equal(result, idx[~expected])
  628. base = [False] * len(original) + [True, True]
  629. base[3] = True
  630. base[5] = True
  631. expected = np.array(base)
  632. duplicated = idx.duplicated(keep=False)
  633. tm.assert_numpy_array_equal(duplicated, expected)
  634. assert duplicated.dtype == bool
  635. result = idx.drop_duplicates(keep=False)
  636. tm.assert_index_equal(result, idx[~expected])
  637. with pytest.raises(TypeError,
  638. match=(r"drop_duplicates\(\) got an "
  639. r"unexpected keyword argument")):
  640. idx.drop_duplicates(inplace=True)
  641. else:
  642. expected = Series([False] * len(original),
  643. index=original.index, name='a')
  644. tm.assert_series_equal(original.duplicated(), expected)
  645. result = original.drop_duplicates()
  646. tm.assert_series_equal(result, original)
  647. assert result is not original
  648. idx = original.index[list(range(len(original))) + [5, 3]]
  649. values = original._values[list(range(len(original))) + [5, 3]]
  650. s = Series(values, index=idx, name='a')
  651. expected = Series([False] * len(original) + [True, True],
  652. index=idx, name='a')
  653. tm.assert_series_equal(s.duplicated(), expected)
  654. tm.assert_series_equal(s.drop_duplicates(), original)
  655. base = [False] * len(idx)
  656. base[3] = True
  657. base[5] = True
  658. expected = Series(base, index=idx, name='a')
  659. tm.assert_series_equal(s.duplicated(keep='last'), expected)
  660. tm.assert_series_equal(s.drop_duplicates(keep='last'),
  661. s[~np.array(base)])
  662. base = [False] * len(original) + [True, True]
  663. base[3] = True
  664. base[5] = True
  665. expected = Series(base, index=idx, name='a')
  666. tm.assert_series_equal(s.duplicated(keep=False), expected)
  667. tm.assert_series_equal(s.drop_duplicates(keep=False),
  668. s[~np.array(base)])
  669. s.drop_duplicates(inplace=True)
  670. tm.assert_series_equal(s, original)
  671. def test_drop_duplicates_series_vs_dataframe(self):
  672. # GH 14192
  673. df = pd.DataFrame({'a': [1, 1, 1, 'one', 'one'],
  674. 'b': [2, 2, np.nan, np.nan, np.nan],
  675. 'c': [3, 3, np.nan, np.nan, 'three'],
  676. 'd': [1, 2, 3, 4, 4],
  677. 'e': [datetime(2015, 1, 1), datetime(2015, 1, 1),
  678. datetime(2015, 2, 1), pd.NaT, pd.NaT]
  679. })
  680. for column in df.columns:
  681. for keep in ['first', 'last', False]:
  682. dropped_frame = df[[column]].drop_duplicates(keep=keep)
  683. dropped_series = df[column].drop_duplicates(keep=keep)
  684. tm.assert_frame_equal(dropped_frame, dropped_series.to_frame())
  685. def test_fillna(self):
  686. # # GH 11343
  687. # though Index.fillna and Series.fillna has separate impl,
  688. # test here to confirm these works as the same
  689. for orig in self.objs:
  690. o = orig.copy()
  691. values = o.values
  692. # values will not be changed
  693. result = o.fillna(o.astype(object).values[0])
  694. if isinstance(o, Index):
  695. tm.assert_index_equal(o, result)
  696. else:
  697. tm.assert_series_equal(o, result)
  698. # check shallow_copied
  699. assert o is not result
  700. for null_obj in [np.nan, None]:
  701. for orig in self.objs:
  702. o = orig.copy()
  703. klass = type(o)
  704. if not self._allow_na_ops(o):
  705. continue
  706. if needs_i8_conversion(o):
  707. values = o.astype(object).values
  708. fill_value = values[0]
  709. values[0:2] = pd.NaT
  710. else:
  711. values = o.values.copy()
  712. fill_value = o.values[0]
  713. values[0:2] = null_obj
  714. expected = [fill_value] * 2 + list(values[2:])
  715. expected = klass(expected)
  716. o = klass(values)
  717. # check values has the same dtype as the original
  718. assert o.dtype == orig.dtype
  719. result = o.fillna(fill_value)
  720. if isinstance(o, Index):
  721. tm.assert_index_equal(result, expected)
  722. else:
  723. tm.assert_series_equal(result, expected)
  724. # check shallow_copied
  725. assert o is not result
  726. @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
  727. def test_memory_usage(self):
  728. for o in self.objs:
  729. res = o.memory_usage()
  730. res_deep = o.memory_usage(deep=True)
  731. if (is_object_dtype(o) or (isinstance(o, Series) and
  732. is_object_dtype(o.index))):
  733. # if there are objects, only deep will pick them up
  734. assert res_deep > res
  735. else:
  736. assert res == res_deep
  737. if isinstance(o, Series):
  738. assert ((o.memory_usage(index=False) +
  739. o.index.memory_usage()) ==
  740. o.memory_usage(index=True))
  741. # sys.getsizeof will call the .memory_usage with
  742. # deep=True, and add on some GC overhead
  743. diff = res_deep - sys.getsizeof(o)
  744. assert abs(diff) < 100
  745. def test_searchsorted(self):
  746. # See gh-12238
  747. for o in self.objs:
  748. index = np.searchsorted(o, max(o))
  749. assert 0 <= index <= len(o)
  750. index = np.searchsorted(o, max(o), sorter=range(len(o)))
  751. assert 0 <= index <= len(o)
  752. def test_validate_bool_args(self):
  753. invalid_values = [1, "True", [1, 2, 3], 5.0]
  754. for value in invalid_values:
  755. with pytest.raises(ValueError):
  756. self.int_series.drop_duplicates(inplace=value)
  757. def test_getitem(self):
  758. for i in self.indexes:
  759. s = pd.Series(i)
  760. assert i[0] == s.iloc[0]
  761. assert i[5] == s.iloc[5]
  762. assert i[-1] == s.iloc[-1]
  763. assert i[-1] == i[9]
  764. with pytest.raises(IndexError):
  765. i[20]
  766. with pytest.raises(IndexError):
  767. s.iloc[20]
  768. @pytest.mark.parametrize('indexer_klass', [list, pd.Index])
  769. @pytest.mark.parametrize('indexer', [[True] * 10, [False] * 10,
  770. [True, False, True, True, False,
  771. False, True, True, False, True]])
  772. def test_bool_indexing(self, indexer_klass, indexer):
  773. # GH 22533
  774. for idx in self.indexes:
  775. exp_idx = [i for i in range(len(indexer)) if indexer[i]]
  776. tm.assert_index_equal(idx[indexer_klass(indexer)], idx[exp_idx])
  777. s = pd.Series(idx)
  778. tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
  779. class TestTranspose(Ops):
  780. errmsg = "the 'axes' parameter is not supported"
  781. def test_transpose(self):
  782. for obj in self.objs:
  783. tm.assert_equal(obj.transpose(), obj)
  784. def test_transpose_non_default_axes(self):
  785. for obj in self.objs:
  786. with pytest.raises(ValueError, match=self.errmsg):
  787. obj.transpose(1)
  788. with pytest.raises(ValueError, match=self.errmsg):
  789. obj.transpose(axes=1)
  790. def test_numpy_transpose(self):
  791. for obj in self.objs:
  792. tm.assert_equal(np.transpose(obj), obj)
  793. with pytest.raises(ValueError, match=self.errmsg):
  794. np.transpose(obj, axes=1)
  795. class TestNoNewAttributesMixin(object):
  796. def test_mixin(self):
  797. class T(NoNewAttributesMixin):
  798. pass
  799. t = T()
  800. assert not hasattr(t, "__frozen")
  801. t.a = "test"
  802. assert t.a == "test"
  803. t._freeze()
  804. assert "__frozen" in dir(t)
  805. assert getattr(t, "__frozen")
  806. with pytest.raises(AttributeError):
  807. t.b = "test"
  808. assert not hasattr(t, "b")
class TestToIterable(object):
    # test that we convert an iterable to python types

    # (pandas dtype string, expected boxed python/pandas scalar type(s))
    # pairs shared by the parametrized tests below.  Integer dtypes map to
    # (int, long) for py2/py3 compatibility.
    dtypes = [
        ('int8', (int, long)),
        ('int16', (int, long)),
        ('int32', (int, long)),
        ('int64', (int, long)),
        ('uint8', (int, long)),
        ('uint16', (int, long)),
        ('uint32', (int, long)),
        ('uint64', (int, long)),
        ('float16', float),
        ('float32', float),
        ('float64', float),
        ('datetime64[ns]', Timestamp),
        ('datetime64[ns, US/Eastern]', Timestamp),
        ('timedelta64[ns]', Timedelta)]

    @pytest.mark.parametrize(
        'dtype, rdtype', dtypes)
    @pytest.mark.parametrize(
        'method',
        [
            lambda x: x.tolist(),
            lambda x: x.to_list(),
            lambda x: list(x),
            lambda x: list(x.__iter__()),
        ], ids=['tolist', 'to_list', 'list', 'iter'])
    @pytest.mark.parametrize('typ', [Series, Index])
    @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
    # TODO(GH-24559): Remove the filterwarnings
    def test_iterable(self, typ, method, dtype, rdtype):
        # gh-10904
        # gh-13258
        # coerce iteration to underlying python / pandas types:
        # every iteration/materialization path must box the single
        # element as ``rdtype``.
        s = typ([1], dtype=dtype)
        result = method(s)[0]
        assert isinstance(result, rdtype)

    @pytest.mark.parametrize(
        'dtype, rdtype, obj',
        [
            ('object', object, 'a'),
            ('object', (int, long), 1),
            ('category', object, 'a'),
            ('category', (int, long), 1)])
    @pytest.mark.parametrize(
        'method',
        [
            lambda x: x.tolist(),
            lambda x: x.to_list(),
            lambda x: list(x),
            lambda x: list(x.__iter__()),
        ], ids=['tolist', 'to_list', 'list', 'iter'])
    @pytest.mark.parametrize('typ', [Series, Index])
    def test_iterable_object_and_category(self, typ, method,
                                          dtype, rdtype, obj):
        # gh-10904
        # gh-13258
        # coerce iteration to underlying python / pandas types
        # (same check as test_iterable, for object and categorical dtypes)
        s = typ([obj], dtype=dtype)
        result = method(s)[0]
        assert isinstance(result, rdtype)

    @pytest.mark.parametrize(
        'dtype, rdtype', dtypes)
    def test_iterable_items(self, dtype, rdtype):
        # gh-13258
        # test items / iteritems yields the correct boxed scalars
        # this only applies to series
        s = Series([1], dtype=dtype)
        _, result = list(s.items())[0]
        assert isinstance(result, rdtype)

        _, result = list(s.iteritems())[0]
        assert isinstance(result, rdtype)

    @pytest.mark.parametrize(
        'dtype, rdtype',
        dtypes + [
            ('object', (int, long)),
            ('category', (int, long))])
    @pytest.mark.parametrize('typ', [Series, Index])
    @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
    # TODO(GH-24559): Remove the filterwarnings
    def test_iterable_map(self, typ, dtype, rdtype):
        # gh-13236
        # coerce iteration to underlying python / pandas types:
        # map(type) sees the boxed scalar, so its result must be one of
        # the expected types.
        s = typ([1], dtype=dtype)
        result = s.map(type)[0]
        if not isinstance(rdtype, tuple):
            rdtype = tuple([rdtype])
        assert result in rdtype

    @pytest.mark.parametrize(
        'method',
        [
            lambda x: x.tolist(),
            lambda x: x.to_list(),
            lambda x: list(x),
            lambda x: list(x.__iter__()),
        ], ids=['tolist', 'to_list', 'list', 'iter'])
    def test_categorial_datetimelike(self, method):
        # a CategoricalIndex of datetimes iterates as boxed Timestamps
        i = CategoricalIndex([Timestamp('1999-12-31'),
                              Timestamp('2000-12-31')])

        result = method(i)[0]
        assert isinstance(result, Timestamp)

    def test_iter_box(self):
        # iterating a Series yields boxed scalars (Timestamp / Timedelta /
        # Period) that preserve tz and freq metadata.
        vals = [Timestamp('2011-01-01'), Timestamp('2011-01-02')]
        s = Series(vals)
        assert s.dtype == 'datetime64[ns]'
        for res, exp in zip(s, vals):
            assert isinstance(res, Timestamp)
            assert res.tz is None
            assert res == exp

        vals = [Timestamp('2011-01-01', tz='US/Eastern'),
                Timestamp('2011-01-02', tz='US/Eastern')]
        s = Series(vals)

        assert s.dtype == 'datetime64[ns, US/Eastern]'
        for res, exp in zip(s, vals):
            assert isinstance(res, Timestamp)
            assert res.tz == exp.tz
            assert res == exp

        # timedelta
        vals = [Timedelta('1 days'), Timedelta('2 days')]
        s = Series(vals)
        assert s.dtype == 'timedelta64[ns]'
        for res, exp in zip(s, vals):
            assert isinstance(res, Timedelta)
            assert res == exp

        # period
        vals = [pd.Period('2011-01-01', freq='M'),
                pd.Period('2011-01-02', freq='M')]
        s = Series(vals)
        assert s.dtype == 'Period[M]'
        for res, exp in zip(s, vals):
            assert isinstance(res, pd.Period)
            assert res.freq == 'M'
            assert res == exp
  942. @pytest.mark.parametrize('array, expected_type, dtype', [
  943. (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'),
  944. (np.array(['a', 'b']), np.ndarray, 'object'),
  945. (pd.Categorical(['a', 'b']), pd.Categorical, 'category'),
  946. (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), DatetimeArray,
  947. 'datetime64[ns, US/Central]'),
  948. (pd.PeriodIndex([2018, 2019], freq='A'), pd.core.arrays.PeriodArray,
  949. pd.core.dtypes.dtypes.PeriodDtype("A-DEC")),
  950. (pd.IntervalIndex.from_breaks([0, 1, 2]), pd.core.arrays.IntervalArray,
  951. 'interval'),
  952. # This test is currently failing for datetime64[ns] and timedelta64[ns].
  953. # The NumPy type system is sufficient for representing these types, so
  954. # we just use NumPy for Series / DataFrame columns of these types (so
  955. # we get consolidation and so on).
  956. # However, DatetimeIndex and TimedeltaIndex use the DateLikeArray
  957. # abstraction to for code reuse.
  958. # At the moment, we've judged that allowing this test to fail is more
  959. # practical that overriding Series._values to special case
  960. # Series[M8[ns]] and Series[m8[ns]] to return a DateLikeArray.
  961. pytest.param(
  962. pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]',
  963. marks=[pytest.mark.xfail(reason="datetime _values", strict=True)]
  964. ),
  965. pytest.param(
  966. pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]',
  967. marks=[pytest.mark.xfail(reason="timedelta _values", strict=True)]
  968. ),
  969. ])
  970. def test_values_consistent(array, expected_type, dtype):
  971. l_values = pd.Series(array)._values
  972. r_values = pd.Index(array)._values
  973. assert type(l_values) is expected_type
  974. assert type(l_values) is type(r_values)
  975. tm.assert_equal(l_values, r_values)
  976. @pytest.mark.parametrize('array, expected', [
  977. (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
  978. (np.array(['0', '1']), np.array(['0', '1'], dtype=object)),
  979. (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')),
  980. (pd.DatetimeIndex(['2017-01-01T00:00:00']),
  981. np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')),
  982. (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"),
  983. np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')),
  984. (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')),
  985. (pd.PeriodIndex(['2017', '2018'], freq='D'),
  986. np.array([17167, 17532], dtype=np.int64)),
  987. ])
  988. def test_ndarray_values(array, expected):
  989. l_values = pd.Series(array)._ndarray_values
  990. r_values = pd.Index(array)._ndarray_values
  991. tm.assert_numpy_array_equal(l_values, r_values)
  992. tm.assert_numpy_array_equal(l_values, expected)
  993. @pytest.mark.parametrize("arr", [
  994. np.array([1, 2, 3]),
  995. ])
  996. def test_numpy_array(arr):
  997. ser = pd.Series(arr)
  998. result = ser.array
  999. expected = PandasArray(arr)
  1000. tm.assert_extension_array_equal(result, expected)
  1001. def test_numpy_array_all_dtypes(any_numpy_dtype):
  1002. ser = pd.Series(dtype=any_numpy_dtype)
  1003. result = ser.array
  1004. if is_datetime64_dtype(any_numpy_dtype):
  1005. assert isinstance(result, DatetimeArray)
  1006. elif is_timedelta64_dtype(any_numpy_dtype):
  1007. assert isinstance(result, TimedeltaArray)
  1008. else:
  1009. assert isinstance(result, PandasArray)
  1010. @pytest.mark.parametrize("array, attr", [
  1011. (pd.Categorical(['a', 'b']), '_codes'),
  1012. (pd.core.arrays.period_array(['2000', '2001'], freq='D'), '_data'),
  1013. (pd.core.arrays.integer_array([0, np.nan]), '_data'),
  1014. (pd.core.arrays.IntervalArray.from_breaks([0, 1]), '_left'),
  1015. (pd.SparseArray([0, 1]), '_sparse_values'),
  1016. (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"),
  1017. # tz-aware Datetime
  1018. (DatetimeArray(np.array(['2000-01-01T12:00:00',
  1019. '2000-01-02T12:00:00'],
  1020. dtype='M8[ns]'),
  1021. dtype=DatetimeTZDtype(tz="US/Central")),
  1022. '_data'),
  1023. ])
  1024. @pytest.mark.parametrize('box', [pd.Series, pd.Index])
  1025. def test_array(array, attr, box):
  1026. if array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index:
  1027. pytest.skip("No index type for {}".format(array.dtype))
  1028. result = box(array, copy=False).array
  1029. if attr:
  1030. array = getattr(array, attr)
  1031. result = getattr(result, attr)
  1032. assert result is array
  1033. def test_array_multiindex_raises():
  1034. idx = pd.MultiIndex.from_product([['A'], ['a', 'b']])
  1035. with pytest.raises(ValueError, match='MultiIndex'):
  1036. idx.array
  1037. @pytest.mark.parametrize('array, expected', [
  1038. (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
  1039. (pd.Categorical(['a', 'b']), np.array(['a', 'b'], dtype=object)),
  1040. (pd.core.arrays.period_array(['2000', '2001'], freq='D'),
  1041. np.array([pd.Period('2000', freq="D"), pd.Period('2001', freq='D')])),
  1042. (pd.core.arrays.integer_array([0, np.nan]),
  1043. np.array([0, np.nan], dtype=object)),
  1044. (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]),
  1045. np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)),
  1046. (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
  1047. # tz-naive datetime
  1048. (DatetimeArray(np.array(['2000', '2001'], dtype='M8[ns]')),
  1049. np.array(['2000', '2001'], dtype='M8[ns]')),
  1050. # tz-aware stays tz`-aware
  1051. (DatetimeArray(np.array(['2000-01-01T06:00:00',
  1052. '2000-01-02T06:00:00'],
  1053. dtype='M8[ns]'),
  1054. dtype=DatetimeTZDtype(tz='US/Central')),
  1055. np.array([pd.Timestamp('2000-01-01', tz='US/Central'),
  1056. pd.Timestamp('2000-01-02', tz='US/Central')])),
  1057. # Timedelta
  1058. (TimedeltaArray(np.array([0, 3600000000000], dtype='i8'), freq='H'),
  1059. np.array([0, 3600000000000], dtype='m8[ns]')),
  1060. ])
  1061. @pytest.mark.parametrize('box', [pd.Series, pd.Index])
  1062. def test_to_numpy(array, expected, box):
  1063. thing = box(array)
  1064. if array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index:
  1065. pytest.skip("No index type for {}".format(array.dtype))
  1066. result = thing.to_numpy()
  1067. tm.assert_numpy_array_equal(result, expected)
  1068. @pytest.mark.parametrize("as_series", [True, False])
  1069. @pytest.mark.parametrize("arr", [
  1070. np.array([1, 2, 3], dtype="int64"),
  1071. np.array(['a', 'b', 'c'], dtype=object),
  1072. ])
  1073. def test_to_numpy_copy(arr, as_series):
  1074. obj = pd.Index(arr, copy=False)
  1075. if as_series:
  1076. obj = pd.Series(obj.values, copy=False)
  1077. # no copy by default
  1078. result = obj.to_numpy()
  1079. assert np.shares_memory(arr, result) is True
  1080. result = obj.to_numpy(copy=False)
  1081. assert np.shares_memory(arr, result) is True
  1082. # copy=True
  1083. result = obj.to_numpy(copy=True)
  1084. assert np.shares_memory(arr, result) is False
@pytest.mark.parametrize("as_series", [True, False])
def test_to_numpy_dtype(as_series):
    # to_numpy() on tz-aware datetime data: the default and
    # dtype="object" keep tz-aware Timestamps, while dtype="M8[ns]"
    # converts to naive UTC values.
    tz = "US/Eastern"
    obj = pd.DatetimeIndex(['2000', '2001'], tz=tz)
    if as_series:
        obj = pd.Series(obj)

    # preserve tz by default
    result = obj.to_numpy()
    expected = np.array([pd.Timestamp('2000', tz=tz),
                         pd.Timestamp('2001', tz=tz)],
                        dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    result = obj.to_numpy(dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    # M8[ns] drops the tz: midnight US/Eastern == 05:00 UTC
    result = obj.to_numpy(dtype="M8[ns]")
    expected = np.array(['2000-01-01T05', '2001-01-01T05'],
                        dtype='M8[ns]')
    tm.assert_numpy_array_equal(result, expected)