PageRenderTime 60ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tests/types/test_inference.py

http://github.com/wesm/pandas
Python | 847 lines | 796 code | 39 blank | 12 comment | 12 complexity | 5c6e46cd79d9f937933cec8c6796d0c4 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. """
  3. These tests cover the public routines exposed in types/common.py
  4. related to inference and not otherwise tested in types/test_common.py
  5. """
  6. import nose
  7. import collections
  8. import re
  9. from datetime import datetime, date, timedelta, time
  10. import numpy as np
  11. import pandas as pd
  12. from pandas import lib, tslib
  13. from pandas import (Series, Index, DataFrame, Timedelta,
  14. DatetimeIndex, TimedeltaIndex, Timestamp,
  15. Panel, Period)
  16. from pandas.compat import u, PY2, lrange
  17. from pandas.types import inference
  18. from pandas.types.common import (is_timedelta64_dtype,
  19. is_timedelta64_ns_dtype,
  20. is_number,
  21. is_integer,
  22. is_float,
  23. is_bool,
  24. is_scalar,
  25. _ensure_int32)
  26. from pandas.types.missing import isnull
  27. from pandas.util import testing as tm
# Module-level flag read by nose's multiprocess plugin; presumably it marks
# the tests in this module as safe to hand out individually to worker
# processes -- TODO confirm against the nose multiprocess documentation.
_multiprocess_can_split_ = True
  29. def test_is_sequence():
  30. is_seq = inference.is_sequence
  31. assert (is_seq((1, 2)))
  32. assert (is_seq([1, 2]))
  33. assert (not is_seq("abcd"))
  34. assert (not is_seq(u("abcd")))
  35. assert (not is_seq(np.int64))
  36. class A(object):
  37. def __getitem__(self):
  38. return 1
  39. assert (not is_seq(A()))
  40. def test_is_list_like():
  41. passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
  42. Series([]), Series(['a']).str)
  43. fails = (1, '2', object())
  44. for p in passes:
  45. assert inference.is_list_like(p)
  46. for f in fails:
  47. assert not inference.is_list_like(f)
  48. def test_is_dict_like():
  49. passes = [{}, {'A': 1}, Series([1])]
  50. fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]
  51. for p in passes:
  52. assert inference.is_dict_like(p)
  53. for f in fails:
  54. assert not inference.is_dict_like(f)
  55. def test_is_named_tuple():
  56. passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), )
  57. fails = ((1, 2, 3), 'a', Series({'pi': 3.14}))
  58. for p in passes:
  59. assert inference.is_named_tuple(p)
  60. for f in fails:
  61. assert not inference.is_named_tuple(f)
  62. def test_is_hashable():
  63. # all new-style classes are hashable by default
  64. class HashableClass(object):
  65. pass
  66. class UnhashableClass1(object):
  67. __hash__ = None
  68. class UnhashableClass2(object):
  69. def __hash__(self):
  70. raise TypeError("Not hashable")
  71. hashable = (1,
  72. 3.14,
  73. np.float64(3.14),
  74. 'a',
  75. tuple(),
  76. (1, ),
  77. HashableClass(), )
  78. not_hashable = ([], UnhashableClass1(), )
  79. abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), )
  80. for i in hashable:
  81. assert inference.is_hashable(i)
  82. for i in not_hashable:
  83. assert not inference.is_hashable(i)
  84. for i in abc_hashable_not_really_hashable:
  85. assert not inference.is_hashable(i)
  86. # numpy.array is no longer collections.Hashable as of
  87. # https://github.com/numpy/numpy/pull/5326, just test
  88. # is_hashable()
  89. assert not inference.is_hashable(np.array([]))
  90. # old-style classes in Python 2 don't appear hashable to
  91. # collections.Hashable but also seem to support hash() by default
  92. if PY2:
  93. class OldStyleClass():
  94. pass
  95. c = OldStyleClass()
  96. assert not isinstance(c, collections.Hashable)
  97. assert inference.is_hashable(c)
  98. hash(c) # this will not raise
  99. def test_is_re():
  100. passes = re.compile('ad'),
  101. fails = 'x', 2, 3, object()
  102. for p in passes:
  103. assert inference.is_re(p)
  104. for f in fails:
  105. assert not inference.is_re(f)
  106. def test_is_recompilable():
  107. passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'),
  108. re.compile(r''))
  109. fails = 1, [], object()
  110. for p in passes:
  111. assert inference.is_re_compilable(p)
  112. for f in fails:
  113. assert not inference.is_re_compilable(f)
  114. class TestInference(tm.TestCase):
  115. def test_infer_dtype_bytes(self):
  116. compare = 'string' if PY2 else 'bytes'
  117. # string array of bytes
  118. arr = np.array(list('abc'), dtype='S1')
  119. self.assertEqual(lib.infer_dtype(arr), compare)
  120. # object array of bytes
  121. arr = arr.astype(object)
  122. self.assertEqual(lib.infer_dtype(arr), compare)
  123. def test_isinf_scalar(self):
  124. # GH 11352
  125. self.assertTrue(lib.isposinf_scalar(float('inf')))
  126. self.assertTrue(lib.isposinf_scalar(np.inf))
  127. self.assertFalse(lib.isposinf_scalar(-np.inf))
  128. self.assertFalse(lib.isposinf_scalar(1))
  129. self.assertFalse(lib.isposinf_scalar('a'))
  130. self.assertTrue(lib.isneginf_scalar(float('-inf')))
  131. self.assertTrue(lib.isneginf_scalar(-np.inf))
  132. self.assertFalse(lib.isneginf_scalar(np.inf))
  133. self.assertFalse(lib.isneginf_scalar(1))
  134. self.assertFalse(lib.isneginf_scalar('a'))
  135. def test_maybe_convert_numeric_infinities(self):
  136. # see gh-13274
  137. infinities = ['inf', 'inF', 'iNf', 'Inf',
  138. 'iNF', 'InF', 'INf', 'INF']
  139. na_values = set(['', 'NULL', 'nan'])
  140. pos = np.array(['inf'], dtype=np.float64)
  141. neg = np.array(['-inf'], dtype=np.float64)
  142. msg = "Unable to parse string"
  143. for infinity in infinities:
  144. for maybe_int in (True, False):
  145. out = lib.maybe_convert_numeric(
  146. np.array([infinity], dtype=object),
  147. na_values, maybe_int)
  148. tm.assert_numpy_array_equal(out, pos)
  149. out = lib.maybe_convert_numeric(
  150. np.array(['-' + infinity], dtype=object),
  151. na_values, maybe_int)
  152. tm.assert_numpy_array_equal(out, neg)
  153. out = lib.maybe_convert_numeric(
  154. np.array([u(infinity)], dtype=object),
  155. na_values, maybe_int)
  156. tm.assert_numpy_array_equal(out, pos)
  157. out = lib.maybe_convert_numeric(
  158. np.array(['+' + infinity], dtype=object),
  159. na_values, maybe_int)
  160. tm.assert_numpy_array_equal(out, pos)
  161. # too many characters
  162. with tm.assertRaisesRegexp(ValueError, msg):
  163. lib.maybe_convert_numeric(
  164. np.array(['foo_' + infinity], dtype=object),
  165. na_values, maybe_int)
  166. def test_maybe_convert_numeric_post_floatify_nan(self):
  167. # see gh-13314
  168. data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
  169. expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
  170. nan_values = set([-999, -999.0])
  171. for coerce_type in (True, False):
  172. out = lib.maybe_convert_numeric(data, nan_values, coerce_type)
  173. tm.assert_numpy_array_equal(out, expected)
  174. def test_convert_infs(self):
  175. arr = np.array(['inf', 'inf', 'inf'], dtype='O')
  176. result = lib.maybe_convert_numeric(arr, set(), False)
  177. self.assertTrue(result.dtype == np.float64)
  178. arr = np.array(['-inf', '-inf', '-inf'], dtype='O')
  179. result = lib.maybe_convert_numeric(arr, set(), False)
  180. self.assertTrue(result.dtype == np.float64)
  181. def test_scientific_no_exponent(self):
  182. # See PR 12215
  183. arr = np.array(['42E', '2E', '99e', '6e'], dtype='O')
  184. result = lib.maybe_convert_numeric(arr, set(), False, True)
  185. self.assertTrue(np.all(np.isnan(result)))
  186. def test_convert_non_hashable(self):
  187. # GH13324
  188. # make sure that we are handing non-hashables
  189. arr = np.array([[10.0, 2], 1.0, 'apple'])
  190. result = lib.maybe_convert_numeric(arr, set(), False, True)
  191. tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
  192. class TestTypeInference(tm.TestCase):
  193. _multiprocess_can_split_ = True
  194. def test_length_zero(self):
  195. result = lib.infer_dtype(np.array([], dtype='i4'))
  196. self.assertEqual(result, 'integer')
  197. result = lib.infer_dtype([])
  198. self.assertEqual(result, 'empty')
  199. def test_integers(self):
  200. arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
  201. result = lib.infer_dtype(arr)
  202. self.assertEqual(result, 'integer')
  203. arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O')
  204. result = lib.infer_dtype(arr)
  205. self.assertEqual(result, 'mixed-integer')
  206. arr = np.array([1, 2, 3, 4, 5], dtype='i4')
  207. result = lib.infer_dtype(arr)
  208. self.assertEqual(result, 'integer')
  209. def test_bools(self):
  210. arr = np.array([True, False, True, True, True], dtype='O')
  211. result = lib.infer_dtype(arr)
  212. self.assertEqual(result, 'boolean')
  213. arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
  214. result = lib.infer_dtype(arr)
  215. self.assertEqual(result, 'boolean')
  216. arr = np.array([True, False, True, 'foo'], dtype='O')
  217. result = lib.infer_dtype(arr)
  218. self.assertEqual(result, 'mixed')
  219. arr = np.array([True, False, True], dtype=bool)
  220. result = lib.infer_dtype(arr)
  221. self.assertEqual(result, 'boolean')
  222. def test_floats(self):
  223. arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
  224. result = lib.infer_dtype(arr)
  225. self.assertEqual(result, 'floating')
  226. arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
  227. dtype='O')
  228. result = lib.infer_dtype(arr)
  229. self.assertEqual(result, 'mixed-integer')
  230. arr = np.array([1, 2, 3, 4, 5], dtype='f4')
  231. result = lib.infer_dtype(arr)
  232. self.assertEqual(result, 'floating')
  233. arr = np.array([1, 2, 3, 4, 5], dtype='f8')
  234. result = lib.infer_dtype(arr)
  235. self.assertEqual(result, 'floating')
  236. def test_string(self):
  237. pass
  238. def test_unicode(self):
  239. pass
  240. def test_datetime(self):
  241. dates = [datetime(2012, 1, x) for x in range(1, 20)]
  242. index = Index(dates)
  243. self.assertEqual(index.inferred_type, 'datetime64')
  244. def test_infer_dtype_datetime(self):
  245. arr = np.array([Timestamp('2011-01-01'),
  246. Timestamp('2011-01-02')])
  247. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  248. arr = np.array([np.datetime64('2011-01-01'),
  249. np.datetime64('2011-01-01')], dtype=object)
  250. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  251. arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)])
  252. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  253. # starts with nan
  254. for n in [pd.NaT, np.nan]:
  255. arr = np.array([n, pd.Timestamp('2011-01-02')])
  256. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  257. arr = np.array([n, np.datetime64('2011-01-02')])
  258. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  259. arr = np.array([n, datetime(2011, 1, 1)])
  260. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  261. arr = np.array([n, pd.Timestamp('2011-01-02'), n])
  262. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  263. arr = np.array([n, np.datetime64('2011-01-02'), n])
  264. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  265. arr = np.array([n, datetime(2011, 1, 1), n])
  266. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  267. # different type of nat
  268. arr = np.array([np.timedelta64('nat'),
  269. np.datetime64('2011-01-02')], dtype=object)
  270. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  271. arr = np.array([np.datetime64('2011-01-02'),
  272. np.timedelta64('nat')], dtype=object)
  273. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  274. # mixed datetime
  275. arr = np.array([datetime(2011, 1, 1),
  276. pd.Timestamp('2011-01-02')])
  277. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  278. # should be datetime?
  279. arr = np.array([np.datetime64('2011-01-01'),
  280. pd.Timestamp('2011-01-02')])
  281. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  282. arr = np.array([pd.Timestamp('2011-01-02'),
  283. np.datetime64('2011-01-01')])
  284. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  285. arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1])
  286. self.assertEqual(lib.infer_dtype(arr), 'mixed-integer')
  287. arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1])
  288. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  289. arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')])
  290. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  291. def test_infer_dtype_timedelta(self):
  292. arr = np.array([pd.Timedelta('1 days'),
  293. pd.Timedelta('2 days')])
  294. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  295. arr = np.array([np.timedelta64(1, 'D'),
  296. np.timedelta64(2, 'D')], dtype=object)
  297. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  298. arr = np.array([timedelta(1), timedelta(2)])
  299. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  300. # starts with nan
  301. for n in [pd.NaT, np.nan]:
  302. arr = np.array([n, Timedelta('1 days')])
  303. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  304. arr = np.array([n, np.timedelta64(1, 'D')])
  305. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  306. arr = np.array([n, timedelta(1)])
  307. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  308. arr = np.array([n, pd.Timedelta('1 days'), n])
  309. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  310. arr = np.array([n, np.timedelta64(1, 'D'), n])
  311. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  312. arr = np.array([n, timedelta(1), n])
  313. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  314. # different type of nat
  315. arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')],
  316. dtype=object)
  317. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  318. arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')],
  319. dtype=object)
  320. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  321. def test_infer_dtype_period(self):
  322. # GH 13664
  323. arr = np.array([pd.Period('2011-01', freq='D'),
  324. pd.Period('2011-02', freq='D')])
  325. self.assertEqual(pd.lib.infer_dtype(arr), 'period')
  326. arr = np.array([pd.Period('2011-01', freq='D'),
  327. pd.Period('2011-02', freq='M')])
  328. self.assertEqual(pd.lib.infer_dtype(arr), 'period')
  329. # starts with nan
  330. for n in [pd.NaT, np.nan]:
  331. arr = np.array([n, pd.Period('2011-01', freq='D')])
  332. self.assertEqual(pd.lib.infer_dtype(arr), 'period')
  333. arr = np.array([n, pd.Period('2011-01', freq='D'), n])
  334. self.assertEqual(pd.lib.infer_dtype(arr), 'period')
  335. # different type of nat
  336. arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
  337. dtype=object)
  338. self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
  339. arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
  340. dtype=object)
  341. self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
  342. def test_infer_dtype_all_nan_nat_like(self):
  343. arr = np.array([np.nan, np.nan])
  344. self.assertEqual(lib.infer_dtype(arr), 'floating')
  345. # nan and None mix are result in mixed
  346. arr = np.array([np.nan, np.nan, None])
  347. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  348. arr = np.array([None, np.nan, np.nan])
  349. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  350. # pd.NaT
  351. arr = np.array([pd.NaT])
  352. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  353. arr = np.array([pd.NaT, np.nan])
  354. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  355. arr = np.array([np.nan, pd.NaT])
  356. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  357. arr = np.array([np.nan, pd.NaT, np.nan])
  358. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  359. arr = np.array([None, pd.NaT, None])
  360. self.assertEqual(lib.infer_dtype(arr), 'datetime')
  361. # np.datetime64(nat)
  362. arr = np.array([np.datetime64('nat')])
  363. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  364. for n in [np.nan, pd.NaT, None]:
  365. arr = np.array([n, np.datetime64('nat'), n])
  366. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  367. arr = np.array([pd.NaT, n, np.datetime64('nat'), n])
  368. self.assertEqual(lib.infer_dtype(arr), 'datetime64')
  369. arr = np.array([np.timedelta64('nat')], dtype=object)
  370. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  371. for n in [np.nan, pd.NaT, None]:
  372. arr = np.array([n, np.timedelta64('nat'), n])
  373. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  374. arr = np.array([pd.NaT, n, np.timedelta64('nat'), n])
  375. self.assertEqual(lib.infer_dtype(arr), 'timedelta')
  376. # datetime / timedelta mixed
  377. arr = np.array([pd.NaT, np.datetime64('nat'),
  378. np.timedelta64('nat'), np.nan])
  379. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  380. arr = np.array([np.timedelta64('nat'), np.datetime64('nat')],
  381. dtype=object)
  382. self.assertEqual(lib.infer_dtype(arr), 'mixed')
  383. def test_is_datetimelike_array_all_nan_nat_like(self):
  384. arr = np.array([np.nan, pd.NaT, np.datetime64('nat')])
  385. self.assertTrue(lib.is_datetime_array(arr))
  386. self.assertTrue(lib.is_datetime64_array(arr))
  387. self.assertFalse(lib.is_timedelta_array(arr))
  388. self.assertFalse(lib.is_timedelta64_array(arr))
  389. self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr))
  390. arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')])
  391. self.assertFalse(lib.is_datetime_array(arr))
  392. self.assertFalse(lib.is_datetime64_array(arr))
  393. self.assertTrue(lib.is_timedelta_array(arr))
  394. self.assertTrue(lib.is_timedelta64_array(arr))
  395. self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr))
  396. arr = np.array([np.nan, pd.NaT, np.datetime64('nat'),
  397. np.timedelta64('nat')])
  398. self.assertFalse(lib.is_datetime_array(arr))
  399. self.assertFalse(lib.is_datetime64_array(arr))
  400. self.assertFalse(lib.is_timedelta_array(arr))
  401. self.assertFalse(lib.is_timedelta64_array(arr))
  402. self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr))
  403. arr = np.array([np.nan, pd.NaT])
  404. self.assertTrue(lib.is_datetime_array(arr))
  405. self.assertTrue(lib.is_datetime64_array(arr))
  406. self.assertTrue(lib.is_timedelta_array(arr))
  407. self.assertTrue(lib.is_timedelta64_array(arr))
  408. self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr))
  409. arr = np.array([np.nan, np.nan], dtype=object)
  410. self.assertFalse(lib.is_datetime_array(arr))
  411. self.assertFalse(lib.is_datetime64_array(arr))
  412. self.assertFalse(lib.is_timedelta_array(arr))
  413. self.assertFalse(lib.is_timedelta64_array(arr))
  414. self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr))
  415. def test_date(self):
  416. dates = [date(2012, 1, x) for x in range(1, 20)]
  417. index = Index(dates)
  418. self.assertEqual(index.inferred_type, 'date')
  419. def test_to_object_array_tuples(self):
  420. r = (5, 6)
  421. values = [r]
  422. result = lib.to_object_array_tuples(values)
  423. try:
  424. # make sure record array works
  425. from collections import namedtuple
  426. record = namedtuple('record', 'x y')
  427. r = record(5, 6)
  428. values = [r]
  429. result = lib.to_object_array_tuples(values) # noqa
  430. except ImportError:
  431. pass
  432. def test_object(self):
  433. # GH 7431
  434. # cannot infer more than this as only a single element
  435. arr = np.array([None], dtype='O')
  436. result = lib.infer_dtype(arr)
  437. self.assertEqual(result, 'mixed')
  438. def test_to_object_array_width(self):
  439. # see gh-13320
  440. rows = [[1, 2, 3], [4, 5, 6]]
  441. expected = np.array(rows, dtype=object)
  442. out = lib.to_object_array(rows)
  443. tm.assert_numpy_array_equal(out, expected)
  444. expected = np.array(rows, dtype=object)
  445. out = lib.to_object_array(rows, min_width=1)
  446. tm.assert_numpy_array_equal(out, expected)
  447. expected = np.array([[1, 2, 3, None, None],
  448. [4, 5, 6, None, None]], dtype=object)
  449. out = lib.to_object_array(rows, min_width=5)
  450. tm.assert_numpy_array_equal(out, expected)
  451. def test_is_period(self):
  452. self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M')))
  453. self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')))
  454. self.assertFalse(lib.is_period(pd.Timestamp('2011-01')))
  455. self.assertFalse(lib.is_period(1))
  456. self.assertFalse(lib.is_period(np.nan))
  457. def test_categorical(self):
  458. # GH 8974
  459. from pandas import Categorical, Series
  460. arr = Categorical(list('abc'))
  461. result = lib.infer_dtype(arr)
  462. self.assertEqual(result, 'categorical')
  463. result = lib.infer_dtype(Series(arr))
  464. self.assertEqual(result, 'categorical')
  465. arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
  466. result = lib.infer_dtype(arr)
  467. self.assertEqual(result, 'categorical')
  468. result = lib.infer_dtype(Series(arr))
  469. self.assertEqual(result, 'categorical')
  470. class TestNumberScalar(tm.TestCase):
  471. def test_is_number(self):
  472. self.assertTrue(is_number(True))
  473. self.assertTrue(is_number(1))
  474. self.assertTrue(is_number(1.1))
  475. self.assertTrue(is_number(1 + 3j))
  476. self.assertTrue(is_number(np.bool(False)))
  477. self.assertTrue(is_number(np.int64(1)))
  478. self.assertTrue(is_number(np.float64(1.1)))
  479. self.assertTrue(is_number(np.complex128(1 + 3j)))
  480. self.assertTrue(is_number(np.nan))
  481. self.assertFalse(is_number(None))
  482. self.assertFalse(is_number('x'))
  483. self.assertFalse(is_number(datetime(2011, 1, 1)))
  484. self.assertFalse(is_number(np.datetime64('2011-01-01')))
  485. self.assertFalse(is_number(Timestamp('2011-01-01')))
  486. self.assertFalse(is_number(Timestamp('2011-01-01',
  487. tz='US/Eastern')))
  488. self.assertFalse(is_number(timedelta(1000)))
  489. self.assertFalse(is_number(Timedelta('1 days')))
  490. # questionable
  491. self.assertFalse(is_number(np.bool_(False)))
  492. self.assertTrue(is_number(np.timedelta64(1, 'D')))
  493. def test_is_bool(self):
  494. self.assertTrue(is_bool(True))
  495. self.assertTrue(is_bool(np.bool(False)))
  496. self.assertTrue(is_bool(np.bool_(False)))
  497. self.assertFalse(is_bool(1))
  498. self.assertFalse(is_bool(1.1))
  499. self.assertFalse(is_bool(1 + 3j))
  500. self.assertFalse(is_bool(np.int64(1)))
  501. self.assertFalse(is_bool(np.float64(1.1)))
  502. self.assertFalse(is_bool(np.complex128(1 + 3j)))
  503. self.assertFalse(is_bool(np.nan))
  504. self.assertFalse(is_bool(None))
  505. self.assertFalse(is_bool('x'))
  506. self.assertFalse(is_bool(datetime(2011, 1, 1)))
  507. self.assertFalse(is_bool(np.datetime64('2011-01-01')))
  508. self.assertFalse(is_bool(Timestamp('2011-01-01')))
  509. self.assertFalse(is_bool(Timestamp('2011-01-01',
  510. tz='US/Eastern')))
  511. self.assertFalse(is_bool(timedelta(1000)))
  512. self.assertFalse(is_bool(np.timedelta64(1, 'D')))
  513. self.assertFalse(is_bool(Timedelta('1 days')))
  514. def test_is_integer(self):
  515. self.assertTrue(is_integer(1))
  516. self.assertTrue(is_integer(np.int64(1)))
  517. self.assertFalse(is_integer(True))
  518. self.assertFalse(is_integer(1.1))
  519. self.assertFalse(is_integer(1 + 3j))
  520. self.assertFalse(is_integer(np.bool(False)))
  521. self.assertFalse(is_integer(np.bool_(False)))
  522. self.assertFalse(is_integer(np.float64(1.1)))
  523. self.assertFalse(is_integer(np.complex128(1 + 3j)))
  524. self.assertFalse(is_integer(np.nan))
  525. self.assertFalse(is_integer(None))
  526. self.assertFalse(is_integer('x'))
  527. self.assertFalse(is_integer(datetime(2011, 1, 1)))
  528. self.assertFalse(is_integer(np.datetime64('2011-01-01')))
  529. self.assertFalse(is_integer(Timestamp('2011-01-01')))
  530. self.assertFalse(is_integer(Timestamp('2011-01-01',
  531. tz='US/Eastern')))
  532. self.assertFalse(is_integer(timedelta(1000)))
  533. self.assertFalse(is_integer(Timedelta('1 days')))
  534. # questionable
  535. self.assertTrue(is_integer(np.timedelta64(1, 'D')))
  536. def test_is_float(self):
  537. self.assertTrue(is_float(1.1))
  538. self.assertTrue(is_float(np.float64(1.1)))
  539. self.assertTrue(is_float(np.nan))
  540. self.assertFalse(is_float(True))
  541. self.assertFalse(is_float(1))
  542. self.assertFalse(is_float(1 + 3j))
  543. self.assertFalse(is_float(np.bool(False)))
  544. self.assertFalse(is_float(np.bool_(False)))
  545. self.assertFalse(is_float(np.int64(1)))
  546. self.assertFalse(is_float(np.complex128(1 + 3j)))
  547. self.assertFalse(is_float(None))
  548. self.assertFalse(is_float('x'))
  549. self.assertFalse(is_float(datetime(2011, 1, 1)))
  550. self.assertFalse(is_float(np.datetime64('2011-01-01')))
  551. self.assertFalse(is_float(Timestamp('2011-01-01')))
  552. self.assertFalse(is_float(Timestamp('2011-01-01',
  553. tz='US/Eastern')))
  554. self.assertFalse(is_float(timedelta(1000)))
  555. self.assertFalse(is_float(np.timedelta64(1, 'D')))
  556. self.assertFalse(is_float(Timedelta('1 days')))
  557. def test_is_timedelta(self):
  558. self.assertTrue(is_timedelta64_dtype('timedelta64'))
  559. self.assertTrue(is_timedelta64_dtype('timedelta64[ns]'))
  560. self.assertFalse(is_timedelta64_ns_dtype('timedelta64'))
  561. self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]'))
  562. tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64')
  563. self.assertTrue(is_timedelta64_dtype(tdi))
  564. self.assertTrue(is_timedelta64_ns_dtype(tdi))
  565. self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]')))
  566. # Conversion to Int64Index:
  567. self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64')))
  568. self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')))
  569. class Testisscalar(tm.TestCase):
  570. def test_isscalar_builtin_scalars(self):
  571. self.assertTrue(is_scalar(None))
  572. self.assertTrue(is_scalar(True))
  573. self.assertTrue(is_scalar(False))
  574. self.assertTrue(is_scalar(0.))
  575. self.assertTrue(is_scalar(np.nan))
  576. self.assertTrue(is_scalar('foobar'))
  577. self.assertTrue(is_scalar(b'foobar'))
  578. self.assertTrue(is_scalar(u('efoobar')))
  579. self.assertTrue(is_scalar(datetime(2014, 1, 1)))
  580. self.assertTrue(is_scalar(date(2014, 1, 1)))
  581. self.assertTrue(is_scalar(time(12, 0)))
  582. self.assertTrue(is_scalar(timedelta(hours=1)))
  583. self.assertTrue(is_scalar(pd.NaT))
  584. def test_isscalar_builtin_nonscalars(self):
  585. self.assertFalse(is_scalar({}))
  586. self.assertFalse(is_scalar([]))
  587. self.assertFalse(is_scalar([1]))
  588. self.assertFalse(is_scalar(()))
  589. self.assertFalse(is_scalar((1, )))
  590. self.assertFalse(is_scalar(slice(None)))
  591. self.assertFalse(is_scalar(Ellipsis))
  592. def test_isscalar_numpy_array_scalars(self):
  593. self.assertTrue(is_scalar(np.int64(1)))
  594. self.assertTrue(is_scalar(np.float64(1.)))
  595. self.assertTrue(is_scalar(np.int32(1)))
  596. self.assertTrue(is_scalar(np.object_('foobar')))
  597. self.assertTrue(is_scalar(np.str_('foobar')))
  598. self.assertTrue(is_scalar(np.unicode_(u('foobar'))))
  599. self.assertTrue(is_scalar(np.bytes_(b'foobar')))
  600. self.assertTrue(is_scalar(np.datetime64('2014-01-01')))
  601. self.assertTrue(is_scalar(np.timedelta64(1, 'h')))
  602. def test_isscalar_numpy_zerodim_arrays(self):
  603. for zerodim in [np.array(1), np.array('foobar'),
  604. np.array(np.datetime64('2014-01-01')),
  605. np.array(np.timedelta64(1, 'h')),
  606. np.array(np.datetime64('NaT'))]:
  607. self.assertFalse(is_scalar(zerodim))
  608. self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim)))
  609. def test_isscalar_numpy_arrays(self):
  610. self.assertFalse(is_scalar(np.array([])))
  611. self.assertFalse(is_scalar(np.array([[]])))
  612. self.assertFalse(is_scalar(np.matrix('1; 2')))
  613. def test_isscalar_pandas_scalars(self):
  614. self.assertTrue(is_scalar(Timestamp('2014-01-01')))
  615. self.assertTrue(is_scalar(Timedelta(hours=1)))
  616. self.assertTrue(is_scalar(Period('2014-01-01')))
  617. def test_lisscalar_pandas_containers(self):
  618. self.assertFalse(is_scalar(Series()))
  619. self.assertFalse(is_scalar(Series([1])))
  620. self.assertFalse(is_scalar(DataFrame()))
  621. self.assertFalse(is_scalar(DataFrame([[1]])))
  622. self.assertFalse(is_scalar(Panel()))
  623. self.assertFalse(is_scalar(Panel([[[1]]])))
  624. self.assertFalse(is_scalar(Index([])))
  625. self.assertFalse(is_scalar(Index([1])))
  626. def test_datetimeindex_from_empty_datetime64_array():
  627. for unit in ['ms', 'us', 'ns']:
  628. idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
  629. assert (len(idx) == 0)
  630. def test_nan_to_nat_conversions():
  631. df = DataFrame(dict({
  632. 'A': np.asarray(
  633. lrange(10), dtype='float64'),
  634. 'B': Timestamp('20010101')
  635. }))
  636. df.iloc[3:6, :] = np.nan
  637. result = df.loc[4, 'B'].value
  638. assert (result == tslib.iNaT)
  639. s = df['B'].copy()
  640. s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan)
  641. assert (isnull(s[8]))
  642. # numpy < 1.7.0 is wrong
  643. from distutils.version import LooseVersion
  644. if LooseVersion(np.__version__) >= '1.7.0':
  645. assert (s[8].value == np.datetime64('NaT').astype(np.int64))
  646. def test_ensure_int32():
  647. values = np.arange(10, dtype=np.int32)
  648. result = _ensure_int32(values)
  649. assert (result.dtype == np.int32)
  650. values = np.arange(10, dtype=np.int64)
  651. result = _ensure_int32(values)
  652. assert (result.dtype == np.int32)
  653. if __name__ == '__main__':
  654. nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
  655. exit=False)