
/pandas/tests/test_base.py

http://github.com/pydata/pandas
import re
from datetime import datetime, timedelta
import numpy as np
import pandas.compat as compat
import pandas as pd
from pandas.compat import u, StringIO
from pandas.core.base import FrozenList, FrozenNDArray, DatetimeIndexOpsMixin
from pandas.util.testing import assertRaisesRegexp, assert_isinstance
from pandas import Series, Index, Int64Index, DatetimeIndex, PeriodIndex
from pandas import _np_version_under1p7
import pandas.tslib as tslib
import nose

import pandas.util.testing as tm

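# Smoke-test mixin: repr/str/bytes (and unicode on Python 2) should not raise on
# the containers under test.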
class CheckStringMixin(object):

    def test_string_methods_dont_fail(self):
        repr(self.container)
        str(self.container)
        bytes(self.container)
        if not compat.PY3:
            unicode(self.container)

    def test_tricky_container(self):
        if not hasattr(self, 'unicode_container'):
            raise nose.SkipTest('Need unicode_container to test with this')
        repr(self.unicode_container)
        str(self.unicode_container)
        bytes(self.unicode_container)
        if not compat.PY3:
            unicode(self.unicode_container)

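# Mixin asserting immutability: item/slice assignment, deletion, and every method
# named in `mutable_methods` must raise the "does not support mutable operations"
# TypeError, while slicing keeps the container type.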
class CheckImmutable(object):
    mutable_regex = re.compile('does not support mutable operations')

    def check_mutable_error(self, *args, **kwargs):
        # pass whatever functions you normally would to assertRaises (after the Exception kind)
        assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs)

    def test_no_mutable_funcs(self):
        def setitem(): self.container[0] = 5
        self.check_mutable_error(setitem)

        def setslice(): self.container[1:2] = 3
        self.check_mutable_error(setslice)

        def delitem(): del self.container[0]
        self.check_mutable_error(delitem)

        def delslice(): del self.container[0:3]
        self.check_mutable_error(delslice)

        mutable_methods = getattr(self, "mutable_methods", [])
        for meth in mutable_methods:
            self.check_mutable_error(getattr(self.container, meth))

    def test_slicing_maintains_type(self):
        result = self.container[1:2]
        expected = self.lst[1:2]
        self.check_result(result, expected)

    def check_result(self, result, expected, klass=None):
        klass = klass or self.klass
        assert_isinstance(result, klass)
        self.assertEqual(result, expected)

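# FrozenList: concatenation returns a new FrozenList and in-place add leaves the
# original container unchanged.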
class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase):
    mutable_methods = ('extend', 'pop', 'remove', 'insert')
    unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"])

    def setUp(self):
        self.lst = [1, 2, 3, 4, 5]
        self.container = FrozenList(self.lst)
        self.klass = FrozenList

    def test_add(self):
        result = self.container + (1, 2, 3)
        expected = FrozenList(self.lst + [1, 2, 3])
        self.check_result(result, expected)

        result = (1, 2, 3) + self.container
        expected = FrozenList([1, 2, 3] + self.lst)
        self.check_result(result, expected)

    def test_inplace(self):
        q = r = self.container
        q += [5]
        self.check_result(q, self.lst + [5])
        # other shouldn't be mutated
        self.check_result(r, self.lst)

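# FrozenNDArray: views and shallow copies keep the FrozenNDArray type, while
# values() hands back a writable copy so the frozen data itself stays untouched.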
class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase):
    mutable_methods = ('put', 'itemset', 'fill')
    unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"])

    def setUp(self):
        self.lst = [3, 5, 7, -2]
        self.container = FrozenNDArray(self.lst)
        self.klass = FrozenNDArray

    def test_shallow_copying(self):
        original = self.container.copy()
        assert_isinstance(self.container.view(), FrozenNDArray)
        self.assertFalse(isinstance(self.container.view(np.ndarray), FrozenNDArray))
        self.assertIsNot(self.container.view(), self.container)
        self.assert_numpy_array_equal(self.container, original)

        # shallow copy should be the same too
        assert_isinstance(self.container._shallow_copy(), FrozenNDArray)

        # setting should not be allowed
        def testit(container): container[0] = 16
        self.check_mutable_error(testit, self.container)

    def test_values(self):
        original = self.container.view(np.ndarray).copy()
        n = original[0] + 15
        vals = self.container.values()
        self.assert_numpy_array_equal(original, vals)
        self.assertIsNot(original, vals)

        vals[0] = n
        self.assert_numpy_array_equal(self.container, original)
        self.assertEqual(vals[0], n)

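# Shared fixture: builds int/float/datetime/tz-aware/period/string indexes plus
# matching Series over the same random data, collects them in self.objs, and
# provides check_ops_properties to compare each op against the underlying index.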
class Ops(tm.TestCase):

    def setUp(self):
        self.int_index = tm.makeIntIndex(10)
        self.float_index = tm.makeFloatIndex(10)
        self.dt_index = tm.makeDateIndex(10)
        self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern')
        self.period_index = tm.makePeriodIndex(10)
        self.string_index = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index)
        self.float_series = Series(arr, index=self.int_index)
        self.dt_series = Series(arr, index=self.dt_index)
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        types = ['int', 'float', 'dt', 'dt_tz', 'period', 'string']
        self.objs = [getattr(self, "{0}_{1}".format(t, f))
                     for t in types for f in ['index', 'series']]

    def check_ops_properties(self, props, filter=None, ignore_failures=False):
        for op in props:
            for o in self.is_valid_objs:

                # if a filter, skip if it doesn't match
                if filter is not None:
                    filt = o.index if isinstance(o, Series) else o
                    if not filter(filt):
                        continue

                try:
                    if isinstance(o, Series):
                        expected = Series(getattr(o.index, op), index=o.index)
                    else:
                        expected = getattr(o, op)
                except AttributeError:
                    if ignore_failures:
                        continue

                result = getattr(o, op)

                # these could be series, arrays or scalars
                if isinstance(result, Series) and isinstance(expected, Series):
                    tm.assert_series_equal(result, expected)
                elif isinstance(result, Index) and isinstance(expected, Index):
                    tm.assert_index_equal(result, expected)
                elif isinstance(result, np.ndarray) and isinstance(expected, np.ndarray):
                    self.assert_numpy_array_equal(result, expected)
                else:
                    self.assertEqual(result, expected)

            # freq raises AttributeError on an Int64Index because it's not defined
            # there; we mostly care about Series here anyhow
            if not ignore_failures:
                for o in self.not_valid_objs:

                    # an object that is datetimelike will raise a TypeError,
                    # otherwise an AttributeError
                    if issubclass(type(o), DatetimeIndexOpsMixin):
                        self.assertRaises(TypeError, lambda: getattr(o, op))
                    else:
                        self.assertRaises(AttributeError, lambda: getattr(o, op))

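# Index/Series ops exposed through pandas.core.base: min/max, nan handling,
# value_counts/unique/nunique and factorize, run over every object in self.objs
# that allows index ops.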
class TestIndexOps(Ops):

    def setUp(self):
        super(TestIndexOps, self).setUp()
        self.is_valid_objs = [o for o in self.objs if o._allow_index_ops]
        self.not_valid_objs = [o for o in self.objs if not o._allow_index_ops]

    def test_ops(self):
        tm._skip_if_not_numpy17_friendly()
        for op in ['max', 'min']:
            for o in self.objs:
                result = getattr(o, op)()
                if not isinstance(o, PeriodIndex):
                    expected = getattr(o.values, op)()
                else:
                    expected = pd.Period(ordinal=getattr(o.values, op)(), freq=o.freq)
                try:
                    self.assertEqual(result, expected)
                except ValueError:
                    # comparing tz-aware series with np.array results in ValueError
                    expected = expected.astype('M8[ns]').astype('int64')
                    self.assertEqual(result.value, expected)

    def test_nanops(self):
        # GH 7261
        for op in ['max', 'min']:
            for klass in [Index, Series]:
                obj = klass([np.nan, 2.0])
                self.assertEqual(getattr(obj, op)(), 2.0)

                obj = klass([np.nan])
                self.assertTrue(pd.isnull(getattr(obj, op)()))

                obj = klass([])
                self.assertTrue(pd.isnull(getattr(obj, op)()))

                obj = klass([pd.NaT, datetime(2011, 11, 1)])
                # check DatetimeIndex monotonic path
                self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1))

                obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
                # check DatetimeIndex non-monotonic path
                self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1))

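    # Repeat the n-th element of each fixture object n+1 times, then check
    # value_counts/unique/nunique, both as-is and with NaN/None (iNaT for
    # datetime64) written into the first two slots.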
    def test_value_counts_unique_nunique(self):
        for o in self.objs:
            klass = type(o)
            values = o.values

            # create repeated values: the n-th element is repeated n+1 times
            if isinstance(o, PeriodIndex):
                # freq must be specified because repeat makes freq ambiguous
                o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
            else:
                o = klass(np.repeat(values, range(1, len(o) + 1)))

            expected_s = Series(range(10, 0, -1), index=values[::-1], dtype='int64')
            tm.assert_series_equal(o.value_counts(), expected_s)

            if isinstance(o, DatetimeIndex):
                # DatetimeIndex.unique returns a DatetimeIndex
                self.assertTrue(o.unique().equals(klass(values)))
            else:
                self.assert_numpy_array_equal(o.unique(), values)

            self.assertEqual(o.nunique(), len(np.unique(o.values)))

        for null_obj in [np.nan, None]:
            for o in self.objs:
                klass = type(o)
                values = o.values

                if o.values.dtype == 'int64':
                    # skip int64: it cannot hold nan or None
                    continue

                if o.values.dtype == 'datetime64[ns]' and _np_version_under1p7:
                    # unable to assign None
                    continue

                # special assignment to the numpy array
                if o.values.dtype == 'datetime64[ns]':
                    values[0:2] = pd.tslib.iNaT
                else:
                    values[0:2] = null_obj

                # create repeated values: the n-th element is repeated n+1 times
                if isinstance(o, PeriodIndex):
                    o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
                else:
                    o = klass(np.repeat(values, range(1, len(o) + 1)))

                if isinstance(o, DatetimeIndex):
                    expected_s_na = Series(list(range(10, 2, -1)) + [3], index=values[9:0:-1])
                    expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1])
                else:
                    expected_s_na = Series(list(range(10, 2, -1)) + [3], index=values[9:0:-1], dtype='int64')
                    expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1], dtype='int64')

                tm.assert_series_equal(o.value_counts(dropna=False), expected_s_na)
                tm.assert_series_equal(o.value_counts(), expected_s)

                # numpy_array_equal cannot compare arrays that include nan
                result = o.unique()
                self.assert_numpy_array_equal(result[1:], values[2:])
                if isinstance(o, DatetimeIndex):
                    self.assertTrue(result[0] is pd.NaT)
                else:
                    self.assertTrue(pd.isnull(result[0]))

                self.assertEqual(o.nunique(), 8)
                self.assertEqual(o.nunique(dropna=False), 9)

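    # value_counts/unique/nunique on freshly built Index/Series objects: strings,
    # sort/ascending/normalize flags, bins, missing values, empty input,
    # datetime64 with NaT (GH 3002) and timedelta64.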
    def test_value_counts_inferred(self):
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)

            # don't sort, have to sort after the fact as not sorting is platform-dep
            hist = s.value_counts(sort=False)
            hist.sort()
            expected = Series([3, 1, 4, 2], index=list('acbd'))
            expected.sort()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins
            self.assertRaises(TypeError, lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series({0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series({0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = ['a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b']
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                             'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'])
            f = StringIO(txt)
            df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X'])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z'],
                                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT])

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')
            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            td = df.dt - df.dt + timedelta(1)
            td = klass(td)

            result = td.value_counts()
            expected_s = Series([6], index=[86400000000000])
            self.assertEqual(result.index.dtype, 'int64')
            tm.assert_series_equal(result, expected_s)

            # get nanoseconds to compare
            expected = np.array([86400000000000])
            self.assert_numpy_array_equal(td.unique(), expected)
            self.assertEqual(td.nunique(), 1)

            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2)
            result2 = td2.value_counts()
            self.assertEqual(result2.index.dtype, 'int64')
            tm.assert_series_equal(result2, expected_s)
            self.assert_numpy_array_equal(td.unique(), expected)
            self.assertEqual(td.nunique(), 1)

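    # factorize: labels are 0..n-1 for all-unique input; with duplicates appended,
    # sort=True and sort=False must order the uniques accordingly.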
    def test_factorize(self):
        for o in self.objs:
            exp_arr = np.array(range(len(o)))
            labels, uniques = o.factorize()

            self.assert_numpy_array_equal(labels, exp_arr)
            if isinstance(o, Series):
                expected = Index(o.values)
                self.assert_numpy_array_equal(uniques, expected)
            else:
                self.assertTrue(uniques.equals(o))

        for o in self.objs:
            # sort by value, and create duplicates
            if isinstance(o, Series):
                o.sort()
            else:
                indexer = o.argsort()
                o = o.take(indexer)
            n = o[5:].append(o)

            exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
            labels, uniques = n.factorize(sort=True)
            self.assert_numpy_array_equal(labels, exp_arr)
            if isinstance(o, Series):
                expected = Index(o.values)
                self.assert_numpy_array_equal(uniques, expected)
            else:
                self.assertTrue(uniques.equals(o))

            exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4])
            labels, uniques = n.factorize(sort=False)
            self.assert_numpy_array_equal(labels, exp_arr)
            if isinstance(o, Series):
                expected = Index(np.concatenate([o.values[5:10], o.values[:5]]))
                self.assert_numpy_array_equal(uniques, expected)
            else:
                expected = o[5:].append(o[:5])
                self.assertTrue(uniques.equals(expected))

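# Datetime-specific ops: field accessors via check_ops_properties, asobject/tolist,
# min/max with NaT (tz-naive and tz-aware), and the DatetimeIndex repr/str/unicode
# output.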
class TestDatetimeIndexOps(Ops):
    _allowed = '_allow_datetime_index_ops'

    def setUp(self):
        super(TestDatetimeIndexOps, self).setUp()
        mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        self.check_ops_properties(['year', 'month', 'day', 'hour', 'minute', 'second',
                                   'weekofyear', 'week', 'dayofweek', 'dayofyear', 'quarter'])
        self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond',
                                   'is_month_start', 'is_month_end', 'is_quarter_start',
                                   'is_quarter_end', 'is_year_start', 'is_year_end'],
                                  lambda x: isinstance(x, DatetimeIndex))

    def test_ops_properties_basic(self):

        # sanity check that the behavior didn't change
        # GH7206
        for op in ['year', 'day', 'second', 'weekday']:
            self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op))

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        self.assertEquals(s.year, 2000)
        self.assertEquals(s.month, 1)
        self.assertEquals(s.day, 10)
        self.assertRaises(AttributeError, lambda: s.weekday)

    def test_asobject_tolist(self):
        idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx')
        expected_list = [pd.Timestamp('2013-01-31'), pd.Timestamp('2013-02-28'),
                         pd.Timestamp('2013-03-31'), pd.Timestamp('2013-04-30')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(idx.tolist(), expected_list)

        idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo')
        expected_list = [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'),
                         pd.Timestamp('2013-02-28', tz='Asia/Tokyo'),
                         pd.Timestamp('2013-03-31', tz='Asia/Tokyo'),
                         pd.Timestamp('2013-04-30', tz='Asia/Tokyo')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(idx.tolist(), expected_list)

        idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                             pd.NaT, datetime(2013, 1, 4)], name='idx')
        expected_list = [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'),
                         pd.NaT, pd.Timestamp('2013-01-04')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(idx.tolist(), expected_list)

    def test_minmax(self):
        for tz in [None, 'Asia/Tokyo', 'US/Eastern']:
            # monotonic
            idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02',
                                     '2011-01-03'], tz=tz)
            self.assertTrue(idx1.is_monotonic)

            # non-monotonic
            idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03',
                                     '2011-01-02', pd.NaT], tz=tz)
            self.assertFalse(idx2.is_monotonic)

            for idx in [idx1, idx2]:
                self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz))
                self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz))

        for op in ['min', 'max']:
            # Return NaT
            obj = DatetimeIndex([])
            self.assertTrue(pd.isnull(getattr(obj, op)()))

            obj = DatetimeIndex([pd.NaT])
            self.assertTrue(pd.isnull(getattr(obj, op)()))

            obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT])
            self.assertTrue(pd.isnull(getattr(obj, op)()))

    def test_representation(self):
        idx1 = DatetimeIndex([], freq='D')
        idx2 = DatetimeIndex(['2011-01-01'], freq='D')
        idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
        idx4 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
        idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'],
                             freq='H', tz='Asia/Tokyo')
        idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                             tz='US/Eastern')

        exp1 = """<class 'pandas.tseries.index.DatetimeIndex'>
Length: 0, Freq: D, Timezone: None"""

        exp2 = """<class 'pandas.tseries.index.DatetimeIndex'>
[2011-01-01]
Length: 1, Freq: D, Timezone: None"""

        exp3 = """<class 'pandas.tseries.index.DatetimeIndex'>
[2011-01-01, 2011-01-02]
Length: 2, Freq: D, Timezone: None"""

        exp4 = """<class 'pandas.tseries.index.DatetimeIndex'>
[2011-01-01, ..., 2011-01-03]
Length: 3, Freq: D, Timezone: None"""

        exp5 = """<class 'pandas.tseries.index.DatetimeIndex'>
[2011-01-01 09:00:00+09:00, ..., 2011-01-01 11:00:00+09:00]
Length: 3, Freq: H, Timezone: Asia/Tokyo"""

        exp6 = """<class 'pandas.tseries.index.DatetimeIndex'>
[2011-01-01 09:00:00-05:00, ..., NaT]
Length: 3, Freq: None, Timezone: US/Eastern"""

        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
                                 [exp1, exp2, exp3, exp4, exp5, exp6]):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(idx, func)()
                self.assertEqual(result, expected)

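# Period-specific counterparts: field accessors (including qyear), asobject/tolist
# with NaT periods, min/max returning NaT-valued Periods, and the PeriodIndex repr
# output (GH 7601).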
class TestPeriodIndexOps(Ops):
    _allowed = '_allow_period_index_ops'

    def setUp(self):
        super(TestPeriodIndexOps, self).setUp()
        mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops
        self.is_valid_objs = [o for o in self.objs if mask(o)]
        self.not_valid_objs = [o for o in self.objs if not mask(o)]

    def test_ops_properties(self):
        self.check_ops_properties(['year', 'month', 'day', 'hour', 'minute', 'second',
                                   'weekofyear', 'week', 'dayofweek', 'dayofyear', 'quarter'])
        self.check_ops_properties(['qyear'], lambda x: isinstance(x, PeriodIndex))

    def test_asobject_tolist(self):
        idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx')
        expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'),
                         pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(idx.tolist(), expected_list)

        idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx')
        expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'),
                         pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        for i in [0, 1, 3]:
            self.assertTrue(result[i], expected[i])
        self.assertTrue(result[2].ordinal, pd.tslib.iNaT)
        self.assertTrue(result[2].freq, 'D')
        self.assertEqual(result.name, expected.name)

        result_list = idx.tolist()
        for i in [0, 1, 3]:
            self.assertTrue(result_list[i], expected_list[i])
        self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT)
        self.assertTrue(result_list[2].freq, 'D')

    def test_minmax(self):
        # monotonic
        idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02',
                               '2011-01-03'], freq='D')
        self.assertTrue(idx1.is_monotonic)

        # non-monotonic
        idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03',
                               '2011-01-02', pd.NaT], freq='D')
        self.assertFalse(idx2.is_monotonic)

        for idx in [idx1, idx2]:
            self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D'))
            self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D'))

        for op in ['min', 'max']:
            # Return NaT
            obj = PeriodIndex([], freq='M')
            result = getattr(obj, op)()
            self.assertEqual(result.ordinal, tslib.iNaT)
            self.assertEqual(result.freq, 'M')

            obj = PeriodIndex([pd.NaT], freq='M')
            result = getattr(obj, op)()
            self.assertEqual(result.ordinal, tslib.iNaT)
            self.assertEqual(result.freq, 'M')

            obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M')
            result = getattr(obj, op)()
            self.assertEqual(result.ordinal, tslib.iNaT)
            self.assertEqual(result.freq, 'M')

    def test_representation(self):
        # GH 7601
        idx1 = PeriodIndex([], freq='D')
        idx2 = PeriodIndex(['2011-01-01'], freq='D')
        idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
        idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
        idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A')
        idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H')

        idx7 = pd.period_range('2013Q1', periods=1, freq="Q")
        idx8 = pd.period_range('2013Q1', periods=2, freq="Q")
        idx9 = pd.period_range('2013Q1', periods=3, freq="Q")

        exp1 = """<class 'pandas.tseries.period.PeriodIndex'>
Length: 0, Freq: D"""

        exp2 = """<class 'pandas.tseries.period.PeriodIndex'>
[2011-01-01]
Length: 1, Freq: D"""

        exp3 = """<class 'pandas.tseries.period.PeriodIndex'>
[2011-01-01, 2011-01-02]
Length: 2, Freq: D"""

        exp4 = """<class 'pandas.tseries.period.PeriodIndex'>
[2011-01-01, ..., 2011-01-03]
Length: 3, Freq: D"""

        exp5 = """<class 'pandas.tseries.period.PeriodIndex'>
[2011, ..., 2013]
Length: 3, Freq: A-DEC"""

        exp6 = """<class 'pandas.tseries.period.PeriodIndex'>
[2011-01-01 09:00, ..., NaT]
Length: 3, Freq: H"""

        exp7 = """<class 'pandas.tseries.period.PeriodIndex'>
[2013Q1]
Length: 1, Freq: Q-DEC"""

        exp8 = """<class 'pandas.tseries.period.PeriodIndex'>
[2013Q1, 2013Q2]
Length: 2, Freq: Q-DEC"""

        exp9 = """<class 'pandas.tseries.period.PeriodIndex'>
[2013Q1, ..., 2013Q3]
Length: 3, Freq: Q-DEC"""

        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
                                 [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(idx, func)()
                self.assertEqual(result, expected)

if __name__ == '__main__':
    import nose

    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   # '--with-coverage', '--cover-package=pandas.core'],
                   exit=False)