PageRenderTime 939ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/series/test_misc_api.py

https://github.com/hoffstein/pandas
Python | 343 lines | 232 code | 74 blank | 37 comment | 27 complexity | 362abf522518a8987f5af2c68e7196fe MD5 | raw file
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. import numpy as np
  4. import pandas as pd
  5. from pandas import Index, Series, DataFrame, date_range
  6. from pandas.tseries.index import Timestamp
  7. from pandas.compat import range
  8. from pandas import compat
  9. import pandas.formats.printing as printing
  10. from pandas.util.testing import (assert_series_equal,
  11. ensure_clean)
  12. import pandas.util.testing as tm
  13. from .common import TestData
  class SharedWithSparse(object):
      # Mixin of checks written against ``self.ts`` (and ``self.series`` for
      # the pickle test). The class defines neither attribute nor any of the
      # assert* helpers it calls, so it has to be combined with a test case
      # that supplies them.

      def test_scalarop_preserve_name(self):
          # Multiplying by a scalar keeps the name.
          doubled = self.ts * 2
          self.assertEqual(doubled.name, self.ts.name)

      def test_copy_name(self):
          # copy() keeps the name.
          duplicate = self.ts.copy()
          self.assertEqual(duplicate.name, self.ts.name)

      def test_copy_index_name_checking(self):
          # don't want to be able to modify the index stored elsewhere after
          # making a copy
          self.ts.index.name = None
          self.assertIsNone(self.ts.index.name)
          # NOTE(review): compares ``self.ts`` against ``self.ts``; this looks
          # tautological -- confirm what was meant to be checked here.
          self.assertIs(self.ts, self.ts)

          duplicate = self.ts.copy()
          duplicate.index.name = 'foo'
          printing.pprint_thing(self.ts.index.name)
          # Naming the copy's index must leave the original's index unnamed.
          self.assertIsNone(self.ts.index.name)

      def test_append_preserve_name(self):
          # Splitting in two and appending the halves keeps the name.
          head = self.ts[:5]
          rejoined = head.append(self.ts[5:])
          self.assertEqual(rejoined.name, self.ts.name)

      def test_binop_maybe_preserve_name(self):
          # names match, preserve
          via_operator = self.ts * self.ts
          self.assertEqual(via_operator.name, self.ts.name)
          via_method = self.ts.mul(self.ts)
          self.assertEqual(via_method.name, self.ts.name)

          with_shorter = self.ts * self.ts[:-2]
          self.assertEqual(with_shorter.name, self.ts.name)

          # names don't match, don't preserve
          renamed = self.ts.copy()
          renamed.name = 'something else'
          via_operator = self.ts + renamed
          self.assertIsNone(via_operator.name)
          via_method = self.ts.add(renamed)
          self.assertIsNone(via_method.name)

          # Every flex arithmetic method and its reflected ('r'-prefixed)
          # counterpart goes through the same two checks.
          forward = ['add', 'sub', 'mul', 'div',
                     'truediv', 'floordiv', 'mod', 'pow']
          reflected = ['r' + name for name in forward]
          for op_name in forward + reflected:
              # names match, preserve
              left = self.ts.copy()
              outcome = getattr(left, op_name)(left)
              self.assertEqual(outcome.name, self.ts.name)

              # names don't match, don't preserve
              right = self.ts.copy()
              right.name = 'changed'
              outcome = getattr(left, op_name)(right)
              self.assertIsNone(outcome.name)

      def test_combine_first_name(self):
          # combine_first with a slice of itself keeps the name.
          combined = self.ts.combine_first(self.ts[:5])
          self.assertEqual(combined.name, self.ts.name)

      def test_getitem_preserve_name(self):
          # Boolean-mask, list and slice indexing all keep the name.
          masked = self.ts[self.ts > 0]
          self.assertEqual(masked.name, self.ts.name)

          picked = self.ts[[0, 2, 4]]
          self.assertEqual(picked.name, self.ts.name)

          window = self.ts[5:10]
          self.assertEqual(window.name, self.ts.name)

      def test_pickle(self):
          # Both fixtures must compare equal after a pickle round trip.
          series_back = self._pickle_roundtrip(self.series)
          ts_back = self._pickle_roundtrip(self.ts)
          assert_series_equal(series_back, self.series)
          assert_series_equal(ts_back, self.ts)

      def _pickle_roundtrip(self, obj):
          # Write ``obj`` to the path handed out by ensure_clean() and read it
          # straight back; the read happens while the context is still open.
          with ensure_clean() as path:
              obj.to_pickle(path)
              return pd.read_pickle(path)

      def test_argsort_preserve_name(self):
          # argsort() keeps the name.
          order = self.ts.argsort()
          self.assertEqual(order.name, self.ts.name)

      def test_sort_index_name(self):
          # sort_index(ascending=False) keeps the name.
          descending = self.ts.sort_index(ascending=False)
          self.assertEqual(descending.name, self.ts.name)

      def test_to_sparse_pass_name(self):
          # to_sparse() passes the name through.
          sparse = self.ts.to_sparse()
          self.assertEqual(sparse.name, self.ts.name)
  90. class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase):
  91. _multiprocess_can_split_ = True
  def test_tab_completion(self):
      # GH 9910
      # Checks which accessor names ('str', 'dt', 'cat') dir() lists for
      # Series built from different kinds of values. assertIn/assertNotIn
      # are used so that a failure reports the offending name alongside the
      # listing rather than only "False is not true".

      # Series of str values should have .str but not .dt/.cat in __dir__
      s = Series(list('abcd'))
      listed = dir(s)
      self.assertIn('str', listed)
      self.assertNotIn('dt', listed)
      self.assertNotIn('cat', listed)

      # similarly for .dt
      s = Series(date_range('1/1/2015', periods=5))
      listed = dir(s)
      self.assertIn('dt', listed)
      self.assertNotIn('str', listed)
      self.assertNotIn('cat', listed)

      # similarly for .cat, but with the twist that str and dt should be
      # there if the categories are of that type first cat and str
      s = Series(list('abbcd'), dtype="category")
      listed = dir(s)
      self.assertIn('cat', listed)
      self.assertIn('str', listed)  # as it is a string categorical
      self.assertNotIn('dt', listed)

      # similar to cat and str
      s = Series(date_range('1/1/2015', periods=5)).astype("category")
      listed = dir(s)
      self.assertIn('cat', listed)
      self.assertNotIn('str', listed)
      self.assertIn('dt', listed)  # as it is a datetime categorical
  def test_not_hashable(self):
      # hash() must raise TypeError for an empty and a non-empty Series.
      candidates = (Series(), Series([1]))
      for unhashable in candidates:
          self.assertRaises(TypeError, hash, unhashable)
  def test_contains(self):
      # Hands the index labels and the Series itself to
      # tm.assert_contains_all.
      labels = self.ts.index
      tm.assert_contains_all(labels, self.ts)
  def test_iter(self):
      # The n-th value produced by iteration must equal fixture[n], for both
      # fixtures.
      for fixture in (self.series, self.ts):
          for position, item in enumerate(fixture):
              self.assertEqual(item, fixture[position])
  def test_iter_box(self):
      # Iterating a Series must hand back the same scalar types that were
      # put in (Timestamp, tz-aware Timestamp, Timedelta, Period).

      # naive timestamps
      stamps = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
      boxed = pd.Series(stamps)
      self.assertEqual(boxed.dtype, 'datetime64[ns]')
      for got, want in zip(boxed, stamps):
          self.assertIsInstance(got, pd.Timestamp)
          self.assertEqual(got, want)
          self.assertIsNone(got.tz)

      # tz-aware timestamps keep their timezone
      stamps = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                pd.Timestamp('2011-01-02', tz='US/Eastern')]
      boxed = pd.Series(stamps)
      self.assertEqual(boxed.dtype, 'datetime64[ns, US/Eastern]')
      for got, want in zip(boxed, stamps):
          self.assertIsInstance(got, pd.Timestamp)
          self.assertEqual(got, want)
          self.assertEqual(got.tz, want.tz)

      # timedelta
      deltas = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
      boxed = pd.Series(deltas)
      self.assertEqual(boxed.dtype, 'timedelta64[ns]')
      for got, want in zip(boxed, deltas):
          self.assertIsInstance(got, pd.Timedelta)
          self.assertEqual(got, want)

      # period (object dtype, not boxed)
      periods = [pd.Period('2011-01-01', freq='M'),
                 pd.Period('2011-01-02', freq='M')]
      boxed = pd.Series(periods)
      self.assertEqual(boxed.dtype, 'object')
      for got, want in zip(boxed, periods):
          self.assertIsInstance(got, pd.Period)
          self.assertEqual(got, want)
          self.assertEqual(got.freq, 'M')
  def test_keys(self):
      # keys() must return the very same object as .index.
      # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
      # to .keys() in a list()
      bound_keys = self.ts.keys
      self.assertIs(bound_keys(), self.ts.index)
  def test_values(self):
      # The Series and its .values are compared with the test case's
      # assert_numpy_array_equal helper.
      raw = self.ts.values
      self.assert_numpy_array_equal(self.ts, raw)
  def test_iteritems(self):
      # Each (label, value) pair from compat.iteritems must agree with
      # label-based lookup on the same fixture.
      for fixture in (self.series, self.ts):
          for label, item in compat.iteritems(fixture):
              self.assertEqual(item, fixture[label])

      # assert is lazy (generators don't define reverse, lists do)
      lazy_pairs = self.series.iteritems()
      self.assertFalse(hasattr(lazy_pairs, 'reverse'))
  def test_raise_on_info(self):
      # Calling .info() on a Series is expected to raise AttributeError.
      noise = np.random.randn(10)
      ser = Series(noise)
      with tm.assertRaises(AttributeError):
          ser.info()
  def test_copy(self):
      # copy() with deep omitted / False / True: writes to the copy must
      # reach the original only for deep=False.
      for deep in [None, False, True]:
          source = Series(np.arange(10), dtype='float64')

          # default deep is True
          if deep is None:
              clone = source.copy()
          else:
              clone = source.copy(deep=deep)

          clone[::2] = np.NaN

          # The copy itself always sees the write.
          self.assertTrue(np.isnan(clone[0]))
          if deep is not None and deep is not True:
              # we DID modify the original Series
              self.assertTrue(np.isnan(source[0]))
          else:
              # Did not modify original Series
              self.assertFalse(np.isnan(source[0]))

      # GH 11794
      # copy of tz-aware
      untouched = Series([Timestamp('2012/01/01', tz='UTC')])
      overwritten = Series([Timestamp('1999/01/01', tz='UTC')])

      for deep in [None, False, True]:
          source = Series([Timestamp('2012/01/01', tz='UTC')])

          if deep is None:
              clone = source.copy()
          else:
              clone = source.copy(deep=deep)

          clone[0] = pd.Timestamp('1999/01/01', tz='UTC')

          # default deep is True
          if deep is not None and deep is not True:
              expected_source = overwritten
          else:
              expected_source = untouched
          assert_series_equal(source, expected_source)
          assert_series_equal(clone, overwritten)
  def test_axis_alias(self):
      # 'rows' must be accepted wherever the 'index' axis name is.
      ser = Series([1, 2, np.nan])
      by_rows = ser.dropna(axis='rows')
      by_index = ser.dropna(axis='index')
      assert_series_equal(by_rows, by_index)

      total = ser.dropna().sum('rows')
      self.assertEqual(total, 3)

      self.assertEqual(ser._get_axis_number('rows'), 0)
      self.assertEqual(ser._get_axis_name('rows'), 'index')
  def test_numpy_unique(self):
      # Smoke test only: the call has to complete, its result is not
      # inspected.
      # it works!
      fixture = self.ts
      np.unique(fixture)
  def test_ndarray_compat(self):
      # Exercises numpy-style usage: argmax-based lookup through apply(),
      # item(), np.ones_like, ravel and np.compress.

      # test numpy compat with Series as sub-class of NDFrame
      frame = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
                        index=date_range('1/1/2000', periods=1000))

      def value_at_argmax(column):
          return column[column.argmax()]

      per_column = frame.apply(value_at_argmax)
      column_max = frame.max()
      assert_series_equal(per_column, column_max)

      # .item()
      single = Series([1])
      self.assertEqual(single.item(), 1)
      self.assertEqual(single.item(), single.iloc[0])

      # using an ndarray like function
      noise = Series(np.random.randn(10))
      ones = np.ones_like(noise)
      ones_expected = Series(1, index=range(10), dtype='float64')
      # The comparison below is left disabled; only the two calls above are
      # exercised.
      # assert_series_equal(ones, ones_expected)

      # ravel
      noise = Series(np.random.randn(10))
      tm.assert_almost_equal(noise.ravel(order='F'),
                             noise.values.ravel(order='F'))

      # compress
      # GH 6658
      labelled = Series([0, 1., -1], index=list('abc'))
      kept = np.compress(labelled > 0, labelled)
      assert_series_equal(kept, Series([1.], index=['b']))

      kept = np.compress(labelled < -1, labelled)
      # result empty Index(dtype=object) as the same as original
      empty = Series([], dtype='float64', index=Index([], dtype='object'))
      assert_series_equal(kept, empty)

      float_keyed = Series([0, 1., -1], index=[.1, .2, .3])
      kept = np.compress(float_keyed > 0, float_keyed)
      assert_series_equal(kept, Series([1.], index=[.2]))

      kept = np.compress(float_keyed < -1, float_keyed)
      # result empty Float64Index as the same as original
      empty = Series([], dtype='float64', index=Index([], dtype='float64'))
      assert_series_equal(kept, empty)
  def test_str_attribute(self):
      # GH9068
      # The strip-family methods reached through Series.str must match the
      # builtin str methods applied value by value.
      padded = Series([' jack', 'jill ', ' jesse ', 'frank'])
      for name in ['strip', 'rstrip', 'lstrip']:
          builtin = getattr(str, name)
          expected = Series([builtin(text) for text in padded.values])
          accessor_method = getattr(Series.str, name)
          assert_series_equal(accessor_method(padded.str), expected)

      # str accessor only valid with string values
      numbers = Series(range(5))
      with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'):
          numbers.str.repeat(2)