PageRenderTime 69ms CodeModel.GetById 47ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tests/arrays/categorical/test_indexing.py

https://github.com/jseabold/pandas
Python | 289 lines | 223 code | 43 blank | 23 comment | 9 complexity | ce9068d273194a3825f5772ade378d4e MD5 | raw file
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
  5. import pandas._testing as tm
  6. import pandas.core.common as com
  7. from pandas.tests.arrays.categorical.common import TestCategorical
  8. class TestCategoricalIndexingWithFactor(TestCategorical):
  9. def test_getitem(self):
  10. assert self.factor[0] == "a"
  11. assert self.factor[-1] == "c"
  12. subf = self.factor[[0, 1, 2]]
  13. tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8))
  14. subf = self.factor[np.asarray(self.factor) == "c"]
  15. tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8))
  16. def test_setitem(self):
  17. # int/positional
  18. c = self.factor.copy()
  19. c[0] = "b"
  20. assert c[0] == "b"
  21. c[-1] = "a"
  22. assert c[-1] == "a"
  23. # boolean
  24. c = self.factor.copy()
  25. indexer = np.zeros(len(c), dtype="bool")
  26. indexer[0] = True
  27. indexer[-1] = True
  28. c[indexer] = "c"
  29. expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
  30. tm.assert_categorical_equal(c, expected)
  31. @pytest.mark.parametrize(
  32. "other",
  33. [pd.Categorical(["b", "a"]), pd.Categorical(["b", "a"], categories=["b", "a"])],
  34. )
  35. def test_setitem_same_but_unordered(self, other):
  36. # GH-24142
  37. target = pd.Categorical(["a", "b"], categories=["a", "b"])
  38. mask = np.array([True, False])
  39. target[mask] = other[mask]
  40. expected = pd.Categorical(["b", "b"], categories=["a", "b"])
  41. tm.assert_categorical_equal(target, expected)
  42. @pytest.mark.parametrize(
  43. "other",
  44. [
  45. pd.Categorical(["b", "a"], categories=["b", "a", "c"]),
  46. pd.Categorical(["b", "a"], categories=["a", "b", "c"]),
  47. pd.Categorical(["a", "a"], categories=["a"]),
  48. pd.Categorical(["b", "b"], categories=["b"]),
  49. ],
  50. )
  51. def test_setitem_different_unordered_raises(self, other):
  52. # GH-24142
  53. target = pd.Categorical(["a", "b"], categories=["a", "b"])
  54. mask = np.array([True, False])
  55. msg = "Cannot set a Categorical with another, without identical categories"
  56. with pytest.raises(ValueError, match=msg):
  57. target[mask] = other[mask]
  58. @pytest.mark.parametrize(
  59. "other",
  60. [
  61. pd.Categorical(["b", "a"]),
  62. pd.Categorical(["b", "a"], categories=["b", "a"], ordered=True),
  63. pd.Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
  64. ],
  65. )
  66. def test_setitem_same_ordered_rasies(self, other):
  67. # Gh-24142
  68. target = pd.Categorical(["a", "b"], categories=["a", "b"], ordered=True)
  69. mask = np.array([True, False])
  70. msg = "Cannot set a Categorical with another, without identical categories"
  71. with pytest.raises(ValueError, match=msg):
  72. target[mask] = other[mask]
  73. class TestCategoricalIndexing:
  74. def test_getitem_slice(self):
  75. cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
  76. sliced = cat[3]
  77. assert sliced == "d"
  78. sliced = cat[3:5]
  79. expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
  80. tm.assert_categorical_equal(sliced, expected)
  81. def test_getitem_listlike(self):
  82. # GH 9469
  83. # properly coerce the input indexers
  84. np.random.seed(1)
  85. c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
  86. result = c.codes[np.array([100000]).astype(np.int64)]
  87. expected = c[np.array([100000]).astype(np.int64)].codes
  88. tm.assert_numpy_array_equal(result, expected)
  89. def test_periodindex(self):
  90. idx1 = PeriodIndex(
  91. ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
  92. )
  93. cat1 = Categorical(idx1)
  94. str(cat1)
  95. exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
  96. exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
  97. tm.assert_numpy_array_equal(cat1._codes, exp_arr)
  98. tm.assert_index_equal(cat1.categories, exp_idx)
  99. idx2 = PeriodIndex(
  100. ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
  101. )
  102. cat2 = Categorical(idx2, ordered=True)
  103. str(cat2)
  104. exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
  105. exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
  106. tm.assert_numpy_array_equal(cat2._codes, exp_arr)
  107. tm.assert_index_equal(cat2.categories, exp_idx2)
  108. idx3 = PeriodIndex(
  109. [
  110. "2013-12",
  111. "2013-11",
  112. "2013-10",
  113. "2013-09",
  114. "2013-08",
  115. "2013-07",
  116. "2013-05",
  117. ],
  118. freq="M",
  119. )
  120. cat3 = Categorical(idx3, ordered=True)
  121. exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
  122. exp_idx = PeriodIndex(
  123. [
  124. "2013-05",
  125. "2013-07",
  126. "2013-08",
  127. "2013-09",
  128. "2013-10",
  129. "2013-11",
  130. "2013-12",
  131. ],
  132. freq="M",
  133. )
  134. tm.assert_numpy_array_equal(cat3._codes, exp_arr)
  135. tm.assert_index_equal(cat3.categories, exp_idx)
  136. def test_categories_assignments(self):
  137. s = Categorical(["a", "b", "c", "a"])
  138. exp = np.array([1, 2, 3, 1], dtype=np.int64)
  139. s.categories = [1, 2, 3]
  140. tm.assert_numpy_array_equal(s.__array__(), exp)
  141. tm.assert_index_equal(s.categories, Index([1, 2, 3]))
  142. @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])
  143. def test_categories_assignments_wrong_length_raises(self, new_categories):
  144. cat = Categorical(["a", "b", "c", "a"])
  145. msg = (
  146. "new categories need to have the same number of items "
  147. "as the old categories!"
  148. )
  149. with pytest.raises(ValueError, match=msg):
  150. cat.categories = new_categories
  151. # Combinations of sorted/unique:
  152. @pytest.mark.parametrize(
  153. "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
  154. )
  155. # Combinations of missing/unique
  156. @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
  157. @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
  158. def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
  159. # GH 21448
  160. key = key_class(key_values, categories=range(1, 5))
  161. # Test for flat index and CategoricalIndex with same/different cats:
  162. for dtype in None, "category", key.dtype:
  163. idx = Index(idx_values, dtype=dtype)
  164. expected, exp_miss = idx.get_indexer_non_unique(key_values)
  165. result, res_miss = idx.get_indexer_non_unique(key)
  166. tm.assert_numpy_array_equal(expected, result)
  167. tm.assert_numpy_array_equal(exp_miss, res_miss)
  168. def test_where_unobserved_nan(self):
  169. ser = pd.Series(pd.Categorical(["a", "b"]))
  170. result = ser.where([True, False])
  171. expected = pd.Series(pd.Categorical(["a", None], categories=["a", "b"]))
  172. tm.assert_series_equal(result, expected)
  173. # all NA
  174. ser = pd.Series(pd.Categorical(["a", "b"]))
  175. result = ser.where([False, False])
  176. expected = pd.Series(pd.Categorical([None, None], categories=["a", "b"]))
  177. tm.assert_series_equal(result, expected)
  178. def test_where_unobserved_categories(self):
  179. ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
  180. result = ser.where([True, True, False], other="b")
  181. expected = pd.Series(
  182. Categorical(["a", "b", "b"], categories=ser.cat.categories)
  183. )
  184. tm.assert_series_equal(result, expected)
  185. def test_where_other_categorical(self):
  186. ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
  187. other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
  188. result = ser.where([True, False, True], other)
  189. expected = pd.Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
  190. tm.assert_series_equal(result, expected)
  191. def test_where_new_category_raises(self):
  192. ser = pd.Series(Categorical(["a", "b", "c"]))
  193. msg = "Cannot setitem on a Categorical with a new category"
  194. with pytest.raises(ValueError, match=msg):
  195. ser.where([True, False, True], "d")
  196. def test_where_ordered_differs_rasies(self):
  197. ser = pd.Series(
  198. Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
  199. )
  200. other = Categorical(
  201. ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
  202. )
  203. with pytest.raises(ValueError, match="without identical categories"):
  204. ser.where([True, False, True], other)
  205. @pytest.mark.parametrize("index", [True, False])
  206. def test_mask_with_boolean(index):
  207. s = Series(range(3))
  208. idx = Categorical([True, False, True])
  209. if index:
  210. idx = CategoricalIndex(idx)
  211. assert com.is_bool_indexer(idx)
  212. result = s[idx]
  213. expected = s[idx.astype("object")]
  214. tm.assert_series_equal(result, expected)
  215. @pytest.mark.parametrize("index", [True, False])
  216. def test_mask_with_boolean_na_treated_as_false(index):
  217. # https://github.com/pandas-dev/pandas/issues/31503
  218. s = Series(range(3))
  219. idx = Categorical([True, False, None])
  220. if index:
  221. idx = CategoricalIndex(idx)
  222. result = s[idx]
  223. expected = s[idx.fillna(False)]
  224. tm.assert_series_equal(result, expected)
  225. @pytest.fixture
  226. def non_coercible_categorical(monkeypatch):
  227. """
  228. Monkeypatch Categorical.__array__ to ensure no implicit conversion.
  229. Raises
  230. ------
  231. ValueError
  232. When Categorical.__array__ is called.
  233. """
  234. # TODO(Categorical): identify other places where this may be
  235. # useful and move to a conftest.py
  236. def array(self, dtype=None):
  237. raise ValueError("I cannot be converted.")
  238. with monkeypatch.context() as m:
  239. m.setattr(Categorical, "__array__", array)
  240. yield
  241. def test_series_at(non_coercible_categorical):
  242. arr = Categorical(["a", "b", "c"])
  243. ser = Series(arr)
  244. result = ser.at[0]
  245. assert result == "a"