PageRenderTime 205ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/series/test_ufunc.py

https://github.com/jreback/pandas
Python | 304 lines | 211 code | 70 blank | 23 comment | 20 complexity | 0efc0a085d508aeb79cd56c7cf45a1f6 MD5 | raw file
  1. from collections import deque
  2. import string
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. import pandas._testing as tm
  7. from pandas.arrays import SparseArray
  8. UNARY_UFUNCS = [np.positive, np.floor, np.exp]
  9. BINARY_UFUNCS = [np.add, np.logaddexp] # dunder op
  10. SPARSE = [True, False]
  11. SPARSE_IDS = ["sparse", "dense"]
  12. SHUFFLE = [True, False]
  13. @pytest.fixture
  14. def arrays_for_binary_ufunc():
  15. """
  16. A pair of random, length-100 integer-dtype arrays, that are mostly 0.
  17. """
  18. a1 = np.random.randint(0, 10, 100, dtype="int64")
  19. a2 = np.random.randint(0, 10, 100, dtype="int64")
  20. a1[::3] = 0
  21. a2[::4] = 0
  22. return a1, a2
  23. @pytest.mark.parametrize("ufunc", UNARY_UFUNCS)
  24. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  25. def test_unary_ufunc(ufunc, sparse):
  26. # Test that ufunc(pd.Series) == pd.Series(ufunc)
  27. array = np.random.randint(0, 10, 10, dtype="int64")
  28. array[::2] = 0
  29. if sparse:
  30. array = SparseArray(array, dtype=pd.SparseDtype("int64", 0))
  31. index = list(string.ascii_letters[:10])
  32. name = "name"
  33. series = pd.Series(array, index=index, name=name)
  34. result = ufunc(series)
  35. expected = pd.Series(ufunc(array), index=index, name=name)
  36. tm.assert_series_equal(result, expected)
  37. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  38. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  39. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  40. def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
  41. # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b))
  42. a1, a2 = arrays_for_binary_ufunc
  43. if sparse:
  44. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  45. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  46. name = "name" # op(pd.Series, array) preserves the name.
  47. series = pd.Series(a1, name=name)
  48. other = a2
  49. array_args = (a1, a2)
  50. series_args = (series, other) # ufunc(series, array)
  51. if flip:
  52. array_args = reversed(array_args)
  53. series_args = reversed(series_args) # ufunc(array, series)
  54. expected = pd.Series(ufunc(*array_args), name=name)
  55. result = ufunc(*series_args)
  56. tm.assert_series_equal(result, expected)
  57. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  58. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  59. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  60. def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
  61. # Test that
  62. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  63. # * ufunc(Index, pd.Series) dispatches to pd.Series (returns a pd.Series)
  64. a1, a2 = arrays_for_binary_ufunc
  65. if sparse:
  66. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  67. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  68. name = "name" # op(pd.Series, array) preserves the name.
  69. series = pd.Series(a1, name=name)
  70. other = pd.Index(a2, name=name).astype("int64")
  71. array_args = (a1, a2)
  72. series_args = (series, other) # ufunc(series, array)
  73. if flip:
  74. array_args = reversed(array_args)
  75. series_args = reversed(series_args) # ufunc(array, series)
  76. expected = pd.Series(ufunc(*array_args), name=name)
  77. result = ufunc(*series_args)
  78. tm.assert_series_equal(result, expected)
  79. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  80. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  81. @pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
  82. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  83. def test_binary_ufunc_with_series(
  84. flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
  85. ):
  86. # Test that
  87. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  88. # with alignment between the indices
  89. a1, a2 = arrays_for_binary_ufunc
  90. if sparse:
  91. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  92. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  93. name = "name" # op(pd.Series, array) preserves the name.
  94. series = pd.Series(a1, name=name)
  95. other = pd.Series(a2, name=name)
  96. idx = np.random.permutation(len(a1))
  97. if shuffle:
  98. other = other.take(idx)
  99. if flip:
  100. index = other.align(series)[0].index
  101. else:
  102. index = series.align(other)[0].index
  103. else:
  104. index = series.index
  105. array_args = (a1, a2)
  106. series_args = (series, other) # ufunc(series, array)
  107. if flip:
  108. array_args = tuple(reversed(array_args))
  109. series_args = tuple(reversed(series_args)) # ufunc(array, series)
  110. expected = pd.Series(ufunc(*array_args), index=index, name=name)
  111. result = ufunc(*series_args)
  112. tm.assert_series_equal(result, expected)
  113. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  114. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  115. @pytest.mark.parametrize("flip", [True, False])
  116. def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
  117. # Test that
  118. # * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
  119. # * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)
  120. array, _ = arrays_for_binary_ufunc
  121. if sparse:
  122. array = SparseArray(array)
  123. other = 2
  124. series = pd.Series(array, name="name")
  125. series_args = (series, other)
  126. array_args = (array, other)
  127. if flip:
  128. series_args = tuple(reversed(series_args))
  129. array_args = tuple(reversed(array_args))
  130. expected = pd.Series(ufunc(*array_args), name="name")
  131. result = ufunc(*series_args)
  132. tm.assert_series_equal(result, expected)
  133. @pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: any others?
  134. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  135. @pytest.mark.parametrize("shuffle", SHUFFLE)
  136. @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
  137. def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
  138. # Test that
  139. # the same conditions from binary_ufunc_scalar apply to
  140. # ufuncs with multiple outputs.
  141. if sparse and ufunc is np.divmod:
  142. pytest.skip("sparse divmod not implemented.")
  143. a1, a2 = arrays_for_binary_ufunc
  144. # work around https://github.com/pandas-dev/pandas/issues/26987
  145. a1[a1 == 0] = 1
  146. a2[a2 == 0] = 1
  147. if sparse:
  148. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  149. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  150. s1 = pd.Series(a1)
  151. s2 = pd.Series(a2)
  152. if shuffle:
  153. # ensure we align before applying the ufunc
  154. s2 = s2.sample(frac=1)
  155. expected = ufunc(a1, a2)
  156. assert isinstance(expected, tuple)
  157. result = ufunc(s1, s2)
  158. assert isinstance(result, tuple)
  159. tm.assert_series_equal(result[0], pd.Series(expected[0]))
  160. tm.assert_series_equal(result[1], pd.Series(expected[1]))
  161. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  162. def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
  163. # Test that the same conditions from unary input apply to multi-output
  164. # ufuncs
  165. array, _ = arrays_for_binary_ufunc
  166. if sparse:
  167. array = SparseArray(array)
  168. series = pd.Series(array, name="name")
  169. result = np.modf(series)
  170. expected = np.modf(array)
  171. assert isinstance(result, tuple)
  172. assert isinstance(expected, tuple)
  173. tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
  174. tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
  175. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  176. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  177. def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
  178. # Drop the names when they differ.
  179. a1, a2 = arrays_for_binary_ufunc
  180. s1 = pd.Series(a1, name="a")
  181. s2 = pd.Series(a2, name="b")
  182. result = ufunc(s1, s2)
  183. assert result.name is None
  184. def test_object_series_ok():
  185. class Dummy:
  186. def __init__(self, value):
  187. self.value = value
  188. def __add__(self, other):
  189. return self.value + other.value
  190. arr = np.array([Dummy(0), Dummy(1)])
  191. ser = pd.Series(arr)
  192. tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
  193. tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
  194. @pytest.mark.parametrize(
  195. "values",
  196. [
  197. pd.array([1, 3, 2], dtype="int64"),
  198. pd.array([1, 10, 0], dtype="Sparse[int]"),
  199. pd.to_datetime(["2000", "2010", "2001"]),
  200. pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
  201. pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
  202. ],
  203. )
  204. def test_reduce(values):
  205. a = pd.Series(values)
  206. assert np.maximum.reduce(a) == values[1]
  207. @pytest.mark.parametrize("type_", [list, deque, tuple])
  208. def test_binary_ufunc_other_types(type_):
  209. a = pd.Series([1, 2, 3], name="name")
  210. b = type_([3, 4, 5])
  211. result = np.add(a, b)
  212. expected = pd.Series(np.add(a.to_numpy(), b), name="name")
  213. tm.assert_series_equal(result, expected)
  214. def test_object_dtype_ok():
  215. class Thing:
  216. def __init__(self, value):
  217. self.value = value
  218. def __add__(self, other):
  219. other = getattr(other, "value", other)
  220. return type(self)(self.value + other)
  221. def __eq__(self, other) -> bool:
  222. return type(other) is Thing and self.value == other.value
  223. def __repr__(self) -> str:
  224. return f"Thing({self.value})"
  225. s = pd.Series([Thing(1), Thing(2)])
  226. result = np.add(s, Thing(1))
  227. expected = pd.Series([Thing(2), Thing(3)])
  228. tm.assert_series_equal(result, expected)
  229. def test_outer():
  230. # https://github.com/pandas-dev/pandas/issues/27186
  231. s = pd.Series([1, 2, 3])
  232. o = np.array([1, 2, 3])
  233. with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
  234. np.subtract.outer(s, o)