PageRenderTime 53ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tests/arrays/test_period.py

https://github.com/jreback/pandas
Python | 449 lines | 306 code | 113 blank | 30 comment | 22 complexity | c81945021312a107d3a21928de6a4ad8 MD5 | raw file
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.tslibs import iNaT
  4. from pandas._libs.tslibs.period import IncompatibleFrequency
  5. import pandas.util._test_decorators as td
  6. from pandas.core.dtypes.base import registry
  7. from pandas.core.dtypes.dtypes import PeriodDtype
  8. import pandas as pd
  9. import pandas._testing as tm
  10. from pandas.core.arrays import PeriodArray, period_array
  11. # ----------------------------------------------------------------------------
  12. # Dtype
  13. def test_registered():
  14. assert PeriodDtype in registry.dtypes
  15. result = registry.find("Period[D]")
  16. expected = PeriodDtype("D")
  17. assert result == expected
  18. # ----------------------------------------------------------------------------
  19. # period_array
  20. @pytest.mark.parametrize(
  21. "data, freq, expected",
  22. [
  23. ([pd.Period("2017", "D")], None, [17167]),
  24. ([pd.Period("2017", "D")], "D", [17167]),
  25. ([2017], "D", [17167]),
  26. (["2017"], "D", [17167]),
  27. ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
  28. ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
  29. (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
  30. (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
  31. (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]),
  32. ],
  33. )
  34. def test_period_array_ok(data, freq, expected):
  35. result = period_array(data, freq=freq).asi8
  36. expected = np.asarray(expected, dtype=np.int64)
  37. tm.assert_numpy_array_equal(result, expected)
  38. def test_period_array_readonly_object():
  39. # https://github.com/pandas-dev/pandas/issues/25403
  40. pa = period_array([pd.Period("2019-01-01")])
  41. arr = np.asarray(pa, dtype="object")
  42. arr.setflags(write=False)
  43. result = period_array(arr)
  44. tm.assert_period_array_equal(result, pa)
  45. result = pd.Series(arr)
  46. tm.assert_series_equal(result, pd.Series(pa))
  47. result = pd.DataFrame({"A": arr})
  48. tm.assert_frame_equal(result, pd.DataFrame({"A": pa}))
  49. def test_from_datetime64_freq_changes():
  50. # https://github.com/pandas-dev/pandas/issues/23438
  51. arr = pd.date_range("2017", periods=3, freq="D")
  52. result = PeriodArray._from_datetime64(arr, freq="M")
  53. expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M")
  54. tm.assert_period_array_equal(result, expected)
  55. @pytest.mark.parametrize(
  56. "data, freq, msg",
  57. [
  58. (
  59. [pd.Period("2017", "D"), pd.Period("2017", "A")],
  60. None,
  61. "Input has different freq",
  62. ),
  63. ([pd.Period("2017", "D")], "A", "Input has different freq"),
  64. ],
  65. )
  66. def test_period_array_raises(data, freq, msg):
  67. with pytest.raises(IncompatibleFrequency, match=msg):
  68. period_array(data, freq)
  69. def test_period_array_non_period_series_raies():
  70. ser = pd.Series([1, 2, 3])
  71. with pytest.raises(TypeError, match="dtype"):
  72. PeriodArray(ser, freq="D")
  73. def test_period_array_freq_mismatch():
  74. arr = period_array(["2000", "2001"], freq="D")
  75. with pytest.raises(IncompatibleFrequency, match="freq"):
  76. PeriodArray(arr, freq="M")
  77. with pytest.raises(IncompatibleFrequency, match="freq"):
  78. PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())
  79. def test_asi8():
  80. result = period_array(["2000", "2001", None], freq="D").asi8
  81. expected = np.array([10957, 11323, iNaT])
  82. tm.assert_numpy_array_equal(result, expected)
  83. def test_take_raises():
  84. arr = period_array(["2000", "2001"], freq="D")
  85. with pytest.raises(IncompatibleFrequency, match="freq"):
  86. arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))
  87. msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
  88. with pytest.raises(TypeError, match=msg):
  89. arr.take([0, -1], allow_fill=True, fill_value="foo")
  90. @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
  91. def test_astype(dtype):
  92. # We choose to ignore the sign and size of integers for
  93. # Period/Datetime/Timedelta astype
  94. arr = period_array(["2000", "2001", None], freq="D")
  95. with tm.assert_produces_warning(FutureWarning):
  96. # astype(int..) deprecated
  97. result = arr.astype(dtype)
  98. if np.dtype(dtype).kind == "u":
  99. expected_dtype = np.dtype("uint64")
  100. else:
  101. expected_dtype = np.dtype("int64")
  102. with tm.assert_produces_warning(FutureWarning):
  103. # astype(int..) deprecated
  104. expected = arr.astype(expected_dtype)
  105. assert result.dtype == expected_dtype
  106. tm.assert_numpy_array_equal(result, expected)
  107. def test_astype_copies():
  108. arr = period_array(["2000", "2001", None], freq="D")
  109. with tm.assert_produces_warning(FutureWarning):
  110. # astype(int..) deprecated
  111. result = arr.astype(np.int64, copy=False)
  112. # Add the `.base`, since we now use `.asi8` which returns a view.
  113. # We could maybe override it in PeriodArray to return ._data directly.
  114. assert result.base is arr._data
  115. with tm.assert_produces_warning(FutureWarning):
  116. # astype(int..) deprecated
  117. result = arr.astype(np.int64, copy=True)
  118. assert result is not arr._data
  119. tm.assert_numpy_array_equal(result, arr._data.view("i8"))
  120. def test_astype_categorical():
  121. arr = period_array(["2000", "2001", "2001", None], freq="D")
  122. result = arr.astype("category")
  123. categories = pd.PeriodIndex(["2000", "2001"], freq="D")
  124. expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
  125. tm.assert_categorical_equal(result, expected)
  126. def test_astype_period():
  127. arr = period_array(["2000", "2001", None], freq="D")
  128. result = arr.astype(PeriodDtype("M"))
  129. expected = period_array(["2000", "2001", None], freq="M")
  130. tm.assert_period_array_equal(result, expected)
  131. @pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"])
  132. def test_astype_datetime(other):
  133. arr = period_array(["2000", "2001", None], freq="D")
  134. # slice off the [ns] so that the regex matches.
  135. with pytest.raises(TypeError, match=other[:-4]):
  136. arr.astype(other)
  137. def test_fillna_raises():
  138. arr = period_array(["2000", "2001", "2002"], freq="D")
  139. with pytest.raises(ValueError, match="Length"):
  140. arr.fillna(arr[:2])
  141. def test_fillna_copies():
  142. arr = period_array(["2000", "2001", "2002"], freq="D")
  143. result = arr.fillna(pd.Period("2000", "D"))
  144. assert result is not arr
  145. # ----------------------------------------------------------------------------
  146. # setitem
  147. @pytest.mark.parametrize(
  148. "key, value, expected",
  149. [
  150. ([0], pd.Period("2000", "D"), [10957, 1, 2]),
  151. ([0], None, [iNaT, 1, 2]),
  152. ([0], np.nan, [iNaT, 1, 2]),
  153. ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
  154. (
  155. [0, 1, 2],
  156. [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
  157. [10957, 11323, 11688],
  158. ),
  159. ],
  160. )
  161. def test_setitem(key, value, expected):
  162. arr = PeriodArray(np.arange(3), freq="D")
  163. expected = PeriodArray(expected, freq="D")
  164. arr[key] = value
  165. tm.assert_period_array_equal(arr, expected)
  166. def test_setitem_raises_incompatible_freq():
  167. arr = PeriodArray(np.arange(3), freq="D")
  168. with pytest.raises(IncompatibleFrequency, match="freq"):
  169. arr[0] = pd.Period("2000", freq="A")
  170. other = period_array(["2000", "2001"], freq="A")
  171. with pytest.raises(IncompatibleFrequency, match="freq"):
  172. arr[[0, 1]] = other
  173. def test_setitem_raises_length():
  174. arr = PeriodArray(np.arange(3), freq="D")
  175. with pytest.raises(ValueError, match="length"):
  176. arr[[0, 1]] = [pd.Period("2000", freq="D")]
  177. def test_setitem_raises_type():
  178. arr = PeriodArray(np.arange(3), freq="D")
  179. with pytest.raises(TypeError, match="int"):
  180. arr[0] = 1
  181. # ----------------------------------------------------------------------------
  182. # Ops
  183. def test_sub_period():
  184. arr = period_array(["2000", "2001"], freq="D")
  185. other = pd.Period("2000", freq="M")
  186. with pytest.raises(IncompatibleFrequency, match="freq"):
  187. arr - other
  188. # ----------------------------------------------------------------------------
  189. # Methods
  190. @pytest.mark.parametrize(
  191. "other",
  192. [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
  193. )
  194. def test_where_different_freq_raises(other):
  195. ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
  196. cond = np.array([True, False, True])
  197. with pytest.raises(IncompatibleFrequency, match="freq"):
  198. ser.where(cond, other)
  199. # ----------------------------------------------------------------------------
  200. # Printing
  201. def test_repr_small():
  202. arr = period_array(["2000", "2001"], freq="D")
  203. result = str(arr)
  204. expected = (
  205. "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
  206. )
  207. assert result == expected
  208. def test_repr_large():
  209. arr = period_array(["2000", "2001"] * 500, freq="D")
  210. result = str(arr)
  211. expected = (
  212. "<PeriodArray>\n"
  213. "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  214. "'2000-01-01',\n"
  215. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  216. "'2001-01-01',\n"
  217. " ...\n"
  218. " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  219. "'2000-01-01',\n"
  220. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  221. "'2001-01-01']\n"
  222. "Length: 1000, dtype: period[D]"
  223. )
  224. assert result == expected
  225. # ----------------------------------------------------------------------------
  226. # Reductions
  227. class TestReductions:
  228. def test_min_max(self):
  229. arr = period_array(
  230. [
  231. "2000-01-03",
  232. "2000-01-03",
  233. "NaT",
  234. "2000-01-02",
  235. "2000-01-05",
  236. "2000-01-04",
  237. ],
  238. freq="D",
  239. )
  240. result = arr.min()
  241. expected = pd.Period("2000-01-02", freq="D")
  242. assert result == expected
  243. result = arr.max()
  244. expected = pd.Period("2000-01-05", freq="D")
  245. assert result == expected
  246. result = arr.min(skipna=False)
  247. assert result is pd.NaT
  248. result = arr.max(skipna=False)
  249. assert result is pd.NaT
  250. @pytest.mark.parametrize("skipna", [True, False])
  251. def test_min_max_empty(self, skipna):
  252. arr = period_array([], freq="D")
  253. result = arr.min(skipna=skipna)
  254. assert result is pd.NaT
  255. result = arr.max(skipna=skipna)
  256. assert result is pd.NaT
  257. # ----------------------------------------------------------------------------
  258. # Arrow interaction
  259. pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev")
  260. @pyarrow_skip
  261. def test_arrow_extension_type():
  262. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  263. p1 = ArrowPeriodType("D")
  264. p2 = ArrowPeriodType("D")
  265. p3 = ArrowPeriodType("M")
  266. assert p1.freq == "D"
  267. assert p1 == p2
  268. assert not p1 == p3
  269. assert hash(p1) == hash(p2)
  270. assert not hash(p1) == hash(p3)
  271. @pyarrow_skip
  272. @pytest.mark.parametrize(
  273. "data, freq",
  274. [
  275. (pd.date_range("2017", periods=3), "D"),
  276. (pd.date_range("2017", periods=3, freq="A"), "A-DEC"),
  277. ],
  278. )
  279. def test_arrow_array(data, freq):
  280. import pyarrow as pa
  281. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  282. periods = period_array(data, freq=freq)
  283. result = pa.array(periods)
  284. assert isinstance(result.type, ArrowPeriodType)
  285. assert result.type.freq == freq
  286. expected = pa.array(periods.asi8, type="int64")
  287. assert result.storage.equals(expected)
  288. # convert to its storage type
  289. result = pa.array(periods, type=pa.int64())
  290. assert result.equals(expected)
  291. # unsupported conversions
  292. msg = "Not supported to convert PeriodArray to 'double' type"
  293. with pytest.raises(TypeError, match=msg):
  294. pa.array(periods, type="float64")
  295. with pytest.raises(TypeError, match="different 'freq'"):
  296. pa.array(periods, type=ArrowPeriodType("T"))
  297. @pyarrow_skip
  298. def test_arrow_array_missing():
  299. import pyarrow as pa
  300. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  301. arr = PeriodArray([1, 2, 3], freq="D")
  302. arr[1] = pd.NaT
  303. result = pa.array(arr)
  304. assert isinstance(result.type, ArrowPeriodType)
  305. assert result.type.freq == "D"
  306. expected = pa.array([1, None, 3], type="int64")
  307. assert result.storage.equals(expected)
  308. @pyarrow_skip
  309. def test_arrow_table_roundtrip():
  310. import pyarrow as pa
  311. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  312. arr = PeriodArray([1, 2, 3], freq="D")
  313. arr[1] = pd.NaT
  314. df = pd.DataFrame({"a": arr})
  315. table = pa.table(df)
  316. assert isinstance(table.field("a").type, ArrowPeriodType)
  317. result = table.to_pandas()
  318. assert isinstance(result["a"].dtype, PeriodDtype)
  319. tm.assert_frame_equal(result, df)
  320. table2 = pa.concat_tables([table, table])
  321. result = table2.to_pandas()
  322. expected = pd.concat([df, df], ignore_index=True)
  323. tm.assert_frame_equal(result, expected)
  324. @pyarrow_skip
  325. def test_arrow_table_roundtrip_without_metadata():
  326. import pyarrow as pa
  327. arr = PeriodArray([1, 2, 3], freq="H")
  328. arr[1] = pd.NaT
  329. df = pd.DataFrame({"a": arr})
  330. table = pa.table(df)
  331. # remove the metadata
  332. table = table.replace_schema_metadata()
  333. assert table.schema.metadata is None
  334. result = table.to_pandas()
  335. assert isinstance(result["a"].dtype, PeriodDtype)
  336. tm.assert_frame_equal(result, df)