PageRenderTime 160ms CodeModel.GetById 18ms RepoModel.GetById 2ms app.codeStats 0ms

/pandas/tests/tseries/frequencies/test_inference.py

https://github.com/pydata/pandas
Python | 524 lines | 405 code | 94 blank | 25 comment | 28 complexity | 3976ca4f9575adc9767928e70712bbd0 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas._libs.tslibs.ccalendar import (
  8. DAYS,
  9. MONTHS,
  10. )
  11. from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
  12. from pandas.compat import is_platform_windows
  13. from pandas import (
  14. DatetimeIndex,
  15. Index,
  16. Series,
  17. Timestamp,
  18. date_range,
  19. period_range,
  20. )
  21. import pandas._testing as tm
  22. from pandas.core.arrays import (
  23. DatetimeArray,
  24. TimedeltaArray,
  25. )
  26. from pandas.core.tools.datetimes import to_datetime
  27. import pandas.tseries.frequencies as frequencies
  28. import pandas.tseries.offsets as offsets
  29. @pytest.fixture(
  30. params=[
  31. (timedelta(1), "D"),
  32. (timedelta(hours=1), "H"),
  33. (timedelta(minutes=1), "T"),
  34. (timedelta(seconds=1), "S"),
  35. (np.timedelta64(1, "ns"), "N"),
  36. (timedelta(microseconds=1), "U"),
  37. (timedelta(microseconds=1000), "L"),
  38. ]
  39. )
  40. def base_delta_code_pair(request):
  41. return request.param
  42. freqs = (
  43. [f"Q-{month}" for month in MONTHS]
  44. + [f"{annual}-{month}" for annual in ["A", "BA"] for month in MONTHS]
  45. + ["M", "BM", "BMS"]
  46. + [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS]
  47. + [f"W-{day}" for day in DAYS]
  48. )
  49. @pytest.mark.parametrize("freq", freqs)
  50. @pytest.mark.parametrize("periods", [5, 7])
  51. def test_infer_freq_range(periods, freq):
  52. freq = freq.upper()
  53. gen = date_range("1/1/2000", periods=periods, freq=freq)
  54. index = DatetimeIndex(gen.values)
  55. if not freq.startswith("Q-"):
  56. assert frequencies.infer_freq(index) == gen.freqstr
  57. else:
  58. inf_freq = frequencies.infer_freq(index)
  59. is_dec_range = inf_freq == "Q-DEC" and gen.freqstr in (
  60. "Q",
  61. "Q-DEC",
  62. "Q-SEP",
  63. "Q-JUN",
  64. "Q-MAR",
  65. )
  66. is_nov_range = inf_freq == "Q-NOV" and gen.freqstr in (
  67. "Q-NOV",
  68. "Q-AUG",
  69. "Q-MAY",
  70. "Q-FEB",
  71. )
  72. is_oct_range = inf_freq == "Q-OCT" and gen.freqstr in (
  73. "Q-OCT",
  74. "Q-JUL",
  75. "Q-APR",
  76. "Q-JAN",
  77. )
  78. assert is_dec_range or is_nov_range or is_oct_range
  79. def test_raise_if_period_index():
  80. index = period_range(start="1/1/1990", periods=20, freq="M")
  81. msg = "Check the `freq` attribute instead of using infer_freq"
  82. with pytest.raises(TypeError, match=msg):
  83. frequencies.infer_freq(index)
  84. def test_raise_if_too_few():
  85. index = DatetimeIndex(["12/31/1998", "1/3/1999"])
  86. msg = "Need at least 3 dates to infer frequency"
  87. with pytest.raises(ValueError, match=msg):
  88. frequencies.infer_freq(index)
  89. def test_business_daily():
  90. index = DatetimeIndex(["01/01/1999", "1/4/1999", "1/5/1999"])
  91. assert frequencies.infer_freq(index) == "B"
  92. def test_business_daily_look_alike():
  93. # see gh-16624
  94. #
  95. # Do not infer "B when "weekend" (2-day gap) in wrong place.
  96. index = DatetimeIndex(["12/31/1998", "1/3/1999", "1/4/1999"])
  97. assert frequencies.infer_freq(index) is None
  98. def test_day_corner():
  99. index = DatetimeIndex(["1/1/2000", "1/2/2000", "1/3/2000"])
  100. assert frequencies.infer_freq(index) == "D"
  101. def test_non_datetime_index():
  102. dates = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
  103. assert frequencies.infer_freq(dates) == "D"
  104. def test_fifth_week_of_month_infer():
  105. # see gh-9425
  106. #
  107. # Only attempt to infer up to WOM-4.
  108. index = DatetimeIndex(["2014-03-31", "2014-06-30", "2015-03-30"])
  109. assert frequencies.infer_freq(index) is None
  110. def test_week_of_month_fake():
  111. # All of these dates are on same day
  112. # of week and are 4 or 5 weeks apart.
  113. index = DatetimeIndex(["2013-08-27", "2013-10-01", "2013-10-29", "2013-11-26"])
  114. assert frequencies.infer_freq(index) != "WOM-4TUE"
  115. def test_fifth_week_of_month():
  116. # see gh-9425
  117. #
  118. # Only supports freq up to WOM-4.
  119. msg = (
  120. "Of the four parameters: start, end, periods, "
  121. "and freq, exactly three must be specified"
  122. )
  123. with pytest.raises(ValueError, match=msg):
  124. date_range("2014-01-01", freq="WOM-5MON")
  125. def test_monthly_ambiguous():
  126. rng = DatetimeIndex(["1/31/2000", "2/29/2000", "3/31/2000"])
  127. assert rng.inferred_freq == "M"
  128. def test_annual_ambiguous():
  129. rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
  130. assert rng.inferred_freq == "A-JAN"
  131. @pytest.mark.parametrize("count", range(1, 5))
  132. def test_infer_freq_delta(base_delta_code_pair, count):
  133. b = Timestamp(datetime.now())
  134. base_delta, code = base_delta_code_pair
  135. inc = base_delta * count
  136. index = DatetimeIndex([b + inc * j for j in range(3)])
  137. exp_freq = f"{count:d}{code}" if count > 1 else code
  138. assert frequencies.infer_freq(index) == exp_freq
  139. @pytest.mark.parametrize(
  140. "constructor",
  141. [
  142. lambda now, delta: DatetimeIndex(
  143. [now + delta * 7] + [now + delta * j for j in range(3)]
  144. ),
  145. lambda now, delta: DatetimeIndex(
  146. [now + delta * j for j in range(3)] + [now + delta * 7]
  147. ),
  148. ],
  149. )
  150. def test_infer_freq_custom(base_delta_code_pair, constructor):
  151. b = Timestamp(datetime.now())
  152. base_delta, _ = base_delta_code_pair
  153. index = constructor(b, base_delta)
  154. assert frequencies.infer_freq(index) is None
  155. @pytest.mark.parametrize(
  156. "freq,expected", [("Q", "Q-DEC"), ("Q-NOV", "Q-NOV"), ("Q-OCT", "Q-OCT")]
  157. )
  158. def test_infer_freq_index(freq, expected):
  159. rng = period_range("1959Q2", "2009Q3", freq=freq)
  160. rng = Index(rng.to_timestamp("D", how="e").astype(object))
  161. assert rng.inferred_freq == expected
  162. @pytest.mark.parametrize(
  163. "expected,dates",
  164. list(
  165. {
  166. "AS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"],
  167. "Q-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"],
  168. "M": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"],
  169. "W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"],
  170. "D": ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"],
  171. "H": [
  172. "2011-12-31 22:00",
  173. "2011-12-31 23:00",
  174. "2012-01-01 00:00",
  175. "2012-01-01 01:00",
  176. ],
  177. }.items()
  178. ),
  179. )
  180. def test_infer_freq_tz(tz_naive_fixture, expected, dates):
  181. # see gh-7310
  182. tz = tz_naive_fixture
  183. idx = DatetimeIndex(dates, tz=tz)
  184. assert idx.inferred_freq == expected
  185. @pytest.mark.parametrize(
  186. "date_pair",
  187. [
  188. ["2013-11-02", "2013-11-5"], # Fall DST
  189. ["2014-03-08", "2014-03-11"], # Spring DST
  190. ["2014-01-01", "2014-01-03"], # Regular Time
  191. ],
  192. )
  193. @pytest.mark.parametrize(
  194. "freq", ["H", "3H", "10T", "3601S", "3600001L", "3600000001U", "3600000000001N"]
  195. )
  196. def test_infer_freq_tz_transition(tz_naive_fixture, date_pair, freq):
  197. # see gh-8772
  198. tz = tz_naive_fixture
  199. idx = date_range(date_pair[0], date_pair[1], freq=freq, tz=tz)
  200. assert idx.inferred_freq == freq
  201. def test_infer_freq_tz_transition_custom():
  202. index = date_range("2013-11-03", periods=5, freq="3H").tz_localize(
  203. "America/Chicago"
  204. )
  205. assert index.inferred_freq is None
  206. @pytest.mark.parametrize(
  207. "data,expected",
  208. [
  209. # Hourly freq in a day must result in "H"
  210. (
  211. [
  212. "2014-07-01 09:00",
  213. "2014-07-01 10:00",
  214. "2014-07-01 11:00",
  215. "2014-07-01 12:00",
  216. "2014-07-01 13:00",
  217. "2014-07-01 14:00",
  218. ],
  219. "H",
  220. ),
  221. (
  222. [
  223. "2014-07-01 09:00",
  224. "2014-07-01 10:00",
  225. "2014-07-01 11:00",
  226. "2014-07-01 12:00",
  227. "2014-07-01 13:00",
  228. "2014-07-01 14:00",
  229. "2014-07-01 15:00",
  230. "2014-07-01 16:00",
  231. "2014-07-02 09:00",
  232. "2014-07-02 10:00",
  233. "2014-07-02 11:00",
  234. ],
  235. "BH",
  236. ),
  237. (
  238. [
  239. "2014-07-04 09:00",
  240. "2014-07-04 10:00",
  241. "2014-07-04 11:00",
  242. "2014-07-04 12:00",
  243. "2014-07-04 13:00",
  244. "2014-07-04 14:00",
  245. "2014-07-04 15:00",
  246. "2014-07-04 16:00",
  247. "2014-07-07 09:00",
  248. "2014-07-07 10:00",
  249. "2014-07-07 11:00",
  250. ],
  251. "BH",
  252. ),
  253. (
  254. [
  255. "2014-07-04 09:00",
  256. "2014-07-04 10:00",
  257. "2014-07-04 11:00",
  258. "2014-07-04 12:00",
  259. "2014-07-04 13:00",
  260. "2014-07-04 14:00",
  261. "2014-07-04 15:00",
  262. "2014-07-04 16:00",
  263. "2014-07-07 09:00",
  264. "2014-07-07 10:00",
  265. "2014-07-07 11:00",
  266. "2014-07-07 12:00",
  267. "2014-07-07 13:00",
  268. "2014-07-07 14:00",
  269. "2014-07-07 15:00",
  270. "2014-07-07 16:00",
  271. "2014-07-08 09:00",
  272. "2014-07-08 10:00",
  273. "2014-07-08 11:00",
  274. "2014-07-08 12:00",
  275. "2014-07-08 13:00",
  276. "2014-07-08 14:00",
  277. "2014-07-08 15:00",
  278. "2014-07-08 16:00",
  279. ],
  280. "BH",
  281. ),
  282. ],
  283. )
  284. def test_infer_freq_business_hour(data, expected):
  285. # see gh-7905
  286. idx = DatetimeIndex(data)
  287. assert idx.inferred_freq == expected
  288. def test_not_monotonic():
  289. rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
  290. rng = rng[::-1]
  291. assert rng.inferred_freq == "-1A-JAN"
  292. def test_non_datetime_index2():
  293. rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
  294. vals = rng.to_pydatetime()
  295. result = frequencies.infer_freq(vals)
  296. assert result == rng.inferred_freq
  297. @pytest.mark.parametrize(
  298. "idx", [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]
  299. )
  300. def test_invalid_index_types(idx):
  301. msg = "|".join(
  302. [
  303. "cannot infer freq from a non-convertible",
  304. "Check the `freq` attribute instead of using infer_freq",
  305. ]
  306. )
  307. with pytest.raises(TypeError, match=msg):
  308. frequencies.infer_freq(idx)
  309. @pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue")
  310. def test_invalid_index_types_unicode():
  311. # see gh-10822
  312. #
  313. # Odd error message on conversions to datetime for unicode.
  314. msg = "Unknown string format"
  315. with pytest.raises(ValueError, match=msg):
  316. frequencies.infer_freq(tm.makeStringIndex(10))
  317. def test_string_datetime_like_compat():
  318. # see gh-6463
  319. data = ["2004-01", "2004-02", "2004-03", "2004-04"]
  320. expected = frequencies.infer_freq(data)
  321. result = frequencies.infer_freq(Index(data))
  322. assert result == expected
  323. def test_series():
  324. # see gh-6407
  325. s = Series(date_range("20130101", "20130110"))
  326. inferred = frequencies.infer_freq(s)
  327. assert inferred == "D"
  328. @pytest.mark.parametrize("end", [10, 10.0])
  329. def test_series_invalid_type(end):
  330. # see gh-6407
  331. msg = "cannot infer freq from a non-convertible dtype on a Series"
  332. s = Series(np.arange(end))
  333. with pytest.raises(TypeError, match=msg):
  334. frequencies.infer_freq(s)
  335. def test_series_inconvertible_string():
  336. # see gh-6407
  337. msg = "Unknown string format"
  338. with pytest.raises(ValueError, match=msg):
  339. frequencies.infer_freq(Series(["foo", "bar"]))
  340. @pytest.mark.parametrize("freq", [None, "L"])
  341. def test_series_period_index(freq):
  342. # see gh-6407
  343. #
  344. # Cannot infer on PeriodIndex
  345. msg = "cannot infer freq from a non-convertible dtype on a Series"
  346. s = Series(period_range("2013", periods=10, freq=freq))
  347. with pytest.raises(TypeError, match=msg):
  348. frequencies.infer_freq(s)
  349. @pytest.mark.parametrize("freq", ["M", "L", "S"])
  350. def test_series_datetime_index(freq):
  351. s = Series(date_range("20130101", periods=10, freq=freq))
  352. inferred = frequencies.infer_freq(s)
  353. assert inferred == freq
  354. @pytest.mark.parametrize(
  355. "offset_func",
  356. [
  357. frequencies._get_offset,
  358. lambda freq: date_range("2011-01-01", periods=5, freq=freq),
  359. ],
  360. )
  361. @pytest.mark.parametrize(
  362. "freq",
  363. [
  364. "WEEKDAY",
  365. "EOM",
  366. "W@MON",
  367. "W@TUE",
  368. "W@WED",
  369. "W@THU",
  370. "W@FRI",
  371. "W@SAT",
  372. "W@SUN",
  373. "Q@JAN",
  374. "Q@FEB",
  375. "Q@MAR",
  376. "A@JAN",
  377. "A@FEB",
  378. "A@MAR",
  379. "A@APR",
  380. "A@MAY",
  381. "A@JUN",
  382. "A@JUL",
  383. "A@AUG",
  384. "A@SEP",
  385. "A@OCT",
  386. "A@NOV",
  387. "A@DEC",
  388. "Y@JAN",
  389. "WOM@1MON",
  390. "WOM@2MON",
  391. "WOM@3MON",
  392. "WOM@4MON",
  393. "WOM@1TUE",
  394. "WOM@2TUE",
  395. "WOM@3TUE",
  396. "WOM@4TUE",
  397. "WOM@1WED",
  398. "WOM@2WED",
  399. "WOM@3WED",
  400. "WOM@4WED",
  401. "WOM@1THU",
  402. "WOM@2THU",
  403. "WOM@3THU",
  404. "WOM@4THU",
  405. "WOM@1FRI",
  406. "WOM@2FRI",
  407. "WOM@3FRI",
  408. "WOM@4FRI",
  409. ],
  410. )
  411. def test_legacy_offset_warnings(offset_func, freq):
  412. with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
  413. offset_func(freq)
  414. def test_ms_vs_capital_ms():
  415. left = frequencies._get_offset("ms")
  416. right = frequencies._get_offset("MS")
  417. assert left == offsets.Milli()
  418. assert right == offsets.MonthBegin()
  419. def test_infer_freq_warn_deprecated():
  420. with tm.assert_produces_warning(FutureWarning):
  421. frequencies.infer_freq(date_range(2022, periods=3), warn=False)
  422. def test_infer_freq_non_nano():
  423. arr = np.arange(10).astype(np.int64).view("M8[s]")
  424. dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
  425. res = frequencies.infer_freq(dta)
  426. assert res == "S"
  427. arr2 = arr.view("m8[ms]")
  428. tda = TimedeltaArray._simple_new(arr2, dtype=arr2.dtype)
  429. res2 = frequencies.infer_freq(tda)
  430. assert res2 == "L"