PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/vb_suite/timeseries.py

http://github.com/pydata/pandas
Python | 335 lines | 334 code | 1 blank | 0 comment | 0 complexity | 7036233ea8535af465a60e4d209cd628 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. from vbench.api import Benchmark
  2. from datetime import datetime
  3. common_setup = """from pandas_vb_common import *
  4. from datetime import timedelta
  5. N = 100000
  6. try:
  7. rng = date_range('1/1/2000', periods=N, freq='min')
  8. except NameError:
  9. rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute())
  10. def date_range(start=None, end=None, periods=None, freq=None):
  11. return DatetimeIndex(start, end, periods=periods, offset=freq)
  12. if hasattr(Series, 'convert'):
  13. Series.resample = Series.convert
  14. ts = Series(np.random.randn(N), index=rng)
  15. """
  16. #----------------------------------------------------------------------
  17. # Lookup value in large time series, hash map population
  18. setup = common_setup + """
  19. rng = date_range('1/1/2000', periods=1500000, freq='s')
  20. ts = Series(1, index=rng)
  21. """
  22. stmt = "ts[ts.index[len(ts) // 2]]; ts.index._cleanup()"
  23. timeseries_large_lookup_value = Benchmark(stmt, setup,
  24. start_date=datetime(2012, 1, 1))
  25. #----------------------------------------------------------------------
  26. # Test slice minutely series
  27. timeseries_slice_minutely = Benchmark('ts[:10000]', common_setup)
  28. #----------------------------------------------------------------------
  29. # Test conversion
  30. setup = common_setup + """
  31. """
  32. timeseries_1min_5min_ohlc = Benchmark(
  33. "ts[:10000].resample('5min', how='ohlc')",
  34. common_setup,
  35. start_date=datetime(2012, 5, 1))
  36. timeseries_1min_5min_mean = Benchmark(
  37. "ts[:10000].resample('5min', how='mean')",
  38. common_setup,
  39. start_date=datetime(2012, 5, 1))
  40. #----------------------------------------------------------------------
  41. # Irregular alignment
  42. setup = common_setup + """
  43. lindex = np.random.permutation(N)[:N // 2]
  44. rindex = np.random.permutation(N)[:N // 2]
  45. left = Series(ts.values.take(lindex), index=ts.index.take(lindex))
  46. right = Series(ts.values.take(rindex), index=ts.index.take(rindex))
  47. """
  48. timeseries_add_irregular = Benchmark('left + right', setup)
  49. #----------------------------------------------------------------------
  50. # Sort large irregular time series
  51. setup = common_setup + """
  52. N = 100000
  53. rng = date_range('1/1/2000', periods=N, freq='s')
  54. rng = rng.take(np.random.permutation(N))
  55. ts = Series(np.random.randn(N), index=rng)
  56. """
  57. timeseries_sort_index = Benchmark('ts.sort_index()', setup,
  58. start_date=datetime(2012, 4, 1))
  59. #----------------------------------------------------------------------
  60. # Shifting, add offset
  61. setup = common_setup + """
  62. rng = date_range('1/1/2000', periods=10000, freq='T')
  63. """
  64. datetimeindex_add_offset = Benchmark('rng + timedelta(minutes=2)', setup,
  65. start_date=datetime(2012, 4, 1))
  66. setup = common_setup + """
  67. N = 10000
  68. rng = date_range('1/1/1990', periods=N, freq='53s')
  69. ts = Series(np.random.randn(N), index=rng)
  70. dates = date_range('1/1/1990', periods=N * 10, freq='5s')
  71. """
  72. timeseries_asof_single = Benchmark('ts.asof(dates[0])', setup,
  73. start_date=datetime(2012, 4, 27))
  74. timeseries_asof = Benchmark('ts.asof(dates)', setup,
  75. start_date=datetime(2012, 4, 27))
  76. setup = setup + 'ts[250:5000] = np.nan'
  77. timeseries_asof_nan = Benchmark('ts.asof(dates)', setup,
  78. start_date=datetime(2012, 4, 27))
  79. #----------------------------------------------------------------------
  80. # Time zone stuff
  81. setup = common_setup + """
  82. rng = date_range('1/1/2000', '3/1/2000', tz='US/Eastern')
  83. """
  84. timeseries_timestamp_tzinfo_cons = \
  85. Benchmark('rng[0]', setup, start_date=datetime(2012, 5, 5))
  86. #----------------------------------------------------------------------
  87. # Resampling period
  88. setup = common_setup + """
  89. rng = period_range('1/1/2000', '1/1/2001', freq='T')
  90. ts = Series(np.random.randn(len(rng)), index=rng)
  91. """
  92. timeseries_period_downsample_mean = \
  93. Benchmark("ts.resample('D', how='mean')", setup,
  94. start_date=datetime(2012, 4, 25))
  95. setup = common_setup + """
  96. rng = date_range('1/1/2000', '1/1/2001', freq='T')
  97. ts = Series(np.random.randn(len(rng)), index=rng)
  98. """
  99. timeseries_timestamp_downsample_mean = \
  100. Benchmark("ts.resample('D', how='mean')", setup,
  101. start_date=datetime(2012, 4, 25))
  102. #----------------------------------------------------------------------
  103. # to_datetime
  104. setup = common_setup + """
  105. rng = date_range('1/1/2000', periods=20000, freq='h')
  106. strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in rng]
  107. """
  108. timeseries_to_datetime_iso8601 = \
  109. Benchmark('to_datetime(strings)', setup,
  110. start_date=datetime(2012, 7, 11))
  111. setup = common_setup + """
  112. rng = date_range('1/1/2000', periods=10000, freq='D')
  113. strings = Series(rng.year*10000+rng.month*100+rng.day,dtype=np.int64).apply(str)
  114. """
  115. timeseries_to_datetime_YYYYMMDD = \
  116. Benchmark('to_datetime(strings,format="%Y%m%d")', setup,
  117. start_date=datetime(2012, 7, 1))
  118. # ---- infer_freq
  119. # infer_freq
  120. setup = common_setup + """
  121. from pandas.tseries.frequencies import infer_freq
  122. rng = date_range('1/1/1700', freq='D', periods=100000)
  123. a = rng[:50000].append(rng[50002:])
  124. """
  125. timeseries_infer_freq = \
  126. Benchmark('infer_freq(a)', setup, start_date=datetime(2012, 7, 1))
  127. # setitem PeriodIndex
  128. setup = common_setup + """
  129. rng = period_range('1/1/1990', freq='S', periods=20000)
  130. df = DataFrame(index=range(len(rng)))
  131. """
  132. period_setitem = \
  133. Benchmark("df['col'] = rng", setup,
  134. start_date=datetime(2012, 8, 1))
  135. setup = common_setup + """
  136. rng = date_range('1/1/2000 9:30', periods=10000, freq='S', tz='US/Eastern')
  137. """
  138. datetimeindex_normalize = \
  139. Benchmark('rng.normalize()', setup,
  140. start_date=datetime(2012, 9, 1))
  141. setup = common_setup + """
  142. from pandas.tseries.offsets import Second
  143. s1 = date_range('1/1/2000', periods=100, freq='S')
  144. curr = s1[-1]
  145. slst = []
  146. for i in range(100):
  147. slst.append(curr + Second()), periods=100, freq='S')
  148. curr = slst[-1][-1]
  149. """
  150. # dti_append_tz = \
  151. # Benchmark('s1.append(slst)', setup, start_date=datetime(2012, 9, 1))
  152. setup = common_setup + """
  153. rng = date_range('1/1/2000', periods=1000, freq='H')
  154. df = DataFrame(np.random.randn(len(rng), 2), rng)
  155. """
  156. dti_reset_index = \
  157. Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1))
  158. setup = common_setup + """
  159. rng = date_range('1/1/2000', periods=1000, freq='H',
  160. tz='US/Eastern')
  161. df = DataFrame(np.random.randn(len(rng), 2), index=rng)
  162. """
  163. dti_reset_index_tz = \
  164. Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1))
  165. setup = common_setup + """
  166. rng = date_range('1/1/2000', periods=1000, freq='T')
  167. index = rng.repeat(10)
  168. """
  169. datetimeindex_unique = Benchmark('index.unique()', setup,
  170. start_date=datetime(2012, 7, 1))
  171. # tz_localize with infer argument. This is an attempt to emulate the results
  172. # of read_csv with duplicated data. Not passing infer_dst will fail
  173. setup = common_setup + """
  174. dst_rng = date_range('10/29/2000 1:00:00',
  175. '10/29/2000 1:59:59', freq='S')
  176. index = date_range('10/29/2000', '10/29/2000 00:59:59', freq='S')
  177. index = index.append(dst_rng)
  178. index = index.append(dst_rng)
  179. index = index.append(date_range('10/29/2000 2:00:00',
  180. '10/29/2000 3:00:00', freq='S'))
  181. """
  182. datetimeindex_infer_dst = \
  183. Benchmark('index.tz_localize("US/Eastern", infer_dst=True)',
  184. setup, start_date=datetime(2013, 9, 30))
  185. #----------------------------------------------------------------------
  186. # Resampling: fast-path various functions
  187. setup = common_setup + """
  188. rng = date_range('20130101',periods=100000,freq='50L')
  189. df = DataFrame(np.random.randn(100000,2),index=rng)
  190. """
  191. dataframe_resample_mean_string = \
  192. Benchmark("df.resample('1s', how='mean')", setup)
  193. dataframe_resample_mean_numpy = \
  194. Benchmark("df.resample('1s', how=np.mean)", setup)
  195. dataframe_resample_min_string = \
  196. Benchmark("df.resample('1s', how='min')", setup)
  197. dataframe_resample_min_numpy = \
  198. Benchmark("df.resample('1s', how=np.min)", setup)
  199. dataframe_resample_max_string = \
  200. Benchmark("df.resample('1s', how='max')", setup)
  201. dataframe_resample_max_numpy = \
  202. Benchmark("df.resample('1s', how=np.max)", setup)
  203. #----------------------------------------------------------------------
  204. # DatetimeConverter
  205. setup = common_setup + """
  206. from pandas.tseries.converter import DatetimeConverter
  207. """
  208. datetimeindex_converter = \
  209. Benchmark('DatetimeConverter.convert(rng, None, None)',
  210. setup, start_date=datetime(2013, 1, 1))
  211. # Adding custom business day
  212. setup = common_setup + """
  213. import datetime as dt
  214. import pandas as pd
  215. import numpy as np
  216. date = dt.datetime(2011,1,1)
  217. dt64 = np.datetime64('2011-01-01 09:00Z')
  218. day = pd.offsets.Day()
  219. year = pd.offsets.YearBegin()
  220. cday = pd.offsets.CustomBusinessDay()
  221. cme = pd.offsets.CustomBusinessMonthEnd()
  222. """
  223. timeseries_day_incr = Benchmark("date + day",setup)
  224. timeseries_day_apply = Benchmark("day.apply(date)",setup)
  225. timeseries_year_incr = Benchmark("date + year",setup)
  226. timeseries_year_apply = Benchmark("year.apply(date)",setup)
  227. timeseries_custom_bday_incr = \
  228. Benchmark("date + cday",setup)
  229. timeseries_custom_bday_apply = \
  230. Benchmark("cday.apply(date)",setup)
  231. timeseries_custom_bday_apply_dt64 = \
  232. Benchmark("cday.apply(dt64)",setup)
  233. # Increment by n
  234. timeseries_custom_bday_incr_n = \
  235. Benchmark("date + 10 * cday",setup)
  236. # Increment custom business month
  237. timeseries_custom_bmonthend_incr = \
  238. Benchmark("date + cme",setup)
  239. timeseries_custom_bmonthend_incr_n = \
  240. Benchmark("date + 10 * cme",setup)
  241. #----------------------------------------------------------------------
  242. # month/quarter/year start/end accessors
  243. setup = common_setup + """
  244. N = 10000
  245. rng = date_range('1/1/1', periods=N, freq='B')
  246. """
  247. timeseries_is_month_start = Benchmark('rng.is_month_start', setup,
  248. start_date=datetime(2014, 4, 1))