/Tools/stringbench/stringbench.py

https://github.com/albertz/CPython · Python · 1482 lines · 1225 code · 216 blank · 41 comment · 182 complexity · 3207f8cc5a4a924b3f4596c9997e262a MD5 · raw file

  1. # Various microbenchmarks comparing unicode and byte string performance
  2. # Please keep this file both 2.x and 3.x compatible!
  3. import timeit
  4. import itertools
  5. import operator
  6. import re
  7. import sys
  8. import datetime
  9. import optparse
  10. VERSION = '2.0'
  11. def p(*args):
  12. sys.stdout.write(' '.join(str(s) for s in args) + '\n')
# STR factories: each benchmark receives one of these as ``STR`` and uses it
# to build its operands from a native str literal.  Helpers in this file
# compare the factory by identity (``STR is BYTES`` / ``STR is UNICODE``), so
# the objects themselves matter, not just what they return.
if sys.version_info >= (3,):
    # 3.x: a str literal is already unicode; bytes need an ASCII encode.
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
else:
    # 2.x: a str literal is already a byte string; unicode needs an ASCII decode.
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')
  19. class UnsupportedType(TypeError):
  20. pass
# Banner: tool version, interpreter version and the time of the run.
p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())

# NOTE(review): REPEAT is presumably the timing repeat count consumed by the
# driver code outside this section -- confirm.  The first assignment is
# overridden immediately, so only 3 is effective.
REPEAT = 1
REPEAT = 3
#REPEAT = 7

# Refuse to be imported.  The banner above has already been printed by the
# time this check runs.
if __name__ != "__main__":
    raise SystemExit("Must run as main program")

# Command-line options; the parse call is not part of this section.
parser = optparse.OptionParser()
parser.add_option("-R", "--skip-re", dest="skip_re",
                  action="store_true",
                  help="skip regular expression tests")
parser.add_option("-8", "--8-bit", dest="bytes_only",
                  action="store_true",
                  help="only do 8-bit string benchmarks")
parser.add_option("-u", "--unicode", dest="unicode_only",
                  action="store_true",
                  help="only do Unicode string benchmarks")
# Iteration lists built once at import time and shared by every benchmark
# loop (presumably so the loops do not pay for creating a range on each call).
_RANGE_1000 = list(range(1000))
_RANGE_100 = list(range(100))
_RANGE_10 = list(range(10))
  42. dups = {}
  43. def bench(s, group, repeat_count):
  44. def blah(f):
  45. if f.__name__ in dups:
  46. raise AssertionError("Multiple functions with same name: %r" %
  47. (f.__name__,))
  48. dups[f.__name__] = 1
  49. f.comment = s
  50. f.is_bench = True
  51. f.group = group
  52. f.repeat_count = repeat_count
  53. return f
  54. return blah
  55. def uses_re(f):
  56. f.uses_re = True
  57. ####### 'in' comparisons
  58. @bench('"A" in "A"*1000', "early match, single character", 1000)
  59. def in_test_quick_match_single_character(STR):
  60. s1 = STR("A" * 1000)
  61. s2 = STR("A")
  62. for x in _RANGE_1000:
  63. s2 in s1
  64. @bench('"B" in "A"*1000', "no match, single character", 1000)
  65. def in_test_no_match_single_character(STR):
  66. s1 = STR("A" * 1000)
  67. s2 = STR("B")
  68. for x in _RANGE_1000:
  69. s2 in s1
  70. @bench('"AB" in "AB"*1000', "early match, two characters", 1000)
  71. def in_test_quick_match_two_characters(STR):
  72. s1 = STR("AB" * 1000)
  73. s2 = STR("AB")
  74. for x in _RANGE_1000:
  75. s2 in s1
  76. @bench('"BC" in "AB"*1000', "no match, two characters", 1000)
  77. def in_test_no_match_two_character(STR):
  78. s1 = STR("AB" * 1000)
  79. s2 = STR("BC")
  80. for x in _RANGE_1000:
  81. s2 in s1
  82. @bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
  83. def in_test_slow_match_two_characters(STR):
  84. s1 = STR("AB" * 300+"C")
  85. s2 = STR("BC")
  86. for x in _RANGE_1000:
  87. s2 in s1
  88. @bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
  89. "late match, 100 characters", 100)
  90. def in_test_slow_match_100_characters(STR):
  91. m = STR("ABC"*33)
  92. d = STR("D")
  93. e = STR("E")
  94. s1 = (m+d)*300 + m+e
  95. s2 = m+e
  96. for x in _RANGE_100:
  97. s2 in s1
  98. # Try with regex
  99. @uses_re
  100. @bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*300+s+"E")',
  101. "late match, 100 characters", 100)
  102. def re_test_slow_match_100_characters(STR):
  103. m = STR("ABC"*33)
  104. d = STR("D")
  105. e = STR("E")
  106. s1 = (m+d)*300 + m+e
  107. s2 = m+e
  108. pat = re.compile(s2)
  109. search = pat.search
  110. for x in _RANGE_100:
  111. search(s1)
  112. #### same tests as 'in' but use 'find'
  113. @bench('("A"*1000).find("A")', "early match, single character", 1000)
  114. def find_test_quick_match_single_character(STR):
  115. s1 = STR("A" * 1000)
  116. s2 = STR("A")
  117. s1_find = s1.find
  118. for x in _RANGE_1000:
  119. s1_find(s2)
  120. @bench('("A"*1000).find("B")', "no match, single character", 1000)
  121. def find_test_no_match_single_character(STR):
  122. s1 = STR("A" * 1000)
  123. s2 = STR("B")
  124. s1_find = s1.find
  125. for x in _RANGE_1000:
  126. s1_find(s2)
  127. @bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
  128. def find_test_quick_match_two_characters(STR):
  129. s1 = STR("AB" * 1000)
  130. s2 = STR("AB")
  131. s1_find = s1.find
  132. for x in _RANGE_1000:
  133. s1_find(s2)
  134. @bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
  135. def find_test_no_match_two_character(STR):
  136. s1 = STR("AB" * 1000)
  137. s2 = STR("BC")
  138. s1_find = s1.find
  139. for x in _RANGE_1000:
  140. s1_find(s2)
  141. @bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
  142. def find_test_no_match_two_character_bis(STR):
  143. s1 = STR("AB" * 1000)
  144. s2 = STR("CA")
  145. s1_find = s1.find
  146. for x in _RANGE_1000:
  147. s1_find(s2)
  148. @bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
  149. def find_test_slow_match_two_characters(STR):
  150. s1 = STR("AB" * 300+"C")
  151. s2 = STR("BC")
  152. s1_find = s1.find
  153. for x in _RANGE_1000:
  154. s1_find(s2)
  155. @bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
  156. def find_test_slow_match_two_characters_bis(STR):
  157. s1 = STR("AB" * 300+"CA")
  158. s2 = STR("CA")
  159. s1_find = s1.find
  160. for x in _RANGE_1000:
  161. s1_find(s2)
  162. @bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
  163. "late match, 100 characters", 100)
  164. def find_test_slow_match_100_characters(STR):
  165. m = STR("ABC"*33)
  166. d = STR("D")
  167. e = STR("E")
  168. s1 = (m+d)*500 + m+e
  169. s2 = m+e
  170. s1_find = s1.find
  171. for x in _RANGE_100:
  172. s1_find(s2)
  173. @bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
  174. "late match, 100 characters", 100)
  175. def find_test_slow_match_100_characters_bis(STR):
  176. m = STR("ABC"*33)
  177. d = STR("D")
  178. e = STR("E")
  179. s1 = (m+d)*500 + e+m
  180. s2 = e+m
  181. s1_find = s1.find
  182. for x in _RANGE_100:
  183. s1_find(s2)
  184. #### Same tests for 'rfind'
  185. @bench('("A"*1000).rfind("A")', "early match, single character", 1000)
  186. def rfind_test_quick_match_single_character(STR):
  187. s1 = STR("A" * 1000)
  188. s2 = STR("A")
  189. s1_rfind = s1.rfind
  190. for x in _RANGE_1000:
  191. s1_rfind(s2)
  192. @bench('("A"*1000).rfind("B")', "no match, single character", 1000)
  193. def rfind_test_no_match_single_character(STR):
  194. s1 = STR("A" * 1000)
  195. s2 = STR("B")
  196. s1_rfind = s1.rfind
  197. for x in _RANGE_1000:
  198. s1_rfind(s2)
  199. @bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
  200. def rfind_test_quick_match_two_characters(STR):
  201. s1 = STR("AB" * 1000)
  202. s2 = STR("AB")
  203. s1_rfind = s1.rfind
  204. for x in _RANGE_1000:
  205. s1_rfind(s2)
  206. @bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
  207. def rfind_test_no_match_two_character(STR):
  208. s1 = STR("AB" * 1000)
  209. s2 = STR("BC")
  210. s1_rfind = s1.rfind
  211. for x in _RANGE_1000:
  212. s1_rfind(s2)
  213. @bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
  214. def rfind_test_no_match_two_character_bis(STR):
  215. s1 = STR("AB" * 1000)
  216. s2 = STR("CA")
  217. s1_rfind = s1.rfind
  218. for x in _RANGE_1000:
  219. s1_rfind(s2)
  220. @bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
  221. def rfind_test_slow_match_two_characters(STR):
  222. s1 = STR("C" + "AB" * 300)
  223. s2 = STR("CA")
  224. s1_rfind = s1.rfind
  225. for x in _RANGE_1000:
  226. s1_rfind(s2)
  227. @bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
  228. def rfind_test_slow_match_two_characters_bis(STR):
  229. s1 = STR("BC" + "AB" * 300)
  230. s2 = STR("BC")
  231. s1_rfind = s1.rfind
  232. for x in _RANGE_1000:
  233. s1_rfind(s2)
  234. @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
  235. "late match, 100 characters", 100)
  236. def rfind_test_slow_match_100_characters(STR):
  237. m = STR("ABC"*33)
  238. d = STR("D")
  239. e = STR("E")
  240. s1 = e+m + (d+m)*500
  241. s2 = e+m
  242. s1_rfind = s1.rfind
  243. for x in _RANGE_100:
  244. s1_rfind(s2)
  245. @bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
  246. "late match, 100 characters", 100)
  247. def rfind_test_slow_match_100_characters_bis(STR):
  248. m = STR("ABC"*33)
  249. d = STR("D")
  250. e = STR("E")
  251. s1 = m+e + (d+m)*500
  252. s2 = m+e
  253. s1_rfind = s1.rfind
  254. for x in _RANGE_100:
  255. s1_rfind(s2)
  256. #### Now with index.
  257. # Skip the ones which fail because that would include exception overhead.
  258. @bench('("A"*1000).index("A")', "early match, single character", 1000)
  259. def index_test_quick_match_single_character(STR):
  260. s1 = STR("A" * 1000)
  261. s2 = STR("A")
  262. s1_index = s1.index
  263. for x in _RANGE_1000:
  264. s1_index(s2)
  265. @bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
  266. def index_test_quick_match_two_characters(STR):
  267. s1 = STR("AB" * 1000)
  268. s2 = STR("AB")
  269. s1_index = s1.index
  270. for x in _RANGE_1000:
  271. s1_index(s2)
  272. @bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
  273. def index_test_slow_match_two_characters(STR):
  274. s1 = STR("AB" * 300+"C")
  275. s2 = STR("BC")
  276. s1_index = s1.index
  277. for x in _RANGE_1000:
  278. s1_index(s2)
  279. @bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
  280. "late match, 100 characters", 100)
  281. def index_test_slow_match_100_characters(STR):
  282. m = STR("ABC"*33)
  283. d = STR("D")
  284. e = STR("E")
  285. s1 = (m+d)*500 + m+e
  286. s2 = m+e
  287. s1_index = s1.index
  288. for x in _RANGE_100:
  289. s1_index(s2)
  290. #### Same for rindex
  291. @bench('("A"*1000).rindex("A")', "early match, single character", 1000)
  292. def rindex_test_quick_match_single_character(STR):
  293. s1 = STR("A" * 1000)
  294. s2 = STR("A")
  295. s1_rindex = s1.rindex
  296. for x in _RANGE_1000:
  297. s1_rindex(s2)
  298. @bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
  299. def rindex_test_quick_match_two_characters(STR):
  300. s1 = STR("AB" * 1000)
  301. s2 = STR("AB")
  302. s1_rindex = s1.rindex
  303. for x in _RANGE_1000:
  304. s1_rindex(s2)
  305. @bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
  306. def rindex_test_slow_match_two_characters(STR):
  307. s1 = STR("C" + "AB" * 300)
  308. s2 = STR("CA")
  309. s1_rindex = s1.rindex
  310. for x in _RANGE_1000:
  311. s1_rindex(s2)
  312. @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
  313. "late match, 100 characters", 100)
  314. def rindex_test_slow_match_100_characters(STR):
  315. m = STR("ABC"*33)
  316. d = STR("D")
  317. e = STR("E")
  318. s1 = e + m + (d+m)*500
  319. s2 = e + m
  320. s1_rindex = s1.rindex
  321. for x in _RANGE_100:
  322. s1_rindex(s2)
  323. #### Same for partition
  324. @bench('("A"*1000).partition("A")', "early match, single character", 1000)
  325. def partition_test_quick_match_single_character(STR):
  326. s1 = STR("A" * 1000)
  327. s2 = STR("A")
  328. s1_partition = s1.partition
  329. for x in _RANGE_1000:
  330. s1_partition(s2)
  331. @bench('("A"*1000).partition("B")', "no match, single character", 1000)
  332. def partition_test_no_match_single_character(STR):
  333. s1 = STR("A" * 1000)
  334. s2 = STR("B")
  335. s1_partition = s1.partition
  336. for x in _RANGE_1000:
  337. s1_partition(s2)
  338. @bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
  339. def partition_test_quick_match_two_characters(STR):
  340. s1 = STR("AB" * 1000)
  341. s2 = STR("AB")
  342. s1_partition = s1.partition
  343. for x in _RANGE_1000:
  344. s1_partition(s2)
  345. @bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
  346. def partition_test_no_match_two_character(STR):
  347. s1 = STR("AB" * 1000)
  348. s2 = STR("BC")
  349. s1_partition = s1.partition
  350. for x in _RANGE_1000:
  351. s1_partition(s2)
  352. @bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
  353. def partition_test_slow_match_two_characters(STR):
  354. s1 = STR("AB" * 300+"C")
  355. s2 = STR("BC")
  356. s1_partition = s1.partition
  357. for x in _RANGE_1000:
  358. s1_partition(s2)
  359. @bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
  360. "late match, 100 characters", 100)
  361. def partition_test_slow_match_100_characters(STR):
  362. m = STR("ABC"*33)
  363. d = STR("D")
  364. e = STR("E")
  365. s1 = (m+d)*500 + m+e
  366. s2 = m+e
  367. s1_partition = s1.partition
  368. for x in _RANGE_100:
  369. s1_partition(s2)
  370. #### Same for rpartition
  371. @bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
  372. def rpartition_test_quick_match_single_character(STR):
  373. s1 = STR("A" * 1000)
  374. s2 = STR("A")
  375. s1_rpartition = s1.rpartition
  376. for x in _RANGE_1000:
  377. s1_rpartition(s2)
  378. @bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
  379. def rpartition_test_no_match_single_character(STR):
  380. s1 = STR("A" * 1000)
  381. s2 = STR("B")
  382. s1_rpartition = s1.rpartition
  383. for x in _RANGE_1000:
  384. s1_rpartition(s2)
  385. @bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
  386. def rpartition_test_quick_match_two_characters(STR):
  387. s1 = STR("AB" * 1000)
  388. s2 = STR("AB")
  389. s1_rpartition = s1.rpartition
  390. for x in _RANGE_1000:
  391. s1_rpartition(s2)
  392. @bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
  393. def rpartition_test_no_match_two_character(STR):
  394. s1 = STR("AB" * 1000)
  395. s2 = STR("BC")
  396. s1_rpartition = s1.rpartition
  397. for x in _RANGE_1000:
  398. s1_rpartition(s2)
  399. @bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
  400. def rpartition_test_slow_match_two_characters(STR):
  401. s1 = STR("C" + "AB" * 300)
  402. s2 = STR("CA")
  403. s1_rpartition = s1.rpartition
  404. for x in _RANGE_1000:
  405. s1_rpartition(s2)
  406. @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
  407. "late match, 100 characters", 100)
  408. def rpartition_test_slow_match_100_characters(STR):
  409. m = STR("ABC"*33)
  410. d = STR("D")
  411. e = STR("E")
  412. s1 = e + m + (d+m)*500
  413. s2 = e + m
  414. s1_rpartition = s1.rpartition
  415. for x in _RANGE_100:
  416. s1_rpartition(s2)
  417. #### Same for split(s, 1)
  418. @bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
  419. def split_test_quick_match_single_character(STR):
  420. s1 = STR("A" * 1000)
  421. s2 = STR("A")
  422. s1_split = s1.split
  423. for x in _RANGE_1000:
  424. s1_split(s2, 1)
  425. @bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
  426. def split_test_no_match_single_character(STR):
  427. s1 = STR("A" * 1000)
  428. s2 = STR("B")
  429. s1_split = s1.split
  430. for x in _RANGE_1000:
  431. s1_split(s2, 1)
  432. @bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
  433. def split_test_quick_match_two_characters(STR):
  434. s1 = STR("AB" * 1000)
  435. s2 = STR("AB")
  436. s1_split = s1.split
  437. for x in _RANGE_1000:
  438. s1_split(s2, 1)
  439. @bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
  440. def split_test_no_match_two_character(STR):
  441. s1 = STR("AB" * 1000)
  442. s2 = STR("BC")
  443. s1_split = s1.split
  444. for x in _RANGE_1000:
  445. s1_split(s2, 1)
  446. @bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
  447. def split_test_slow_match_two_characters(STR):
  448. s1 = STR("AB" * 300+"C")
  449. s2 = STR("BC")
  450. s1_split = s1.split
  451. for x in _RANGE_1000:
  452. s1_split(s2, 1)
  453. @bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
  454. "late match, 100 characters", 100)
  455. def split_test_slow_match_100_characters(STR):
  456. m = STR("ABC"*33)
  457. d = STR("D")
  458. e = STR("E")
  459. s1 = (m+d)*500 + m+e
  460. s2 = m+e
  461. s1_split = s1.split
  462. for x in _RANGE_100:
  463. s1_split(s2, 1)
  464. #### Same for rsplit(s, 1)
  465. @bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
  466. def rsplit_test_quick_match_single_character(STR):
  467. s1 = STR("A" * 1000)
  468. s2 = STR("A")
  469. s1_rsplit = s1.rsplit
  470. for x in _RANGE_1000:
  471. s1_rsplit(s2, 1)
  472. @bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
  473. def rsplit_test_no_match_single_character(STR):
  474. s1 = STR("A" * 1000)
  475. s2 = STR("B")
  476. s1_rsplit = s1.rsplit
  477. for x in _RANGE_1000:
  478. s1_rsplit(s2, 1)
  479. @bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
  480. def rsplit_test_quick_match_two_characters(STR):
  481. s1 = STR("AB" * 1000)
  482. s2 = STR("AB")
  483. s1_rsplit = s1.rsplit
  484. for x in _RANGE_1000:
  485. s1_rsplit(s2, 1)
  486. @bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
  487. def rsplit_test_no_match_two_character(STR):
  488. s1 = STR("AB" * 1000)
  489. s2 = STR("BC")
  490. s1_rsplit = s1.rsplit
  491. for x in _RANGE_1000:
  492. s1_rsplit(s2, 1)
  493. @bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
  494. def rsplit_test_slow_match_two_characters(STR):
  495. s1 = STR("C" + "AB" * 300)
  496. s2 = STR("CA")
  497. s1_rsplit = s1.rsplit
  498. for x in _RANGE_1000:
  499. s1_rsplit(s2, 1)
  500. @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
  501. "late match, 100 characters", 100)
  502. def rsplit_test_slow_match_100_characters(STR):
  503. m = STR("ABC"*33)
  504. d = STR("D")
  505. e = STR("E")
  506. s1 = e + m + (d+m)*500
  507. s2 = e + m
  508. s1_rsplit = s1.rsplit
  509. for x in _RANGE_100:
  510. s1_rsplit(s2, 1)
  511. #### Benchmark the operator-based methods
  512. @bench('"A"*10', "repeat 1 character 10 times", 1000)
  513. def repeat_single_10_times(STR):
  514. s = STR("A")
  515. for x in _RANGE_1000:
  516. s * 10
  517. @bench('"A"*1000', "repeat 1 character 1000 times", 1000)
  518. def repeat_single_1000_times(STR):
  519. s = STR("A")
  520. for x in _RANGE_1000:
  521. s * 1000
  522. @bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
  523. def repeat_5_10_times(STR):
  524. s = STR("ABCDE")
  525. for x in _RANGE_1000:
  526. s * 10
  527. @bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
  528. def repeat_5_1000_times(STR):
  529. s = STR("ABCDE")
  530. for x in _RANGE_1000:
  531. s * 1000
  532. # + for concat
  533. @bench('"Andrew"+"Dalke"', "concat two strings", 1000)
  534. def concat_two_strings(STR):
  535. s1 = STR("Andrew")
  536. s2 = STR("Dalke")
  537. for x in _RANGE_1000:
  538. s1+s2
  539. @bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
  540. 1000)
  541. def concat_many_strings(STR):
  542. s1=STR('TIXSGYNREDCVBHJ')
  543. s2=STR('PUMTLXBZVDO')
  544. s3=STR('FVZNJ')
  545. s4=STR('OGDXUW')
  546. s5=STR('WEIMRNCOYVGHKB')
  547. s6=STR('FCQTNMXPUZH')
  548. s7=STR('TICZJYRLBNVUEAK')
  549. s8=STR('REYB')
  550. s9=STR('PWUOQ')
  551. s10=STR('EQHCMKBS')
  552. s11=STR('AEVDFOH')
  553. s12=STR('IFHVD')
  554. s13=STR('JGTCNLXWOHQ')
  555. s14=STR('ITSKEPYLROZAWXF')
  556. s15=STR('THEK')
  557. s16=STR('GHPZFBUYCKMNJIT')
  558. s17=STR('JMUZ')
  559. s18=STR('WLZQMTB')
  560. s19=STR('KPADCBW')
  561. s20=STR('TNJHZQAGBU')
  562. for x in _RANGE_1000:
  563. (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+
  564. s11+s12+s13+s14+s15+s16+s17+s18+s19+s20)
  565. #### Benchmark join
  566. def get_bytes_yielding_seq(STR, arg):
  567. if STR is BYTES and sys.version_info >= (3,):
  568. raise UnsupportedType
  569. return STR(arg)
  570. @bench('"A".join("")',
  571. "join empty string, with 1 character sep", 100)
  572. def join_empty_single(STR):
  573. sep = STR("A")
  574. s2 = get_bytes_yielding_seq(STR, "")
  575. sep_join = sep.join
  576. for x in _RANGE_100:
  577. sep_join(s2)
  578. @bench('"ABCDE".join("")',
  579. "join empty string, with 5 character sep", 100)
  580. def join_empty_5(STR):
  581. sep = STR("ABCDE")
  582. s2 = get_bytes_yielding_seq(STR, "")
  583. sep_join = sep.join
  584. for x in _RANGE_100:
  585. sep_join(s2)
  586. @bench('"A".join("ABC..Z")',
  587. "join string with 26 characters, with 1 character sep", 1000)
  588. def join_alphabet_single(STR):
  589. sep = STR("A")
  590. s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
  591. sep_join = sep.join
  592. for x in _RANGE_1000:
  593. sep_join(s2)
  594. @bench('"ABCDE".join("ABC..Z")',
  595. "join string with 26 characters, with 5 character sep", 1000)
  596. def join_alphabet_5(STR):
  597. sep = STR("ABCDE")
  598. s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
  599. sep_join = sep.join
  600. for x in _RANGE_1000:
  601. sep_join(s2)
  602. @bench('"A".join(list("ABC..Z"))',
  603. "join list of 26 characters, with 1 character sep", 1000)
  604. def join_alphabet_list_single(STR):
  605. sep = STR("A")
  606. s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
  607. sep_join = sep.join
  608. for x in _RANGE_1000:
  609. sep_join(s2)
  610. @bench('"ABCDE".join(list("ABC..Z"))',
  611. "join list of 26 characters, with 5 character sep", 1000)
  612. def join_alphabet_list_five(STR):
  613. sep = STR("ABCDE")
  614. s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
  615. sep_join = sep.join
  616. for x in _RANGE_1000:
  617. sep_join(s2)
  618. @bench('"A".join(["Bob"]*100))',
  619. "join list of 100 words, with 1 character sep", 1000)
  620. def join_100_words_single(STR):
  621. sep = STR("A")
  622. s2 = [STR("Bob")]*100
  623. sep_join = sep.join
  624. for x in _RANGE_1000:
  625. sep_join(s2)
  626. @bench('"ABCDE".join(["Bob"]*100))',
  627. "join list of 100 words, with 5 character sep", 1000)
  628. def join_100_words_5(STR):
  629. sep = STR("ABCDE")
  630. s2 = [STR("Bob")]*100
  631. sep_join = sep.join
  632. for x in _RANGE_1000:
  633. sep_join(s2)
  634. #### split tests
  635. @bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
  636. def whitespace_split(STR):
  637. s = STR("Here are some words. "*2)
  638. s_split = s.split
  639. for x in _RANGE_1000:
  640. s_split()
  641. @bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
  642. def whitespace_rsplit(STR):
  643. s = STR("Here are some words. "*2)
  644. s_rsplit = s.rsplit
  645. for x in _RANGE_1000:
  646. s_rsplit()
  647. @bench('("Here are some words. "*2).split(None, 1)',
  648. "split 1 whitespace", 1000)
  649. def whitespace_split_1(STR):
  650. s = STR("Here are some words. "*2)
  651. s_split = s.split
  652. N = None
  653. for x in _RANGE_1000:
  654. s_split(N, 1)
  655. @bench('("Here are some words. "*2).rsplit(None, 1)',
  656. "split 1 whitespace", 1000)
  657. def whitespace_rsplit_1(STR):
  658. s = STR("Here are some words. "*2)
  659. s_rsplit = s.rsplit
  660. N = None
  661. for x in _RANGE_1000:
  662. s_rsplit(N, 1)
  663. @bench('("Here are some words. "*2).partition(" ")',
  664. "split 1 whitespace", 1000)
  665. def whitespace_partition(STR):
  666. sep = STR(" ")
  667. s = STR("Here are some words. "*2)
  668. s_partition = s.partition
  669. for x in _RANGE_1000:
  670. s_partition(sep)
  671. @bench('("Here are some words. "*2).rpartition(" ")',
  672. "split 1 whitespace", 1000)
  673. def whitespace_rpartition(STR):
  674. sep = STR(" ")
  675. s = STR("Here are some words. "*2)
  676. s_rpartition = s.rpartition
  677. for x in _RANGE_1000:
  678. s_rpartition(sep)
  679. human_text = """\
  680. Python is a dynamic object-oriented programming language that can be
  681. used for many kinds of software development. It offers strong support
  682. for integration with other languages and tools, comes with extensive
  683. standard libraries, and can be learned in a few days. Many Python
  684. programmers report substantial productivity gains and feel the language
  685. encourages the development of higher quality, more maintainable code.
  686. Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
  687. Handhelds, and Nokia mobile phones. Python has also been ported to the
  688. Java and .NET virtual machines.
  689. Python is distributed under an OSI-approved open source license that
  690. makes it free to use, even for commercial products.
  691. """*25
  692. human_text_bytes = bytes_from_str(human_text)
  693. human_text_unicode = unicode_from_str(human_text)
  694. def _get_human_text(STR):
  695. if STR is UNICODE:
  696. return human_text_unicode
  697. if STR is BYTES:
  698. return human_text_bytes
  699. raise AssertionError
  700. @bench('human_text.split()', "split whitespace (huge)", 10)
  701. def whitespace_split_huge(STR):
  702. s = _get_human_text(STR)
  703. s_split = s.split
  704. for x in _RANGE_10:
  705. s_split()
  706. @bench('human_text.rsplit()', "split whitespace (huge)", 10)
  707. def whitespace_rsplit_huge(STR):
  708. s = _get_human_text(STR)
  709. s_rsplit = s.rsplit
  710. for x in _RANGE_10:
  711. s_rsplit()
  712. @bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
  713. def newlines_split(STR):
  714. s = STR("this\nis\na\ntest\n")
  715. s_split = s.split
  716. nl = STR("\n")
  717. for x in _RANGE_1000:
  718. s_split(nl)
  719. @bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
  720. def newlines_rsplit(STR):
  721. s = STR("this\nis\na\ntest\n")
  722. s_rsplit = s.rsplit
  723. nl = STR("\n")
  724. for x in _RANGE_1000:
  725. s_rsplit(nl)
  726. @bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
  727. def newlines_splitlines(STR):
  728. s = STR("this\nis\na\ntest\n")
  729. s_splitlines = s.splitlines
  730. for x in _RANGE_1000:
  731. s_splitlines()
  732. ## split text with 2000 newlines
  733. def _make_2000_lines():
  734. import random
  735. r = random.Random(100)
  736. chars = list(map(chr, range(32, 128)))
  737. i = 0
  738. while i < len(chars):
  739. chars[i] = " "
  740. i += r.randrange(9)
  741. s = "".join(chars)
  742. s = s*4
  743. words = []
  744. for i in range(2000):
  745. start = r.randrange(96)
  746. n = r.randint(5, 65)
  747. words.append(s[start:start+n])
  748. return "\n".join(words)+"\n"
  749. _text_with_2000_lines = _make_2000_lines()
  750. _text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
  751. _text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
  752. def _get_2000_lines(STR):
  753. if STR is UNICODE:
  754. return _text_with_2000_lines_unicode
  755. if STR is BYTES:
  756. return _text_with_2000_lines_bytes
  757. raise AssertionError
  758. @bench('"...text...".split("\\n")', "split 2000 newlines", 10)
  759. def newlines_split_2000(STR):
  760. s = _get_2000_lines(STR)
  761. s_split = s.split
  762. nl = STR("\n")
  763. for x in _RANGE_10:
  764. s_split(nl)
  765. @bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
  766. def newlines_rsplit_2000(STR):
  767. s = _get_2000_lines(STR)
  768. s_rsplit = s.rsplit
  769. nl = STR("\n")
  770. for x in _RANGE_10:
  771. s_rsplit(nl)
  772. @bench('"...text...".splitlines()', "split 2000 newlines", 10)
  773. def newlines_splitlines_2000(STR):
  774. s = _get_2000_lines(STR)
  775. s_splitlines = s.splitlines
  776. for x in _RANGE_10:
  777. s_splitlines()
  778. ## split text on "--" characters
  779. @bench(
  780. '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
  781. "split on multicharacter separator (small)", 1000)
  782. def split_multichar_sep_small(STR):
  783. s = STR("this--is--a--test--of--the--emergency--broadcast--system")
  784. s_split = s.split
  785. pat = STR("--")
  786. for x in _RANGE_1000:
  787. s_split(pat)
  788. @bench(
  789. '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
  790. "split on multicharacter separator (small)", 1000)
  791. def rsplit_multichar_sep_small(STR):
  792. s = STR("this--is--a--test--of--the--emergency--broadcast--system")
  793. s_rsplit = s.rsplit
  794. pat = STR("--")
  795. for x in _RANGE_1000:
  796. s_rsplit(pat)
  797. ## split dna text on "ACTAT" characters
  798. @bench('dna.split("ACTAT")',
  799. "split on multicharacter separator (dna)", 10)
  800. def split_multichar_sep_dna(STR):
  801. s = _get_dna(STR)
  802. s_split = s.split
  803. pat = STR("ACTAT")
  804. for x in _RANGE_10:
  805. s_split(pat)
  806. @bench('dna.rsplit("ACTAT")',
  807. "split on multicharacter separator (dna)", 10)
  808. def rsplit_multichar_sep_dna(STR):
  809. s = _get_dna(STR)
  810. s_rsplit = s.rsplit
  811. pat = STR("ACTAT")
  812. for x in _RANGE_10:
  813. s_rsplit(pat)
  814. ## split with limits
  815. GFF3_example = "\t".join([
  816. "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
  817. "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"])
  818. @bench('GFF3_example.split("\\t")', "tab split", 1000)
  819. def tab_split_no_limit(STR):
  820. sep = STR("\t")
  821. s = STR(GFF3_example)
  822. s_split = s.split
  823. for x in _RANGE_1000:
  824. s_split(sep)
  825. @bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
  826. def tab_split_limit(STR):
  827. sep = STR("\t")
  828. s = STR(GFF3_example)
  829. s_split = s.split
  830. for x in _RANGE_1000:
  831. s_split(sep, 8)
  832. @bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
  833. def tab_rsplit_no_limit(STR):
  834. sep = STR("\t")
  835. s = STR(GFF3_example)
  836. s_rsplit = s.rsplit
  837. for x in _RANGE_1000:
  838. s_rsplit(sep)
  839. @bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
  840. def tab_rsplit_limit(STR):
  841. sep = STR("\t")
  842. s = STR(GFF3_example)
  843. s_rsplit = s.rsplit
  844. for x in _RANGE_1000:
  845. s_rsplit(sep, 8)
  846. #### Count characters
  847. @bench('...text.with.2000.newlines.count("\\n")',
  848. "count newlines", 10)
  849. def count_newlines(STR):
  850. s = _get_2000_lines(STR)
  851. s_count = s.count
  852. nl = STR("\n")
  853. for x in _RANGE_10:
  854. s_count(nl)
  855. # Orchid sequences concatenated, from Biopython
  856. _dna = """
  857. CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
  858. AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
  859. AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
  860. TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
  861. AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
  862. TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
  863. CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
  864. TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
  865. GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
  866. TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
  867. GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
  868. ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
  869. CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
  870. ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
  871. ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
  872. TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
  873. CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
  874. GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
  875. ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
  876. ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
  877. ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
  878. GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
  879. TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
  880. TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
  881. TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
  882. GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
  883. GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
  884. AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
  885. GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
  886. TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
  887. CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
  888. TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
  889. TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
  890. AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
  891. GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
  892. GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
  893. CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
  894. GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
  895. TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
  896. ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
  897. GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
  898. AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
  899. AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
  900. ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
  901. GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
  902. GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
  903. AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
  904. GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
  905. ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
  906. GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
  907. """
  908. _dna = "".join(_dna.splitlines())
  909. _dna = _dna * 25
  910. _dna_bytes = bytes_from_str(_dna)
  911. _dna_unicode = unicode_from_str(_dna)
  912. def _get_dna(STR):
  913. if STR is UNICODE:
  914. return _dna_unicode
  915. if STR is BYTES:
  916. return _dna_bytes
  917. raise AssertionError
  918. @bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
  919. def count_aact(STR):
  920. seq = _get_dna(STR)
  921. seq_count = seq.count
  922. needle = STR("AACT")
  923. for x in _RANGE_10:
  924. seq_count(needle)
  925. ##### startswith and endswith
  926. @bench('"Andrew".startswith("A")', 'startswith single character', 1000)
  927. def startswith_single(STR):
  928. s1 = STR("Andrew")
  929. s2 = STR("A")
  930. s1_startswith = s1.startswith
  931. for x in _RANGE_1000:
  932. s1_startswith(s2)
  933. @bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
  934. 1000)
  935. def startswith_multiple(STR):
  936. s1 = STR("Andrew")
  937. s2 = STR("Andrew")
  938. s1_startswith = s1.startswith
  939. for x in _RANGE_1000:
  940. s1_startswith(s2)
  941. @bench('"Andrew".startswith("Anders")',
  942. 'startswith multiple characters - not!', 1000)
  943. def startswith_multiple_not(STR):
  944. s1 = STR("Andrew")
  945. s2 = STR("Anders")
  946. s1_startswith = s1.startswith
  947. for x in _RANGE_1000:
  948. s1_startswith(s2)
  949. # endswith
  950. @bench('"Andrew".endswith("w")', 'endswith single character', 1000)
  951. def endswith_single(STR):
  952. s1 = STR("Andrew")
  953. s2 = STR("w")
  954. s1_endswith = s1.endswith
  955. for x in _RANGE_1000:
  956. s1_endswith(s2)
  957. @bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
  958. def endswith_multiple(STR):
  959. s1 = STR("Andrew")
  960. s2 = STR("Andrew")
  961. s1_endswith = s1.endswith
  962. for x in _RANGE_1000:
  963. s1_endswith(s2)
  964. @bench('"Andrew".endswith("Anders")',
  965. 'endswith multiple characters - not!', 1000)
  966. def endswith_multiple_not(STR):
  967. s1 = STR("Andrew")
  968. s2 = STR("Anders")
  969. s1_endswith = s1.endswith
  970. for x in _RANGE_1000:
  971. s1_endswith(s2)
  972. #### Strip
  973. @bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
  974. def terminal_newline_strip_right(STR):
  975. s = STR("Hello!\n")
  976. s_strip = s.strip
  977. for x in _RANGE_1000:
  978. s_strip()
  979. @bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
  980. def terminal_newline_rstrip(STR):
  981. s = STR("Hello!\n")
  982. s_rstrip = s.rstrip
  983. for x in _RANGE_1000:
  984. s_rstrip()
  985. @bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
  986. def terminal_newline_strip_left(STR):
  987. s = STR("\nHello!")
  988. s_strip = s.strip
  989. for x in _RANGE_1000:
  990. s_strip()
  991. @bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
  992. def terminal_newline_strip_both(STR):
  993. s = STR("\nHello!\n")
  994. s_strip = s.strip
  995. for x in _RANGE_1000:
  996. s_strip()
  997. @bench('"\\nHello!".rstrip()', 'strip terminal newline', 1000)
  998. def terminal_newline_lstrip(STR):
  999. s = STR("\nHello!")
  1000. s_lstrip = s.lstrip
  1001. for x in _RANGE_1000:
  1002. s_lstrip()
  1003. @bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
  1004. 'strip terminal newline', 1000)
  1005. def terminal_newline_if_else(STR):
  1006. s = STR("Hello!\n")
  1007. NL = STR("\n")
  1008. for x in _RANGE_1000:
  1009. s[:-1] if (s[-1] == NL) else s
  1010. # Strip multiple spaces or tabs
  1011. @bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
  1012. def terminal_space_strip(STR):
  1013. s = STR("Hello\t \t!")
  1014. s_strip = s.strip
  1015. for x in _RANGE_1000:
  1016. s_strip()
  1017. @bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
  1018. def terminal_space_rstrip(STR):
  1019. s = STR("Hello!\t \t")
  1020. s_rstrip = s.rstrip
  1021. for x in _RANGE_1000:
  1022. s_rstrip()
  1023. @bench('"\\t \\tHello".rstrip()', 'strip terminal spaces and tabs', 1000)
  1024. def terminal_space_lstrip(STR):
  1025. s = STR("\t \tHello!")
  1026. s_lstrip = s.lstrip
  1027. for x in _RANGE_1000:
  1028. s_lstrip()
  1029. #### replace
  1030. @bench('"This is a test".replace(" ", "\\t")', 'replace single character',
  1031. 1000)
  1032. def replace_single_character(STR):
  1033. s = STR("This is a test!")
  1034. from_str = STR(" ")
  1035. to_str = STR("\t")
  1036. s_replace = s.replace
  1037. for x in _RANGE_1000:
  1038. s_replace(from_str, to_str)
  1039. @uses_re
  1040. @bench('re.sub(" ", "\\t", "This is a test"', 'replace single character',
  1041. 1000)
  1042. def replace_single_character_re(STR):
  1043. s = STR("This is a test!")
  1044. pat = re.compile(STR(" "))
  1045. to_str = STR("\t")
  1046. pat_sub = pat.sub
  1047. for x in _RANGE_1000:
  1048. pat_sub(to_str, s)
  1049. @bench('"...text.with.2000.lines...replace("\\n", " ")',
  1050. 'replace single character, big string', 10)
  1051. def replace_single_character_big(STR):
  1052. s = _get_2000_lines(STR)
  1053. from_str = STR("\n")
  1054. to_str = STR(" ")
  1055. s_replace = s.replace
  1056. for x in _RANGE_10:
  1057. s_replace(from_str, to_str)
  1058. @uses_re
  1059. @bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
  1060. 'replace single character, big string', 10)
  1061. def replace_single_character_big_re(STR):
  1062. s = _get_2000_lines(STR)
  1063. pat = re.compile(STR("\n"))
  1064. to_str = STR(" ")
  1065. pat_sub = pat.sub
  1066. for x in _RANGE_10:
  1067. pat_sub(to_str, s)
  1068. @bench('dna.replace("ATC", "ATT")',
  1069. 'replace multiple characters, dna', 10)
  1070. def replace_multiple_characters_dna(STR):
  1071. seq = _get_dna(STR)
  1072. from_str = STR("ATC")
  1073. to_str = STR("ATT")
  1074. seq_replace = seq.replace
  1075. for x in _RANGE_10:
  1076. seq_replace(from_str, to_str)
  1077. # This increases the character count
  1078. @bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
  1079. 'replace and expand multiple characters, big string', 10)
  1080. def replace_multiple_character_big(STR):
  1081. s = _get_2000_lines(STR)
  1082. from_str = STR("\n")
  1083. to_str = STR("\r\n")
  1084. s_replace = s.replace
  1085. for x in _RANGE_10:
  1086. s_replace(from_str, to_str)
  1087. # This decreases the character count
  1088. @bench('"When shall we three meet again?".replace("ee", "")',
  1089. 'replace/remove multiple characters', 1000)
  1090. def replace_multiple_character_remove(STR):
  1091. s = STR("When shall we three meet again?")
  1092. from_str = STR("ee")
  1093. to_str = STR("")
  1094. s_replace = s.replace
  1095. for x in _RANGE_1000:
  1096. s_replace(from_str, to_str)
  1097. big_s = "A" + ("Z"*128*1024)
  1098. big_s_bytes = bytes_from_str(big_s)
  1099. big_s_unicode = unicode_from_str(big_s)
  1100. def _get_big_s(STR):
  1101. if STR is UNICODE: return big_s_unicode
  1102. if STR is BYTES: return big_s_bytes
  1103. raise AssertionError
  1104. # The older replace implementation counted all matches in
  1105. # the string even when it only needed to make one replacement.
  1106. @bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
  1107. 'quick replace single character match', 10)
  1108. def quick_replace_single_match(STR):
  1109. s = _get_big_s(STR)
  1110. from_str = STR("A")
  1111. to_str = STR("BB")
  1112. s_replace = s.replace
  1113. for x in _RANGE_10:
  1114. s_replace(from_str, to_str, 1)
  1115. @bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
  1116. 'quick replace multiple character match', 10)
  1117. def quick_replace_multiple_match(STR):
  1118. s = _get_big_s(STR)
  1119. from_str = STR("AZZ")
  1120. to_str = STR("BBZZ")
  1121. s_replace = s.replace
  1122. for x in _RANGE_10:
  1123. s_replace(from_str, to_str, 1)
  1124. ####
  1125. # CCP does a lot of this, for internationalisation of ingame messages.
  1126. _format = "The %(thing)s is %(place)s the %(location)s."
  1127. _format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
  1128. _format_bytes = bytes_from_str(_format)
  1129. _format_unicode = unicode_from_str(_format)
  1130. _format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items())
  1131. _format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items())
  1132. def _get_format(STR):
  1133. if STR is UNICODE:
  1134. return _format_unicode
  1135. if STR is BYTES:
  1136. if sys.version_info >= (3,):
  1137. raise UnsupportedType
  1138. return _format_bytes
  1139. raise AssertionError
  1140. def _get_format_dict(STR):
  1141. if STR is UNICODE:
  1142. return _format_dict_unicode
  1143. if STR is BYTES:
  1144. if sys.version_info >= (3,):
  1145. raise UnsupportedType
  1146. return _format_dict_bytes
  1147. raise AssertionError
  1148. # Formatting.
  1149. @bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
  1150. 'formatting a string type with a dict', 1000)
  1151. def format_with_dict(STR):
  1152. s = _get_format(STR)
  1153. d = _get_format_dict(STR)
  1154. for x in _RANGE_1000:
  1155. s % d
  1156. #### Upper- and lower- case conversion
  1157. @bench('("Where in the world is Carmen San Deigo?"*10).lower()',
  1158. "case conversion -- rare", 1000)
  1159. def lower_conversion_rare(STR):
  1160. s = STR("Where in the world is Carmen San Deigo?"*10)
  1161. s_lower = s.lower
  1162. for x in _RANGE_1000:
  1163. s_lower()
  1164. @bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
  1165. "case conversion -- dense", 1000)
  1166. def lower_conversion_dense(STR):
  1167. s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
  1168. s_lower = s.lower
  1169. for x in _RANGE_1000:
  1170. s_lower()
  1171. @bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
  1172. "case conversion -- rare", 1000)
  1173. def upper_conversion_rare(STR):
  1174. s = STR("Where in the world is Carmen San Deigo?"*10)
  1175. s_upper = s.upper
  1176. for x in _RANGE_1000:
  1177. s_upper()
  1178. @bench('("where in the world is carmen san deigo?"*10).upper()',
  1179. "case conversion -- dense", 1000)
  1180. def upper_conversion_dense(STR):
  1181. s = STR("where in the world is carmen san deigo?"*10)
  1182. s_upper = s.upper
  1183. for x in _RANGE_1000:
  1184. s_upper()
  1185. # end of benchmarks
  1186. #################
  1187. class BenchTimer(timeit.Timer):
  1188. def best(self, repeat=1):
  1189. for i in range(1, 10):
  1190. number = 10**i
  1191. x = self.timeit(number)
  1192. if x > 0.02:
  1193. break
  1194. times = [x]
  1195. for i in range(1, repeat):
  1196. times.append(self.timeit(number))
  1197. return min(times) / number
  1198. def main():
  1199. (options, test_names) = parser.parse_args()
  1200. if options.bytes_only and options.unicode_only:
  1201. raise SystemExit("Only one of --8-bit and --unicode are allowed")
  1202. bench_functions = []
  1203. for (k,v) in globals().items():
  1204. if hasattr(v, "is_bench"):
  1205. if test_names:
  1206. for name in test_names:
  1207. if name in v.group:
  1208. break
  1209. else:
  1210. # Not selected, ignore
  1211. continue
  1212. if options.skip_re and hasattr(v, "uses_re"):
  1213. continue
  1214. bench_functions.append( (v.group, k, v) )
  1215. bench_functions.sort()
  1216. p("bytes\tunicode")
  1217. p("(in ms)\t(in ms)\t%\tcomment")
  1218. bytes_total = uni_total = 0.0
  1219. for title, group in itertools.groupby(bench_functions,
  1220. operator.itemgetter(0)):
  1221. # Flush buffer before each group
  1222. sys.stdout.flush()
  1223. p("="*10, title)
  1224. for (_, k, v) in group:
  1225. if hasattr(v, "is_bench"):
  1226. bytes_time = 0.0
  1227. bytes_time_s = " - "
  1228. if not options.unicode_only:
  1229. try:
  1230. bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,),
  1231. "import __main__").best(REPEAT)
  1232. bytes_time_s = "%.2f" % (1000 * bytes_time)
  1233. bytes_total += bytes_time
  1234. except UnsupportedType:
  1235. bytes_time_s = "N/A"
  1236. uni_time = 0.0
  1237. uni_time_s = " - "
  1238. if not options.bytes_only:
  1239. try:
  1240. uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,),
  1241. "import __main__").best(REPEAT)
  1242. uni_time_s = "%.2f" % (1000 * uni_time)
  1243. uni_total += uni_time
  1244. except UnsupportedType:
  1245. uni_time_s = "N/A"
  1246. try:
  1247. average = bytes_time/uni_time
  1248. except (TypeError, ZeroDivisionError):
  1249. average = 0.0
  1250. p("%s\t%s\t%.1f\t%s (*%d)" % (
  1251. bytes_time_s, uni_time_s, 100.*average,
  1252. v.comment, v.repeat_count))
  1253. if bytes_total == uni_total == 0.0:
  1254. p("That was zippy!")
  1255. else:
  1256. try:
  1257. ratio = bytes_total/uni_total
  1258. except ZeroDivisionError:
  1259. ratio = 0.0
  1260. p("%.2f\t%.2f\t%.1f\t%s" % (
  1261. 1000*bytes_total, 1000*uni_total, 100.*ratio,
  1262. "TOTAL"))
  1263. if __name__ == "__main__":
  1264. main()