
/unladen_swallow/performance/bm_regex_effbot.py

https://bitbucket.org/csenger/benchmarks
#!/usr/bin/env python

"""Benchmarks for Python's regex engine.

These are some of the original benchmarks used to tune Python's regex engine
in 2000, written by Fredrik Lundh. Retrieved from
http://mail.python.org/pipermail/python-dev/2000-August/007797.html and
integrated into Unladen Swallow's perf.py in 2009 by David Laing.

These benchmarks are of interest since they helped to guide the original
optimization of the sre engine, and we shouldn't necessarily ignore them just
because they're "old".
"""

# Python imports
import optparse
import re
import time

# Local imports
import util

# These are the regular expressions to be tested. These sync up,
# index-for-index, with the list of strings generated by gen_string_table()
# below.
regexs = [
    re.compile('Python|Perl'),
    re.compile('Python|Perl'),
    re.compile('(Python|Perl)'),
    re.compile('(?:Python|Perl)'),
    re.compile('Python'),
    re.compile('Python'),
    re.compile('.*Python'),
    re.compile('.*Python.*'),
    re.compile('.*(Python)'),
    re.compile('.*(?:Python)'),
    re.compile('Python|Perl|Tcl'),
    re.compile('Python|Perl|Tcl'),
    re.compile('(Python|Perl|Tcl)'),
    re.compile('(?:Python|Perl|Tcl)'),
    re.compile('(Python)\\1'),
    re.compile('(Python)\\1'),
    re.compile('([0a-z][a-z0-9]*,)+'),
    re.compile('(?:[0a-z][a-z0-9]*,)+'),
    re.compile('([a-z][a-z0-9]*,)+'),
    re.compile('(?:[a-z][a-z0-9]*,)+'),
    re.compile('.*P.*y.*t.*h.*o.*n.*')]
def gen_string_table(n):
    """Generates the list of strings that will be used in the benchmarks.

    All strings have repeated prefixes and suffixes, and n specifies the
    number of repetitions.
    """
    strings = []
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('P'*n+'Perl'+'P'*n)
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('-'*n+'Python'+'-'*n)
    strings.append('P'*n+'Python'+'P'*n)
    strings.append('-'*n+'Python'+'-'*n)
    strings.append('-'*n+'Python'+'-'*n)
    strings.append('-'*n+'Python'+'-'*n)
    strings.append('-'*n+'Python'+'-'*n)
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('P'*n+'Perl'+'P'*n)
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('-'*n+'Perl'+'-'*n)
    strings.append('-'*n+'PythonPython'+'-'*n)
    strings.append('P'*n+'PythonPython'+'P'*n)
    strings.append('-'*n+'a5,b7,c9,'+'-'*n)
    strings.append('-'*n+'a5,b7,c9,'+'-'*n)
    strings.append('-'*n+'a5,b7,c9,'+'-'*n)
    strings.append('-'*n+'a5,b7,c9,'+'-'*n)
    strings.append('-'*n+'Python'+'-'*n)
    return strings
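# For example, gen_string_table(2) yields '--Perl--' at index 0 and
# 'PPPerlPP' at index 1; the 'P'-padded variants force the engine to
# consider many false match starts for the 'Python|Perl' patterns.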
# A cache for the generated strings.
string_tables = {}


def init_benchmarks(n_values=None):
    """Initialize the strings we'll run the regexes against.

    The strings used in the benchmark are prefixed and suffixed by
    strings that are repeated n times.

    The sequence n_values contains the values for n. If n_values is
    None, the values of n from the original benchmark are used.

    The generated lists of strings are cached in the string_tables
    variable, which is indexed by n.

    Returns:
        A list of string prefix/suffix lengths.
    """
    if n_values is None:
        n_values = [0, 5, 50, 250, 1000, 5000, 10000]
    for n in n_values:
        string_tables[n] = gen_string_table(n)
    return n_values
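# For example, init_benchmarks([0, 10]) fills string_tables[0] and
# string_tables[10] with 21-element string lists (one per pattern in
# regexs) and returns [0, 10].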
def run_benchmarks(n):
    """Runs all of the benchmarks for a given value of n."""
    for id in xrange(len(regexs)):
        # Each pattern is searched ten times against its paired string
        # to amplify the per-iteration work.
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
        re.search(regexs[id], string_tables[n][id])
def test_regex_effbot(iterations):
    sizes = init_benchmarks()

    # Warm up.
    for size in sizes:
        run_benchmarks(size)

    times = []
    for i in xrange(iterations):
        t0 = time.time()
        for size in sizes:
            run_benchmarks(size)
        t1 = time.time()
        times.append(t1 - t0)
    return times
if __name__ == '__main__':
    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Test the performance of regexps using Fredrik Lundh's "
                     "benchmarks."))
    util.add_standard_options_to(parser)
    options, args = parser.parse_args()
    util.run_benchmark(options, options.num_runs, test_regex_effbot)
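
For quick standalone timing, a minimal sketch that bypasses util.run_benchmark and calls the benchmark function directly; it assumes the file is importable as bm_regex_effbot (with the Unladen Swallow util module on the path) and runs under Python 2, matching the xrange/optparse usage above:

    import bm_regex_effbot

    # Five timed passes over all prefix/suffix sizes; each element of
    # times is the wall-clock duration of one pass in seconds.
    times = bm_regex_effbot.test_regex_effbot(5)
    print 'min %.3fs  max %.3fs' % (min(times), max(times))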