PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/2.7/test/re_tests.py

https://bitbucket.org/dac_io/pypy
Python | 674 lines | 621 code | 17 blank | 36 comment | 2 complexity | 5bf589bc73bdf2d337027ba7fc5394d5 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- mode: python -*-
  3. # Re test suite and benchmark suite v1.5
  4. # The 3 possible outcomes for each pattern
  5. [SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
  6. # Benchmark suite (needs expansion)
  7. #
  8. # The benchmark suite does not test correctness, just speed. The
  9. # first element of each tuple is the regex pattern; the second is a
  10. # string to match it against. The benchmarking code will embed the
  11. # second string inside several sizes of padding, to test how regex
  12. # matching performs on large strings.
  13. benchmarks = [
  14. # test common prefix
  15. ('Python|Perl', 'Perl'), # Alternation
  16. ('(Python|Perl)', 'Perl'), # Grouped alternation
  17. ('Python|Perl|Tcl', 'Perl'), # Alternation
  18. ('(Python|Perl|Tcl)', 'Perl'), # Grouped alternation
  19. ('(Python)\\1', 'PythonPython'), # Backreference
  20. ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
  21. ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
  22. ('Python', 'Python'), # Simple text literal
  23. ('.*Python', 'Python'), # Bad text literal
  24. ('.*Python.*', 'Python'), # Worse text literal
  25. ('.*(Python)', 'Python'), # Bad text literal with grouping
  26. ]
  27. # Test suite (for verifying correctness)
  28. #
  29. # The test suite is a list of 5- or 3-tuples. The 5 parts of a
  30. # complete tuple are:
  31. # element 0: a string containing the pattern
  32. # 1: the string to match against the pattern
  33. # 2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
  34. # 3: a string that will be eval()'ed to produce a test string.
  35. # This is an arbitrary Python expression; the available
  36. # variables are "found" (the whole match), and "g1", "g2", ...
  37. # up to "g99" contain the contents of each group, or the
  38. # string 'None' if the group wasn't given a value, or the
  39. # string 'Error' if the group index was out of range;
  40. # also "groups", the return value of m.group() (a tuple).
  41. # 4: The expected result of evaluating the expression.
  42. # If the two don't match, an error is reported.
  43. #
  44. # If the regex isn't expected to work, the last two elements can be omitted.
  45. tests = [
  46. # Test ?P< and ?P= extensions
  47. ('(?P<foo_123', '', SYNTAX_ERROR), # Unterminated group identifier
  48. ('(?P<1>a)', '', SYNTAX_ERROR), # Begins with a digit
  49. ('(?P<!>a)', '', SYNTAX_ERROR), # Begins with an illegal char
  50. ('(?P<foo!>a)', '', SYNTAX_ERROR), # Begins with an illegal char
  51. # Same tests, for the ?P= form
  52. ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
  53. ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
  54. ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
  55. ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR), # Backref to undefined group
  56. ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
  57. ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
  58. # Test octal escapes
  59. ('\\1', 'a', SYNTAX_ERROR), # Backreference
  60. ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character
  61. ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
  62. ('\\141', 'a', SUCCEED, 'found', 'a'),
  63. ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
  64. # Test \0 is handled everywhere
  65. (r'\0', '\0', SUCCEED, 'found', '\0'),
  66. (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
  67. (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
  68. (r'[^a\0]', '\0', FAIL),
  69. # Test various letter escapes
  70. (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
  71. (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
  72. # NOTE: not an error under PCRE/PRE:
  73. # (r'\u', '', SYNTAX_ERROR), # A Perl escape
  74. (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
  75. (r'\xff', '\377', SUCCEED, 'found', chr(255)),
  76. # new \x semantics
  77. (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
  78. (r'\x00f', '\017', FAIL, 'found', chr(15)),
  79. (r'\x00fe', '\376', FAIL, 'found', chr(254)),
  80. # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
  81. # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
  82. # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
  83. (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
  84. SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
  85. # Test that . only matches \n in DOTALL mode
  86. ('a.b', 'acb', SUCCEED, 'found', 'acb'),
  87. ('a.b', 'a\nb', FAIL),
  88. ('a.*b', 'acc\nccb', FAIL),
  89. ('a.{4,5}b', 'acc\nccb', FAIL),
  90. ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
  91. ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
  92. ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
  93. ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
  94. ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
  95. (')', '', SYNTAX_ERROR), # Unmatched right bracket
  96. ('', '', SUCCEED, 'found', ''), # Empty pattern
  97. ('abc', 'abc', SUCCEED, 'found', 'abc'),
  98. ('abc', 'xbc', FAIL),
  99. ('abc', 'axc', FAIL),
  100. ('abc', 'abx', FAIL),
  101. ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
  102. ('abc', 'ababc', SUCCEED, 'found', 'abc'),
  103. ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
  104. ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
  105. ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
  106. ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  107. ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
  108. ('ab+bc', 'abc', FAIL),
  109. ('ab+bc', 'abq', FAIL),
  110. ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  111. ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
  112. ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
  113. ('ab?bc', 'abbbbc', FAIL),
  114. ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
  115. ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
  116. ('^abc$', 'abcc', FAIL),
  117. ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
  118. ('^abc$', 'aabc', FAIL),
  119. ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
  120. ('^', 'abc', SUCCEED, 'found+"-"', '-'),
  121. ('$', 'abc', SUCCEED, 'found+"-"', '-'),
  122. ('a.c', 'abc', SUCCEED, 'found', 'abc'),
  123. ('a.c', 'axc', SUCCEED, 'found', 'axc'),
  124. ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
  125. ('a.*c', 'axyzd', FAIL),
  126. ('a[bc]d', 'abc', FAIL),
  127. ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
  128. ('a[b-d]e', 'abd', FAIL),
  129. ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
  130. ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
  131. ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
  132. ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
  133. # NOTE: not an error under PCRE/PRE:
  134. # ('a[b-]', 'a-', SYNTAX_ERROR),
  135. ('a[]b', '-', SYNTAX_ERROR),
  136. ('a[', '-', SYNTAX_ERROR),
  137. ('a\\', '-', SYNTAX_ERROR),
  138. ('abc)', '-', SYNTAX_ERROR),
  139. ('(abc', '-', SYNTAX_ERROR),
  140. ('a]', 'a]', SUCCEED, 'found', 'a]'),
  141. ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
  142. ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
  143. ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
  144. ('a[^bc]d', 'abd', FAIL),
  145. ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
  146. ('a[^-b]c', 'a-c', FAIL),
  147. ('a[^]b]c', 'a]c', FAIL),
  148. ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
  149. ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
  150. ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
  151. ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
  152. ('\\by\\b', 'xy', FAIL),
  153. ('\\by\\b', 'yz', FAIL),
  154. ('\\by\\b', 'xyz', FAIL),
  155. ('x\\b', 'xyz', FAIL),
  156. ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
  157. ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
  158. ('z\\B', 'xyz', FAIL),
  159. ('\\Bx', 'xyz', FAIL),
  160. ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
  161. ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
  162. ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
  163. ('\\By\\B', 'xy', FAIL),
  164. ('\\By\\B', 'yz', FAIL),
  165. ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
  166. ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
  167. ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
  168. ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
  169. ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
  170. ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
  171. ('$b', 'b', FAIL),
  172. ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
  173. ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
  174. ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
  175. ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
  176. ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
  177. ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
  178. ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
  179. ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  180. ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  181. ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
  182. (')(', '-', SYNTAX_ERROR),
  183. ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
  184. ('abc', '', FAIL),
  185. ('a*', '', SUCCEED, 'found', ''),
  186. ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
  187. ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
  188. ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
  189. ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
  190. ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
  191. ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
  192. ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
  193. ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
  194. ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
  195. ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
  196. ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
  197. ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
  198. ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
  199. ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
  200. ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
  201. ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
  202. ('a[bcd]+dcdcde', 'adcdcde', FAIL),
  203. ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
  204. ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
  205. ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
  206. ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
  207. ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
  208. ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
  209. ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
  210. ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
  211. ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
  212. ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
  213. ('multiple words of text', 'uh-uh', FAIL),
  214. ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
  215. ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
  216. ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
  217. ('[k]', 'ab', FAIL),
  218. ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
  219. ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
  220. ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
  221. ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
  222. ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
  223. ('^(a+).\\1$', 'aaaa', FAIL),
  224. ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
  225. ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
  226. ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
  227. ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
  228. ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
  229. ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
  230. ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
  231. ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
  232. ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
  233. ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
  234. ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
  235. ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
  236. ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
  237. ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
  238. ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
  239. ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
  240. ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
  241. ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
  242. ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
  243. ('([abc]*)x', 'abc', FAIL),
  244. ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
  245. ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
  246. # Test symbolic groups
  247. ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
  248. ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
  249. ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
  250. ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
  251. # Test octal escapes/memory references
  252. ('\\1', 'a', SYNTAX_ERROR),
  253. ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
  254. ('\\141', 'a', SUCCEED, 'found', 'a'),
  255. ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
  256. # All tests from Perl
  257. ('abc', 'abc', SUCCEED, 'found', 'abc'),
  258. ('abc', 'xbc', FAIL),
  259. ('abc', 'axc', FAIL),
  260. ('abc', 'abx', FAIL),
  261. ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
  262. ('abc', 'ababc', SUCCEED, 'found', 'abc'),
  263. ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
  264. ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
  265. ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
  266. ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  267. ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  268. ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
  269. ('ab+bc', 'abc', FAIL),
  270. ('ab+bc', 'abq', FAIL),
  271. ('ab{1,}bc', 'abq', FAIL),
  272. ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  273. ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  274. ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  275. ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
  276. ('ab{4,5}bc', 'abbbbc', FAIL),
  277. ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
  278. ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
  279. ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
  280. ('ab?bc', 'abbbbc', FAIL),
  281. ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
  282. ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
  283. ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
  284. ('^abc$', 'abcc', FAIL),
  285. ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
  286. ('^abc$', 'aabc', FAIL),
  287. ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
  288. ('^', 'abc', SUCCEED, 'found', ''),
  289. ('$', 'abc', SUCCEED, 'found', ''),
  290. ('a.c', 'abc', SUCCEED, 'found', 'abc'),
  291. ('a.c', 'axc', SUCCEED, 'found', 'axc'),
  292. ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
  293. ('a.*c', 'axyzd', FAIL),
  294. ('a[bc]d', 'abc', FAIL),
  295. ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
  296. ('a[b-d]e', 'abd', FAIL),
  297. ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
  298. ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
  299. ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
  300. ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
  301. ('a[b-a]', '-', SYNTAX_ERROR),
  302. ('a[]b', '-', SYNTAX_ERROR),
  303. ('a[', '-', SYNTAX_ERROR),
  304. ('a]', 'a]', SUCCEED, 'found', 'a]'),
  305. ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
  306. ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
  307. ('a[^bc]d', 'abd', FAIL),
  308. ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
  309. ('a[^-b]c', 'a-c', FAIL),
  310. ('a[^]b]c', 'a]c', FAIL),
  311. ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
  312. ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
  313. ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
  314. ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
  315. ('*a', '-', SYNTAX_ERROR),
  316. ('(*)b', '-', SYNTAX_ERROR),
  317. ('$b', 'b', FAIL),
  318. ('a\\', '-', SYNTAX_ERROR),
  319. ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
  320. ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
  321. ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
  322. ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
  323. ('abc)', '-', SYNTAX_ERROR),
  324. ('(abc', '-', SYNTAX_ERROR),
  325. ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
  326. ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
  327. ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
  328. ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
  329. ('a**', '-', SYNTAX_ERROR),
  330. ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
  331. ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  332. ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  333. ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  334. ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
  335. ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
  336. ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
  337. (')(', '-', SYNTAX_ERROR),
  338. ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
  339. ('abc', '', FAIL),
  340. ('a*', '', SUCCEED, 'found', ''),
  341. ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
  342. ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
  343. ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
  344. ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
  345. ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
  346. ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
  347. ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
  348. ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
  349. ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
  350. ('^(ab|cd)e', 'abcde', FAIL),
  351. ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
  352. ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
  353. ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
  354. ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
  355. ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
  356. ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
  357. ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
  358. ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
  359. ('a[bcd]+dcdcde', 'adcdcde', FAIL),
  360. ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
  361. ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
  362. ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
  363. ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
  364. ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
  365. ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
  366. ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
  367. ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
  368. ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
  369. ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
  370. ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
  371. # Python does not have the same rules for \\41 so this is a syntax error
  372. # ('((((((((((a))))))))))\\41', 'aa', FAIL),
  373. # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
  374. ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
  375. ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
  376. ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
  377. ('multiple words of text', 'uh-uh', FAIL),
  378. ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
  379. ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
  380. ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
  381. ('[k]', 'ab', FAIL),
  382. ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
  383. ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
  384. ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
  385. ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
  386. ('(?i)abc', 'XBC', FAIL),
  387. ('(?i)abc', 'AXC', FAIL),
  388. ('(?i)abc', 'ABX', FAIL),
  389. ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
  390. ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
  391. ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
  392. ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
  393. ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
  394. ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  395. ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  396. ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
  397. ('(?i)ab+bc', 'ABC', FAIL),
  398. ('(?i)ab+bc', 'ABQ', FAIL),
  399. ('(?i)ab{1,}bc', 'ABQ', FAIL),
  400. ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  401. ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  402. ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  403. ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
  404. ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
  405. ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
  406. ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
  407. ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
  408. ('(?i)ab??bc', 'ABBBBC', FAIL),
  409. ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
  410. ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
  411. ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
  412. ('(?i)^abc$', 'ABCC', FAIL),
  413. ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
  414. ('(?i)^abc$', 'AABC', FAIL),
  415. ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
  416. ('(?i)^', 'ABC', SUCCEED, 'found', ''),
  417. ('(?i)$', 'ABC', SUCCEED, 'found', ''),
  418. ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
  419. ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
  420. ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
  421. ('(?i)a.*c', 'AXYZD', FAIL),
  422. ('(?i)a[bc]d', 'ABC', FAIL),
  423. ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
  424. ('(?i)a[b-d]e', 'ABD', FAIL),
  425. ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
  426. ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
  427. ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
  428. ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
  429. ('(?i)a[b-a]', '-', SYNTAX_ERROR),
  430. ('(?i)a[]b', '-', SYNTAX_ERROR),
  431. ('(?i)a[', '-', SYNTAX_ERROR),
  432. ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
  433. ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
  434. ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
  435. ('(?i)a[^bc]d', 'ABD', FAIL),
  436. ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
  437. ('(?i)a[^-b]c', 'A-C', FAIL),
  438. ('(?i)a[^]b]c', 'A]C', FAIL),
  439. ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
  440. ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
  441. ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
  442. ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
  443. ('(?i)*a', '-', SYNTAX_ERROR),
  444. ('(?i)(*)b', '-', SYNTAX_ERROR),
  445. ('(?i)$b', 'B', FAIL),
  446. ('(?i)a\\', '-', SYNTAX_ERROR),
  447. ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
  448. ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
  449. ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
  450. ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
  451. ('(?i)abc)', '-', SYNTAX_ERROR),
  452. ('(?i)(abc', '-', SYNTAX_ERROR),
  453. ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
  454. ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
  455. ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
  456. ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
  457. ('(?i)a**', '-', SYNTAX_ERROR),
  458. ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
  459. ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
  460. ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
  461. ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
  462. ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
  463. ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
  464. ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
  465. ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
  466. ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
  467. ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
  468. ('(?i))(', '-', SYNTAX_ERROR),
  469. ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
  470. ('(?i)abc', '', FAIL),
  471. ('(?i)a*', '', SUCCEED, 'found', ''),
  472. ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
  473. ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
  474. ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
  475. ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
  476. ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
  477. ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
  478. ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
  479. ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
  480. ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
  481. ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
  482. ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
  483. ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
  484. ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
  485. ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
  486. ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
  487. ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
  488. ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
  489. ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
  490. ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
  491. ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
  492. ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
  493. ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
  494. ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
  495. ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
  496. ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
  497. ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
  498. ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
  499. ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
  500. ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
  501. ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
  502. #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
  503. #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
  504. ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
  505. ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
  506. ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
  507. ('(?i)multiple words of text', 'UH-UH', FAIL),
  508. ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
  509. ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
  510. ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
  511. ('(?i)[k]', 'AB', FAIL),
  512. # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
  513. # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
  514. ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
  515. ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
  516. ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
  517. ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
  518. ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
  519. ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
  520. ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
  521. ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
  522. ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
  523. ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
  524. ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
  525. # lookbehind: split by : but not if it is escaped by -.
  526. ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
  527. # escaping with \ as we know it
  528. ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
  529. # terminating with ' and escaping with ? as in EDIFACT
  530. ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
  531. # Comments using the (?#...) syntax
  532. ('w(?# comment', 'w', SYNTAX_ERROR),
  533. ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
  534. # Check odd placement of embedded pattern modifiers
  535. # not an error under PCRE/PRE:
  536. ('w(?i)', 'W', SUCCEED, 'found', 'W'),
  537. # ('w(?i)', 'W', SYNTAX_ERROR),
  538. # Comments using the x embedded pattern modifier
  539. ("""(?x)w# comment 1
  540. x y
  541. # comment 2
  542. z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
  543. # using the m embedded pattern modifier
  544. ('^abc', """jkl
  545. abc
  546. xyz""", FAIL),
  547. ('(?m)^abc', """jkl
  548. abc
  549. xyz""", SUCCEED, 'found', 'abc'),
  550. ('(?m)abc$', """jkl
  551. xyzabc
  552. 123""", SUCCEED, 'found', 'abc'),
  553. # using the s embedded pattern modifier
  554. ('a.b', 'a\nb', FAIL),
  555. ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
  556. # test \w, etc. both inside and outside character classes
  557. ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
  558. ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
  559. ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
  560. ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
  561. ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
  562. # not an error under PCRE/PRE:
  563. # ('[\\d-x]', '-', SYNTAX_ERROR),
  564. (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
  565. (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
  566. (r'\xff', '\377', SUCCEED, 'found', chr(255)),
  567. # new \x semantics
  568. (r'\x00ff', '\377', FAIL),
  569. # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
  570. (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
  571. ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
  572. (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
  573. (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
  574. #
  575. # post-1.5.2 additions
  576. # xmllib problem
  577. (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
  578. # bug 110866: reference to undefined group
  579. (r'((.)\1+)', '', SYNTAX_ERROR),
  580. # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
  581. (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
  582. # bug 112468: various expected syntax errors
  583. (r'(', '', SYNTAX_ERROR),
  584. (r'[\41]', '!', SUCCEED, 'found', '!'),
  585. # bug 114033: nothing to repeat
  586. (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
  587. # bug 115040: rescan if flags are modified inside pattern
  588. (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
  589. # bug 115618: negative lookbehind
  590. (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
  591. # bug 116251: character class bug
  592. (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
  593. # bug 123769+127259: non-greedy backtracking bug
  594. (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
  595. (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
  596. (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
  597. # bug 127259: \Z shouldn't depend on multiline mode
  598. (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
  599. # bug 128899: uppercase literals under the ignorecase flag
  600. (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
  601. (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
  602. (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
  603. (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
  604. # bug 130748: ^* should be an error (nothing to repeat)
  605. (r'^*', '', SYNTAX_ERROR),
  606. # bug 133283: minimizing repeat problem
  607. (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
  608. # bug 477728: minimizing repeat problem
  609. (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
  610. # bug 483789: minimizing repeat problem
  611. (r'a[^>]*?b', 'a>b', FAIL),
  612. # bug 490573: minimizing repeat problem
  613. (r'^a*?$', 'foo', FAIL),
  614. # bug 470582: nested groups problem
  615. (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
  616. # another minimizing repeat problem (capturing groups in assertions)
  617. ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
  618. ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
  619. ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
  620. ]
  621. try:
  622. u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
  623. except SyntaxError:
  624. pass
  625. else:
  626. tests.extend([
  627. # bug 410271: \b broken under locales
  628. (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
  629. (r'(?u)\b.\b', u, SUCCEED, 'found', u),
  630. (r'(?u)\w', u, SUCCEED, 'found', u),
  631. ])