# PageRenderTime 830ms CodeModel.GetById 151ms app.highlight 480ms RepoModel.GetById 118ms app.codeStats 1ms
#
# /Lib/test/re_tests.py
#
# http://unladen-swallow.googlecode.com/
# Python | 674 lines | 621 code | 17 blank | 36 comment | 2 complexity | 5bf589bc73bdf2d337027ba7fc5394d5 MD5 | raw file
  1#!/usr/bin/env python
  2# -*- mode: python -*-
  3
  4# Re test suite and benchmark suite v1.5
  5
  6# The 3 possible outcomes for each pattern
  7[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
  8
  9# Benchmark suite (needs expansion)
 10#
 11# The benchmark suite does not test correctness, just speed.  The
 12# first element of each tuple is the regex pattern; the second is a
 13# string to match it against.  The benchmarking code will embed the
 14# second string inside several sizes of padding, to test how regex
 15# matching performs on large strings.
 16
 17benchmarks = [
 18
 19    # test common prefix
 20    ('Python|Perl', 'Perl'),    # Alternation
 21    ('(Python|Perl)', 'Perl'),  # Grouped alternation
 22
 23    ('Python|Perl|Tcl', 'Perl'),        # Alternation
 24    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation
 25
 26    ('(Python)\\1', 'PythonPython'),    # Backreference
 27    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
 28    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
 29
 30    ('Python', 'Python'),               # Simple text literal
 31    ('.*Python', 'Python'),             # Bad text literal
 32    ('.*Python.*', 'Python'),           # Worse text literal
 33    ('.*(Python)', 'Python'),           # Bad text literal with grouping
 34
 35]
 36
 37# Test suite (for verifying correctness)
 38#
 39# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
 40# complete tuple are:
 41# element 0: a string containing the pattern
 42#         1: the string to match against the pattern
 43#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
 44#         3: a string that will be eval()'ed to produce a test string.
 45#            This is an arbitrary Python expression; the available
 46#            variables are "found" (the whole match), and "g1", "g2", ...
 47#            up to "g99" contain the contents of each group, or the
 48#            string 'None' if the group wasn't given a value, or the
 49#            string 'Error' if the group index was out of range;
 50#            also "groups", the return value of m.group() (a tuple).
 51#         4: The expected result of evaluating the expression.
 52#            If the two don't match, an error is reported.
 53#
 54# If the regex isn't expected to work, the latter two elements can be omitted.
 55
 56tests = [
 57    # Test ?P< and ?P= extensions
 58    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
 59    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
 60    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
 61    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Begins with an illegal char
 62
 63    # Same tests, for the ?P= form
 64    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
 65    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
 66    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
 67    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
 68
 69    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
 70    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
 71
 72    # Test octal escapes
 73    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
 74    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
 75    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
 76    ('\\141', 'a', SUCCEED, 'found', 'a'),
 77    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
 78
 79    # Test \0 is handled everywhere
 80    (r'\0', '\0', SUCCEED, 'found', '\0'),
 81    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
 82    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
 83    (r'[^a\0]', '\0', FAIL),
 84
 85    # Test various letter escapes
 86    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
 87    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
 88    # NOTE: not an error under PCRE/PRE:
 89    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
 90    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
 91    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
 92    # new \x semantics
 93    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
 94    (r'\x00f', '\017', FAIL, 'found', chr(15)),
 95    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
 96    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
 97    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
 98    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
 99
100    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
101     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
102
103    # Test that . only matches \n in DOTALL mode
104    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
105    ('a.b', 'a\nb', FAIL),
106    ('a.*b', 'acc\nccb', FAIL),
107    ('a.{4,5}b', 'acc\nccb', FAIL),
108    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
109    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
110    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
111    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
112    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
113
114    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
115    ('', '', SUCCEED, 'found', ''),    # Empty pattern
116    ('abc', 'abc', SUCCEED, 'found', 'abc'),
117    ('abc', 'xbc', FAIL),
118    ('abc', 'axc', FAIL),
119    ('abc', 'abx', FAIL),
120    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
121    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
122    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
123    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
124    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
125    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
126    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
127    ('ab+bc', 'abc', FAIL),
128    ('ab+bc', 'abq', FAIL),
129    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
130    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
131    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
132    ('ab?bc', 'abbbbc', FAIL),
133    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
134    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
135    ('^abc$', 'abcc', FAIL),
136    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
137    ('^abc$', 'aabc', FAIL),
138    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
139    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
140    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
141    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
142    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
143    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
144    ('a.*c', 'axyzd', FAIL),
145    ('a[bc]d', 'abc', FAIL),
146    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
147    ('a[b-d]e', 'abd', FAIL),
148    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
149    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
150    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
151    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
152    # NOTE: not an error under PCRE/PRE:
153    # ('a[b-]', 'a-', SYNTAX_ERROR),
154    ('a[]b', '-', SYNTAX_ERROR),
155    ('a[', '-', SYNTAX_ERROR),
156    ('a\\', '-', SYNTAX_ERROR),
157    ('abc)', '-', SYNTAX_ERROR),
158    ('(abc', '-', SYNTAX_ERROR),
159    ('a]', 'a]', SUCCEED, 'found', 'a]'),
160    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
161    ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
162    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
163    ('a[^bc]d', 'abd', FAIL),
164    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
165    ('a[^-b]c', 'a-c', FAIL),
166    ('a[^]b]c', 'a]c', FAIL),
167    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
168    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
169    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
170    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
171    ('\\by\\b', 'xy', FAIL),
172    ('\\by\\b', 'yz', FAIL),
173    ('\\by\\b', 'xyz', FAIL),
174    ('x\\b', 'xyz', FAIL),
175    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
176    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
177    ('z\\B', 'xyz', FAIL),
178    ('\\Bx', 'xyz', FAIL),
179    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
180    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
181    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
182    ('\\By\\B', 'xy', FAIL),
183    ('\\By\\B', 'yz', FAIL),
184    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
185    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
186    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
187    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
188    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
189    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
190    ('$b', 'b', FAIL),
191    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
192    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
193    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
194    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
195    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
196    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
197    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
198    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
199    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
200    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
201    (')(', '-', SYNTAX_ERROR),
202    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
203    ('abc', '', FAIL),
204    ('a*', '', SUCCEED, 'found', ''),
205    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
206    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
207    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
208    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
209    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
210    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
211    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
212    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
213    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
214    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
215    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
216    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
217    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
218    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
219    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
220    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
221    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
222    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
223    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
224    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
225    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
226    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
227    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
228    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
229    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
230    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
231    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
232    ('multiple words of text', 'uh-uh', FAIL),
233    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
234    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
235    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
236    ('[k]', 'ab', FAIL),
237    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
238    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
239    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
240    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
241    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
242    ('^(a+).\\1$', 'aaaa', FAIL),
243    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
244    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
245    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
246    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
247    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
248    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
249    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
250    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
251    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
252    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
253    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
254    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
255    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
256    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
257    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
258    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
259    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
260    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
261    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
262    ('([abc]*)x', 'abc', FAIL),
263    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
264    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
265
266    # Test symbolic groups
267
268    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
269    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
270    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
271    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
272
273    # Test octal escapes/memory references
274
275    ('\\1', 'a', SYNTAX_ERROR),
276    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
277    ('\\141', 'a', SUCCEED, 'found', 'a'),
278    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
279
280    # All tests from Perl
281
282    ('abc', 'abc', SUCCEED, 'found', 'abc'),
283    ('abc', 'xbc', FAIL),
284    ('abc', 'axc', FAIL),
285    ('abc', 'abx', FAIL),
286    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
287    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
288    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
289    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
290    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
291    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
292    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
293    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
294    ('ab+bc', 'abc', FAIL),
295    ('ab+bc', 'abq', FAIL),
296    ('ab{1,}bc', 'abq', FAIL),
297    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
298    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
299    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
300    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
301    ('ab{4,5}bc', 'abbbbc', FAIL),
302    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
303    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
304    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
305    ('ab?bc', 'abbbbc', FAIL),
306    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
307    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
308    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
309    ('^abc$', 'abcc', FAIL),
310    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
311    ('^abc$', 'aabc', FAIL),
312    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
313    ('^', 'abc', SUCCEED, 'found', ''),
314    ('$', 'abc', SUCCEED, 'found', ''),
315    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
316    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
317    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
318    ('a.*c', 'axyzd', FAIL),
319    ('a[bc]d', 'abc', FAIL),
320    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
321    ('a[b-d]e', 'abd', FAIL),
322    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
323    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
324    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
325    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
326    ('a[b-a]', '-', SYNTAX_ERROR),
327    ('a[]b', '-', SYNTAX_ERROR),
328    ('a[', '-', SYNTAX_ERROR),
329    ('a]', 'a]', SUCCEED, 'found', 'a]'),
330    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
331    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
332    ('a[^bc]d', 'abd', FAIL),
333    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
334    ('a[^-b]c', 'a-c', FAIL),
335    ('a[^]b]c', 'a]c', FAIL),
336    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
337    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
338    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
339    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
340    ('*a', '-', SYNTAX_ERROR),
341    ('(*)b', '-', SYNTAX_ERROR),
342    ('$b', 'b', FAIL),
343    ('a\\', '-', SYNTAX_ERROR),
344    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
345    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
346    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
347    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
348    ('abc)', '-', SYNTAX_ERROR),
349    ('(abc', '-', SYNTAX_ERROR),
350    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
351    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
352    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
353    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
354    ('a**', '-', SYNTAX_ERROR),
355    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
356    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
357    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
358    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
359    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
360    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
361    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
362    (')(', '-', SYNTAX_ERROR),
363    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
364    ('abc', '', FAIL),
365    ('a*', '', SUCCEED, 'found', ''),
366    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
367    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
368    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
369    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
370    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
371    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
372    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
373    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
374    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
375    ('^(ab|cd)e', 'abcde', FAIL),
376    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
377    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
378    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
379    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
380    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
381    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
382    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
383    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
384    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
385    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
386    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
387    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
388    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
389    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
390    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
391    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
392    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
393    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
394    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
395    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
396# Python does not have the same rules for \\41 so this is a syntax error
397#    ('((((((((((a))))))))))\\41', 'aa', FAIL),
398#    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
399    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
400    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
401    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
402    ('multiple words of text', 'uh-uh', FAIL),
403    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
404    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
405    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
406    ('[k]', 'ab', FAIL),
407    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
408    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
409    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
410    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
411    ('(?i)abc', 'XBC', FAIL),
412    ('(?i)abc', 'AXC', FAIL),
413    ('(?i)abc', 'ABX', FAIL),
414    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
415    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
416    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
417    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
418    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
419    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
420    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
421    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
422    ('(?i)ab+bc', 'ABC', FAIL),
423    ('(?i)ab+bc', 'ABQ', FAIL),
424    ('(?i)ab{1,}bc', 'ABQ', FAIL),
425    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
426    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
427    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
428    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
429    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
430    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
431    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
432    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
433    ('(?i)ab??bc', 'ABBBBC', FAIL),
434    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
435    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
436    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
437    ('(?i)^abc$', 'ABCC', FAIL),
438    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
439    ('(?i)^abc$', 'AABC', FAIL),
440    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
441    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
442    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
443    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
444    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
445    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
446    ('(?i)a.*c', 'AXYZD', FAIL),
447    ('(?i)a[bc]d', 'ABC', FAIL),
448    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
449    ('(?i)a[b-d]e', 'ABD', FAIL),
450    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
451    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
452    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
453    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
454    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
455    ('(?i)a[]b', '-', SYNTAX_ERROR),
456    ('(?i)a[', '-', SYNTAX_ERROR),
457    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
458    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
459    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
460    ('(?i)a[^bc]d', 'ABD', FAIL),
461    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
462    ('(?i)a[^-b]c', 'A-C', FAIL),
463    ('(?i)a[^]b]c', 'A]C', FAIL),
464    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
465    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
466    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
467    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
468    ('(?i)*a', '-', SYNTAX_ERROR),
469    ('(?i)(*)b', '-', SYNTAX_ERROR),
470    ('(?i)$b', 'B', FAIL),
471    ('(?i)a\\', '-', SYNTAX_ERROR),
472    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
473    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
474    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
475    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
476    ('(?i)abc)', '-', SYNTAX_ERROR),
477    ('(?i)(abc', '-', SYNTAX_ERROR),
478    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
479    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
480    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
481    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
482    ('(?i)a**', '-', SYNTAX_ERROR),
483    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
484    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
485    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
486    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
487    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
488    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
489    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
490    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
491    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
492    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
493    ('(?i))(', '-', SYNTAX_ERROR),
494    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
495    ('(?i)abc', '', FAIL),
496    ('(?i)a*', '', SUCCEED, 'found', ''),
497    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
498    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
499    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
500    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
501    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
502    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
503    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
504    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
505    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
506    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
507    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
508    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
509    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
510    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
511    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
512    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
513    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
514    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
515    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
516    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
517    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
518    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
519    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
520    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
521    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
522    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
523    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
524    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
525    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
526    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
527    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
528    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
529    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
530    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
531    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
532    ('(?i)multiple words of text', 'UH-UH', FAIL),
533    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
534    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
535    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
536    ('(?i)[k]', 'AB', FAIL),
537#    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
538#    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
539    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
540    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
541    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
542    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
543    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
544    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
545    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
546    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
547    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
548    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
549    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
550
551    # lookbehind: split by : but not if it is escaped by -.
552    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
553    # escaping with \ as we know it
554    ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
555    # terminating with ' and escaping with ? as in edifact
556    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
557
558    # Comments using the (?#...) syntax
559
560    ('w(?# comment', 'w', SYNTAX_ERROR),
561    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
562
563    # Check odd placement of embedded pattern modifiers
564
565    # not an error under PCRE/PRE:
566    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
567    # ('w(?i)', 'W', SYNTAX_ERROR),
568
569    # Comments using the x embedded pattern modifier
570
571    ("""(?x)w# comment 1
572        x y
573        # comment 2
574        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
575
576    # using the m embedded pattern modifier
577
578    ('^abc', """jkl
579abc
580xyz""", FAIL),
581    ('(?m)^abc', """jkl
582abc
583xyz""", SUCCEED, 'found', 'abc'),
584
585    ('(?m)abc$', """jkl
586xyzabc
587123""", SUCCEED, 'found', 'abc'),
588
589    # using the s embedded pattern modifier
590
591    ('a.b', 'a\nb', FAIL),
592    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
593
594    # test \w, etc. both inside and outside character classes
595
596    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
597    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
598    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
599    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
600    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
601    # not an error under PCRE/PRE:
602    # ('[\\d-x]', '-', SYNTAX_ERROR),
603    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
604    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
605
606    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
607    # new \x semantics
608    (r'\x00ff', '\377', FAIL),
609    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
610    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
611    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
612    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
613    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
614
615    #
616    # post-1.5.2 additions
617
618    # xmllib problem
619    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
620    # bug 110866: reference to undefined group
621    (r'((.)\1+)', '', SYNTAX_ERROR),
622    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
623    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
624    # bug 112468: various expected syntax errors
625    (r'(', '', SYNTAX_ERROR),
626    (r'[\41]', '!', SUCCEED, 'found', '!'),
627    # bug 114033: nothing to repeat
628    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
629    # bug 115040: rescan if flags are modified inside pattern
630    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
631    # bug 115618: negative lookahead
632    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
633    # bug 116251: character class bug
634    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
635    # bug 123769+127259: non-greedy backtracking bug
636    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
637    (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
638    (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
639    # bug 127259: \Z shouldn't depend on multiline mode
640    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
641    # bug 128899: uppercase literals under the ignorecase flag
642    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
643    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
644    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
645    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
646    # bug 130748: ^* should be an error (nothing to repeat)
647    (r'^*', '', SYNTAX_ERROR),
648    # bug 133283: minimizing repeat problem
649    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
650    # bug 477728: minimizing repeat problem
651    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
652    # bug 483789: minimizing repeat problem
653    (r'a[^>]*?b', 'a>b', FAIL),
654    # bug 490573: minimizing repeat problem
655    (r'^a*?$', 'foo', FAIL),
656    # bug 470582: nested groups problem
657    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
658    # another minimizing repeat problem (capturing groups in assertions)
659    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
660    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
661    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
662]
663
# Unicode-only cases.  The sample character is built through eval() so that
# a SyntaxError raised while parsing the u'...' literal can be caught here;
# in that case the extra cases are simply skipped.
# NOTE(review): presumably this guards interpreters built without Unicode
# support -- confirm before relying on it.
try:
    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
except SyntaxError:
    pass
else:
    tests.extend([
        # bug 410271: \b broken under locales
        (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
        (r'(?u)\b.\b', u, SUCCEED, 'found', u),
        (r'(?u)\w', u, SUCCEED, 'found', u),
    ])