PageRenderTime 97ms CodeModel.GetById 41ms app.highlight 49ms RepoModel.GetById 1ms app.codeStats 1ms

/pypy/rlib/rsre/test/test_re.py

https://bitbucket.org/evelyn559/pypy
Python | 660 lines | 628 code | 18 blank | 14 comment | 5 complexity | f0401dcc554dfbba37f1e1a041b3821c MD5 | raw file
  1import sys, os
  2from pypy.rlib.rsre.test.test_match import get_code
  3from pypy.rlib.rsre import rsre_re as re
  4
  5
  6class TestRe:
  7
  8    def test_search_star_plus(self):
  9        assert re.search('x*', 'axx').span(0) == (0, 0)
 10        assert re.search('x*', 'axx').span() == (0, 0)
 11        assert re.search('x+', 'axx').span(0) == (1, 3)
 12        assert re.search('x+', 'axx').span() == (1, 3)
 13        assert re.search('x', 'aaa') == None
 14        assert re.match('a*', 'xxx').span(0) == (0, 0)
 15        assert re.match('a*', 'xxx').span() == (0, 0)
 16        assert re.match('x*', 'xxxa').span(0) == (0, 3)
 17        assert re.match('x*', 'xxxa').span() == (0, 3)
 18        assert re.match('a+', 'xxx') == None
 19
 20    def bump_num(self, matchobj):
 21        int_value = int(matchobj.group(0))
 22        return str(int_value + 1)
 23
 24    def test_basic_re_sub(self):
 25        assert re.sub("(?i)b+", "x", "bbbb BBBB") == 'x x'
 26        assert re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y') == (
 27                         '9.3 -3 24x100y')
 28        assert re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3) == (
 29                         '9.3 -3 23x99y')
 30
 31        assert re.sub('.', lambda m: r"\n", 'x') == '\\n'
 32        assert re.sub('.', r"\n", 'x') == '\n'
 33
 34        s = r"\1\1"
 35        assert re.sub('(.)', s, 'x') == 'xx'
 36        assert re.sub('(.)', re.escape(s), 'x') == s
 37        assert re.sub('(.)', lambda m: s, 'x') == s
 38
 39        assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
 40        assert re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx') == 'xxxx'
 41        assert re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx') == 'xxxx'
 42        assert re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx') == 'xxxx'
 43
 44        assert re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a') == (
 45                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
 46        assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
 47        assert re.sub('a', '\t\n\v\r\f\a', 'a') == (
 48                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
 49
 50        assert re.sub('^\s*', 'X', 'test') == 'Xtest'
 51
 52    def test_bug_449964(self):
 53        # fails for group followed by other escape
 54        assert re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx') == (
 55                         'xx\bxx\b')
 56
 57    def test_bug_449000(self):
 58        # Test for sub() on escaped characters
 59        assert re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n') == (
 60                         'abc\ndef\n')
 61        assert re.sub('\r\n', r'\n', 'abc\r\ndef\r\n') == (
 62                         'abc\ndef\n')
 63        assert re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n') == (
 64                         'abc\ndef\n')
 65        assert re.sub('\r\n', '\n', 'abc\r\ndef\r\n') == (
 66                         'abc\ndef\n')
 67
 68    def test_bug_1140(self):
 69        # re.sub(x, y, u'') should return u'', not '', and
 70        # re.sub(x, y, '') should return '', not u''.
 71        # Also:
 72        # re.sub(x, y, unicode(x)) should return unicode(y), and
 73        # re.sub(x, y, str(x)) should return
 74        #     str(y) if isinstance(y, str) else unicode(y).
 75        for x in 'x', u'x':
 76            for y in 'y', u'y':
 77                z = re.sub(x, y, u'')
 78                assert z == u''
 79                assert type(z) == unicode
 80                #
 81                z = re.sub(x, y, '')
 82                assert z == ''
 83                assert type(z) == str
 84                #
 85                z = re.sub(x, y, unicode(x))
 86                assert z == y
 87                assert type(z) == unicode
 88                #
 89                z = re.sub(x, y, str(x))
 90                assert z == y
 91                assert type(z) == type(y)
 92
 93    def test_sub_template_numeric_escape(self):
 94        # bug 776311 and friends
 95        assert re.sub('x', r'\0', 'x') == '\0'
 96        assert re.sub('x', r'\000', 'x') == '\000'
 97        assert re.sub('x', r'\001', 'x') == '\001'
 98        assert re.sub('x', r'\008', 'x') == '\0' + '8'
 99        assert re.sub('x', r'\009', 'x') == '\0' + '9'
100        assert re.sub('x', r'\111', 'x') == '\111'
101        assert re.sub('x', r'\117', 'x') == '\117'
102
103        assert re.sub('x', r'\1111', 'x') == '\1111'
104        assert re.sub('x', r'\1111', 'x') == '\111' + '1'
105
106        assert re.sub('x', r'\00', 'x') == '\x00'
107        assert re.sub('x', r'\07', 'x') == '\x07'
108        assert re.sub('x', r'\08', 'x') == '\0' + '8'
109        assert re.sub('x', r'\09', 'x') == '\0' + '9'
110        assert re.sub('x', r'\0a', 'x') == '\0' + 'a'
111
112        assert re.sub('x', r'\400', 'x') == '\0'
113        assert re.sub('x', r'\777', 'x') == '\377'
114
115        raises(re.error, re.sub, 'x', r'\1', 'x')
116        raises(re.error, re.sub, 'x', r'\8', 'x')
117        raises(re.error, re.sub, 'x', r'\9', 'x')
118        raises(re.error, re.sub, 'x', r'\11', 'x')
119        raises(re.error, re.sub, 'x', r'\18', 'x')
120        raises(re.error, re.sub, 'x', r'\1a', 'x')
121        raises(re.error, re.sub, 'x', r'\90', 'x')
122        raises(re.error, re.sub, 'x', r'\99', 'x')
123        raises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
124        raises(re.error, re.sub, 'x', r'\11a', 'x')
125        raises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
126        raises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
127
128        # in python2.3 (etc), these loop endlessly in sre_parser.py
129        assert re.sub('(((((((((((x)))))))))))', r'\11', 'x') == 'x'
130        assert re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz') == (
131                         'xz8')
132        assert re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz') == (
133                         'xza')
134
135    def test_qualified_re_sub(self):
136        assert re.sub('a', 'b', 'aaaaa') == 'bbbbb'
137        assert re.sub('a', 'b', 'aaaaa', 1) == 'baaaa'
138
139    def test_bug_114660(self):
140        assert re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there') == (
141                         'hello there')
142
143    def test_bug_462270(self):
144        # Test for empty sub() behaviour, see SF bug #462270
145        assert re.sub('x*', '-', 'abxd') == '-a-b-d-'
146        assert re.sub('x+', '-', 'abxd') == 'ab-d'
147
148    def test_symbolic_refs(self):
149        raises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
150        raises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
151        raises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
152        raises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
153        raises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
154        raises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
155        raises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
156        raises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
157        raises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
158
159    def test_re_subn(self):
160        assert re.subn("(?i)b+", "x", "bbbb BBBB") == ('x x', 2)
161        assert re.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1)
162        assert re.subn("b+", "x", "xyz") == ('xyz', 0)
163        assert re.subn("b*", "x", "xyz") == ('xxxyxzx', 4)
164        assert re.subn("b*", "x", "xyz", 2) == ('xxxyz', 2)
165
166    def test_re_split(self):
167        assert re.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c']
168        assert re.split(":*", ":a:b::c") == ['', 'a', 'b', 'c']
169        assert re.split("(:*)", ":a:b::c") == (
170                         ['', ':', 'a', ':', 'b', '::', 'c'])
171        assert re.split("(?::*)", ":a:b::c") == ['', 'a', 'b', 'c']
172        assert re.split("(:)*", ":a:b::c") == (
173                         ['', ':', 'a', ':', 'b', ':', 'c'])
174        assert re.split("([b:]+)", ":a:b::c") == (
175                         ['', ':', 'a', ':b::', 'c'])
176        assert re.split("(b)|(:+)", ":a:b::c") == (
177                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
178                          None, '::', 'c'])
179        assert re.split("(?:b)|(?::+)", ":a:b::c") == (
180                         ['', 'a', '', '', 'c'])
181
182    def test_qualified_re_split(self):
183        assert re.split(":", ":a:b::c", 2) == ['', 'a', 'b::c']
184        assert re.split(':', 'a:b:c:d', 2) == ['a', 'b', 'c:d']
185        assert re.split("(:)", ":a:b::c", 2) == (
186                         ['', ':', 'a', ':', 'b::c'])
187        assert re.split("(:*)", ":a:b::c", 2) == (
188                         ['', ':', 'a', ':', 'b::c'])
189
190    def test_re_findall(self):
191        assert re.findall(":+", "abc") == []
192        assert re.findall(":+", "a:b::c:::d") == [":", "::", ":::"]
193        assert re.findall("(:+)", "a:b::c:::d") == [":", "::", ":::"]
194        assert re.findall("(:)(:*)", "a:b::c:::d") == [(":", ""),
195                                                               (":", ":"),
196                                                               (":", "::")]
197
198    def test_bug_117612(self):
199        assert re.findall(r"(a|(b))", "aba") == (
200                         [("a", ""),("b", "b"),("a", "")])
201
202    def test_re_match(self):
203        assert re.match('a', 'a').groups() == ()
204        assert re.match('(a)', 'a').groups() == ('a',)
205        assert re.match(r'(a)', 'a').group(0) == 'a'
206        assert re.match(r'(a)', 'a').group(1) == 'a'
207        assert re.match(r'(a)', 'a').group(1, 1) == ('a', 'a')
208
209        pat = re.compile('((a)|(b))(c)?')
210        assert pat.match('a').groups() == ('a', 'a', None, None)
211        assert pat.match('b').groups() == ('b', None, 'b', None)
212        assert pat.match('ac').groups() == ('a', 'a', None, 'c')
213        assert pat.match('bc').groups() == ('b', None, 'b', 'c')
214        assert pat.match('bc').groups("") == ('b', "", 'b', 'c')
215
216        # A single group
217        m = re.match('(a)', 'a')
218        assert m.group(0) == 'a'
219        assert m.group(0) == 'a'
220        assert m.group(1) == 'a'
221        assert m.group(1, 1) == ('a', 'a')
222
223        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
224        assert pat.match('a').group(1, 2, 3) == ('a', None, None)
225        assert pat.match('b').group('a1', 'b2', 'c3') == (
226                         (None, 'b', None))
227        assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c')
228
229    def test_bug_923(self):
230        # Issue923: grouping inside optional lookahead problem
231        assert re.match(r'a(?=(b))?', "ab").groups() == ("b",)
232        assert re.match(r'(a(?=(b))?)', "ab").groups() == ('a', 'b')
233        assert re.match(r'(a)(?=(b))?', "ab").groups() == ('a', 'b')
234        assert re.match(r'(?P<g1>a)(?=(?P<g2>b))?', "ab").groupdict() == {'g1': 'a', 'g2': 'b'}
235
236    def test_re_groupref_exists(self):
237        assert re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups() == (
238                         ('(', 'a'))
239        assert re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups() == (
240                         (None, 'a'))
241        assert re.match('^(\()?([^()]+)(?(1)\))$', 'a)') == None
242        assert re.match('^(\()?([^()]+)(?(1)\))$', '(a') == None
243        assert re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups() == (
244                         ('a', 'b'))
245        assert re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups() == (
246                         (None, 'd'))
247        assert re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups() == (
248                         (None, 'd'))
249        assert re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups() == (
250                         ('a', ''))
251
252        # Tests for bug #1177831: exercise groups other than the first group
253        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
254        assert p.match('abc').groups() == (
255                         ('a', 'b', 'c'))
256        assert p.match('ad').groups() == (
257                         ('a', None, 'd'))
258        assert p.match('abd') == None
259        assert p.match('ac') == None
260
261
262    def test_re_groupref(self):
263        assert re.match(r'^(\|)?([^()]+)\1$', '|a|').groups() == (
264                         ('|', 'a'))
265        assert re.match(r'^(\|)?([^()]+)\1?$', 'a').groups() == (
266                         (None, 'a'))
267        assert re.match(r'^(\|)?([^()]+)\1$', 'a|') == None
268        assert re.match(r'^(\|)?([^()]+)\1$', '|a') == None
269        assert re.match(r'^(?:(a)|c)(\1)$', 'aa').groups() == (
270                         ('a', 'a'))
271        assert re.match(r'^(?:(a)|c)(\1)?$', 'c').groups() == (
272                         (None, None))
273
274    def test_groupdict(self):
275        assert re.match('(?P<first>first) (?P<second>second)',
276                                  'first second').groupdict() == (
277                         {'first':'first', 'second':'second'})
278
279    def test_expand(self):
280        assert (re.match("(?P<first>first) (?P<second>second)",
281                                  "first second")
282                                  .expand(r"\2 \1 \g<second> \g<first>")) == (
283                         "second first second first")
284
285    def test_repeat_minmax(self):
286        assert re.match("^(\w){1}$", "abc") == None
287        assert re.match("^(\w){1}?$", "abc") == None
288        assert re.match("^(\w){1,2}$", "abc") == None
289        assert re.match("^(\w){1,2}?$", "abc") == None
290
291        assert re.match("^(\w){3}$", "abc").group(1) == "c"
292        assert re.match("^(\w){1,3}$", "abc").group(1) == "c"
293        assert re.match("^(\w){1,4}$", "abc").group(1) == "c"
294        assert re.match("^(\w){3,4}?$", "abc").group(1) == "c"
295        assert re.match("^(\w){3}?$", "abc").group(1) == "c"
296        assert re.match("^(\w){1,3}?$", "abc").group(1) == "c"
297        assert re.match("^(\w){1,4}?$", "abc").group(1) == "c"
298        assert re.match("^(\w){3,4}?$", "abc").group(1) == "c"
299
300        assert re.match("^x{1}$", "xxx") == None
301        assert re.match("^x{1}?$", "xxx") == None
302        assert re.match("^x{1,2}$", "xxx") == None
303        assert re.match("^x{1,2}?$", "xxx") == None
304
305        assert re.match("^x{3}$", "xxx") != None
306        assert re.match("^x{1,3}$", "xxx") != None
307        assert re.match("^x{1,4}$", "xxx") != None
308        assert re.match("^x{3,4}?$", "xxx") != None
309        assert re.match("^x{3}?$", "xxx") != None
310        assert re.match("^x{1,3}?$", "xxx") != None
311        assert re.match("^x{1,4}?$", "xxx") != None
312        assert re.match("^x{3,4}?$", "xxx") != None
313
314        assert re.match("^x{}$", "xxx") == None
315        assert re.match("^x{}$", "x{}") != None
316
317    def test_getattr(self):
318        assert re.match("(a)", "a").pos == 0
319        assert re.match("(a)", "a").endpos == 1
320        assert re.match("(a)", "a").string == "a"
321        assert re.match("(a)", "a").regs == ((0, 1), (0, 1))
322        assert re.match("(a)", "a").re != None
323
324    def test_special_escapes(self):
325        assert re.search(r"\b(b.)\b",
326                                   "abcd abc bcd bx").group(1) == "bx"
327        assert re.search(r"\B(b.)\B",
328                                   "abc bcd bc abxd").group(1) == "bx"
329        assert re.search(r"\b(b.)\b",
330                                   "abcd abc bcd bx", re.LOCALE).group(1) == "bx"
331        assert re.search(r"\B(b.)\B",
332                                   "abc bcd bc abxd", re.LOCALE).group(1) == "bx"
333        assert re.search(r"\b(b.)\b",
334                                   "abcd abc bcd bx", re.UNICODE).group(1) == "bx"
335        assert re.search(r"\B(b.)\B",
336                                   "abc bcd bc abxd", re.UNICODE).group(1) == "bx"
337        assert re.search(r"^abc$", "\nabc\n", re.M).group(0) == "abc"
338        assert re.search(r"^\Aabc\Z$", "abc", re.M).group(0) == "abc"
339        assert re.search(r"^\Aabc\Z$", "\nabc\n", re.M) == None
340        assert re.search(r"\b(b.)\b",
341                                   u"abcd abc bcd bx").group(1) == "bx"
342        assert re.search(r"\B(b.)\B",
343                                   u"abc bcd bc abxd").group(1) == "bx"
344        assert re.search(r"^abc$", u"\nabc\n", re.M).group(0) == "abc"
345        assert re.search(r"^\Aabc\Z$", u"abc", re.M).group(0) == "abc"
346        assert re.search(r"^\Aabc\Z$", u"\nabc\n", re.M) == None
347        assert re.search(r"\d\D\w\W\s\S",
348                                   "1aa! a").group(0) == "1aa! a"
349        assert re.search(r"\d\D\w\W\s\S",
350                                   "1aa! a", re.LOCALE).group(0) == "1aa! a"
351        assert re.search(r"\d\D\w\W\s\S",
352                                   "1aa! a", re.UNICODE).group(0) == "1aa! a"
353
354    def test_ignore_case(self):
355        assert re.match("abc", "ABC", re.I).group(0) == "ABC"
356        assert re.match("abc", u"ABC", re.I).group(0) == "ABC"
357
358    def test_bigcharset(self):
359        assert re.match(u"([\u2222\u2223])",
360                                  u"\u2222").group(1) == u"\u2222"
361        assert re.match(u"([\u2222\u2223])",
362                                  u"\u2222", re.UNICODE).group(1) == u"\u2222"
363
364    def test_anyall(self):
365        assert re.match("a.b", "a\nb", re.DOTALL).group(0) == (
366                         "a\nb")
367        assert re.match("a.*b", "a\n\nb", re.DOTALL).group(0) == (
368                         "a\n\nb")
369
370    def test_non_consuming(self):
371        assert re.match("(a(?=\s[^a]))", "a b").group(1) == "a"
372        assert re.match("(a(?=\s[^a]*))", "a b").group(1) == "a"
373        assert re.match("(a(?=\s[abc]))", "a b").group(1) == "a"
374        assert re.match("(a(?=\s[abc]*))", "a bc").group(1) == "a"
375        assert re.match(r"(a)(?=\s\1)", "a a").group(1) == "a"
376        assert re.match(r"(a)(?=\s\1*)", "a aa").group(1) == "a"
377        assert re.match(r"(a)(?=\s(abc|a))", "a a").group(1) == "a"
378
379        assert re.match(r"(a(?!\s[^a]))", "a a").group(1) == "a"
380        assert re.match(r"(a(?!\s[abc]))", "a d").group(1) == "a"
381        assert re.match(r"(a)(?!\s\1)", "a b").group(1) == "a"
382        assert re.match(r"(a)(?!\s(abc|a))", "a b").group(1) == "a"
383
384    def test_ignore_case(self):
385        assert re.match(r"(a\s[^a])", "a b", re.I).group(1) == "a b"
386        assert re.match(r"(a\s[^a]*)", "a bb", re.I).group(1) == "a bb"
387        assert re.match(r"(a\s[abc])", "a b", re.I).group(1) == "a b"
388        assert re.match(r"(a\s[abc]*)", "a bb", re.I).group(1) == "a bb"
389        assert re.match(r"((a)\s\2)", "a a", re.I).group(1) == "a a"
390        assert re.match(r"((a)\s\2*)", "a aa", re.I).group(1) == "a aa"
391        assert re.match(r"((a)\s(abc|a))", "a a", re.I).group(1) == "a a"
392        assert re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1) == "a aa"
393
394    def test_category(self):
395        assert re.match(r"(\s)", " ").group(1) == " "
396
397    def test_getlower(self):
398        import _sre
399        assert _sre.getlower(ord('A'), 0) == ord('a')
400        assert _sre.getlower(ord('A'), re.LOCALE) == ord('a')
401        assert _sre.getlower(ord('A'), re.UNICODE) == ord('a')
402
403        assert re.match("abc", "ABC", re.I).group(0) == "ABC"
404        assert re.match("abc", u"ABC", re.I).group(0) == "ABC"
405
406    def test_not_literal(self):
407        assert re.search("\s([^a])", " b").group(1) == "b"
408        assert re.search("\s([^a]*)", " bb").group(1) == "bb"
409
410    def test_search_coverage(self):
411        assert re.search("\s(b)", " b").group(1) == "b"
412        assert re.search("a\s", "a ").group(0) == "a "
413
414    def test_re_escape(self):
415        p=""
416        for i in range(0, 256):
417            p = p + chr(i)
418            assert re.match(re.escape(chr(i)), chr(i)) is not None
419            assert re.match(re.escape(chr(i)), chr(i)).span() == (0,1)
420
421        pat=re.compile(re.escape(p))
422        assert pat.match(p) is not None
423        assert pat.match(p).span() == (0,256)
424
425    def test_pickling(self):
426        import pickle
427        self.pickle_test(pickle)
428        import cPickle
429        self.pickle_test(cPickle)
430        # old pickles expect the _compile() reconstructor in sre module
431        import warnings
432        original_filters = warnings.filters[:]
433        try:
434            warnings.filterwarnings("ignore", "The sre module is deprecated",
435                                    DeprecationWarning)
436            from sre import _compile
437        finally:
438            warnings.filters = original_filters
439
440    def pickle_test(self, pickle):
441        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
442        s = pickle.dumps(oldpat)
443        newpat = pickle.loads(s)
444        # Not using object identity for _sre.py, since some Python builds do
445        # not seem to preserve that in all cases (observed on an UCS-4 build
446        # of 2.4.1).
447        #self.assertEqual(oldpat, newpat)
448        assert oldpat.__dict__ == newpat.__dict__
449
450    def test_constants(self):
451        assert re.I == re.IGNORECASE
452        assert re.L == re.LOCALE
453        assert re.M == re.MULTILINE
454        assert re.S == re.DOTALL
455        assert re.X == re.VERBOSE
456
457    def test_flags(self):
458        for flag in [re.I, re.M, re.X, re.S, re.L]:
459            assert re.compile('^pattern$', flag) != None
460
461    def test_sre_character_literals(self):
462        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
463            assert re.match(r"\%03o" % i, chr(i)) != None
464            assert re.match(r"\%03o0" % i, chr(i)+"0") != None
465            assert re.match(r"\%03o8" % i, chr(i)+"8") != None
466            assert re.match(r"\x%02x" % i, chr(i)) != None
467            assert re.match(r"\x%02x0" % i, chr(i)+"0") != None
468            assert re.match(r"\x%02xz" % i, chr(i)+"z") != None
469        raises(re.error, re.match, "\911", "")
470
471    def test_sre_character_class_literals(self):
472        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
473            assert re.match(r"[\%03o]" % i, chr(i)) != None
474            assert re.match(r"[\%03o0]" % i, chr(i)) != None
475            assert re.match(r"[\%03o8]" % i, chr(i)) != None
476            assert re.match(r"[\x%02x]" % i, chr(i)) != None
477            assert re.match(r"[\x%02x0]" % i, chr(i)) != None
478            assert re.match(r"[\x%02xz]" % i, chr(i)) != None
479        raises(re.error, re.match, "[\911]", "")
480
481    def test_bug_113254(self):
482        assert re.match(r'(a)|(b)', 'b').start(1) == -1
483        assert re.match(r'(a)|(b)', 'b').end(1) == -1
484        assert re.match(r'(a)|(b)', 'b').span(1) == (-1, -1)
485
486    def test_bug_527371(self):
487        # bug described in patches 527371/672491
488        assert re.match(r'(a)?a','a').lastindex == None
489        assert re.match(r'(a)(b)?b','ab').lastindex == 1
490        assert re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup == 'a'
491        assert re.match("(?P<a>a(b))", "ab").lastgroup == 'a'
492        assert re.match("((a))", "a").lastindex == 1
493
494    def test_bug_545855(self):
495        # bug 545855 -- This pattern failed to cause a compile error as it
496        # should, instead provoking a TypeError.
497        raises(re.error, re.compile, 'foo[a-')
498
499    def test_bug_418626(self):
500        # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
501        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
502        # pattern '*?' on a long string.
503        assert re.match('.*?c', 10000*'ab'+'cd').end(0) == 20001
504        assert re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0) == (
505                         20003)
506        assert re.match('.*?cd', 20000*'abc'+'de').end(0) == 60001
507        # non-simple '*?' still used to hit the recursion limit, before the
508        # non-recursive scheme was implemented.
509        assert re.search('(a|b)*?c', 10000*'ab'+'cd').end(0) == 20001
510
511    def test_bug_612074(self):
512        pat=u"["+re.escape(u"\u2039")+u"]"
513        assert re.compile(pat) and 1 == 1
514
515    def test_stack_overflow(self):
516        # nasty cases that used to overflow the straightforward recursive
517        # implementation of repeated groups.
518        assert re.match('(x)*', 50000*'x').group(1) == 'x'
519        assert re.match('(x)*y', 50000*'x'+'y').group(1) == 'x'
520        assert re.match('(x)*?y', 50000*'x'+'y').group(1) == 'x'
521
522    def test_scanner(self):
523        def s_ident(scanner, token): return token
524        def s_operator(scanner, token): return "op%s" % token
525        def s_float(scanner, token): return float(token)
526        def s_int(scanner, token): return int(token)
527
528        scanner = re.Scanner([
529            (r"[a-zA-Z_]\w*", s_ident),
530            (r"\d+\.\d*", s_float),
531            (r"\d+", s_int),
532            (r"=|\+|-|\*|/", s_operator),
533            (r"\s+", None),
534            ])
535
536        assert scanner.scanner.scanner("").pattern != None
537
538        assert scanner.scan("sum = 3*foo + 312.50 + bar") == (
539                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
540                           'op+', 'bar'], ''))
541
542    def test_bug_448951(self):
543        # bug 448951 (similar to 429357, but with single char match)
544        # (Also test greedy matches.)
545        for op in '','?','*':
546            assert re.match(r'((.%s):)?z'%op, 'z').groups() == (
547                             (None, None))
548            assert re.match(r'((.%s):)?z'%op, 'a:z').groups() == (
549                             ('a:', 'a'))
550
551    def test_bug_725106(self):
552        # capturing groups in alternatives in repeats
553        assert re.match('^((a)|b)*', 'abc').groups() == (
554                         ('b', 'a'))
555        assert re.match('^(([ab])|c)*', 'abc').groups() == (
556                         ('c', 'b'))
557        assert re.match('^((d)|[ab])*', 'abc').groups() == (
558                         ('b', None))
559        assert re.match('^((a)c|[ab])*', 'abc').groups() == (
560                         ('b', None))
561        assert re.match('^((a)|b)*?c', 'abc').groups() == (
562                         ('b', 'a'))
563        assert re.match('^(([ab])|c)*?d', 'abcd').groups() == (
564                         ('c', 'b'))
565        assert re.match('^((d)|[ab])*?c', 'abc').groups() == (
566                         ('b', None))
567        assert re.match('^((a)c|[ab])*?c', 'abc').groups() == (
568                         ('b', None))
569
570    def test_bug_725149(self):
571        # mark_stack_base restoring before restoring marks
572        assert re.match('(a)(?:(?=(b)*)c)*', 'abb').groups() == (
573                         ('a', None))
574        assert re.match('(a)((?!(b)*))*', 'abb').groups() == (
575                         ('a', None, None))
576
577    def test_bug_764548(self):
578        # bug 764548, re.compile() barfs on str/unicode subclasses
579        try:
580            unicode
581        except NameError:
582            return  # no problem if we have no unicode
583        class my_unicode(unicode): pass
584        pat = re.compile(my_unicode("abc"))
585        assert pat.match("xyz") == None
586
587    def test_finditer(self):
588        iter = re.finditer(r":+", "a:b::c:::d")
589        assert [item.group(0) for item in iter] == (
590                         [":", "::", ":::"])
591
592    def test_bug_926075(self):
593        try:
594            unicode
595        except NameError:
596            return # no problem if we have no unicode
597        assert (re.compile('bug_926075') is not
598                     re.compile(eval("u'bug_926075'")))
599
600    def test_bug_931848(self):
601        try:
602            unicode
603        except NameError:
604            pass
605        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
606        assert re.compile(pattern).split("a.b.c") == (
607                         ['a','b','c'])
608
609    def test_bug_581080(self):
610        iter = re.finditer(r"\s", "a b")
611        assert iter.next().span() == (1,2)
612        raises(StopIteration, iter.next)
613
614        if 0:    # XXX
615            scanner = re.compile(r"\s").scanner("a b")
616            assert scanner.search().span() == (1, 2)
617            assert scanner.search() == None
618
619    def test_bug_817234(self):
620        iter = re.finditer(r".*", "asdf")
621        assert iter.next().span() == (0, 4)
622        assert iter.next().span() == (4, 4)
623        raises(StopIteration, iter.next)
624
625    def test_empty_array(self):
626        # SF buf 1647541
627        import array
628        for typecode in 'cbBuhHiIlLfd':
629            a = array.array(typecode)
630            assert re.compile("bla").match(a) == None
631            assert re.compile("").match(a).groups() == ()
632
633    def test_inline_flags(self):
634        # Bug #1700
635        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
636        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
637
638        p = re.compile(upper_char, re.I | re.U)
639        q = p.match(lower_char)
640        assert q != None
641
642        p = re.compile(lower_char, re.I | re.U)
643        q = p.match(upper_char)
644        assert q != None
645
646        p = re.compile('(?i)' + upper_char, re.U)
647        q = p.match(lower_char)
648        assert q != None
649
650        p = re.compile('(?i)' + lower_char, re.U)
651        q = p.match(upper_char)
652        assert q != None
653
654        p = re.compile('(?iu)' + upper_char)
655        q = p.match(lower_char)
656        assert q != None
657
658        p = re.compile('(?iu)' + lower_char)
659        q = p.match(upper_char)
660        assert q != None