PageRenderTime 166ms CodeModel.GetById 10ms app.highlight 141ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/test/test_re.py

http://unladen-swallow.googlecode.com/
Python | 818 lines | 778 code | 22 blank | 18 comment | 6 complexity | 491419d6706516edde6c634d57a039b6 MD5 | raw file
  1import sys
  2sys.path = ['.'] + sys.path
  3
  4from test.test_support import verbose, run_unittest
  5import re
  6from re import Scanner
  7import sys, os, traceback
  8from weakref import proxy
  9
 10# Misc tests from Tim Peters' re.doc
 11
 12# WARNING: Don't change details in these tests if you don't know
 13# what you're doing. Some of these tests were carefully modeled to
 14# cover most of the code.
 15
 16import unittest
 17
 18class ReTests(unittest.TestCase):
 19
 20    def test_weakref(self):
 21        s = 'QabbbcR'
 22        x = re.compile('ab+c')
 23        y = proxy(x)
 24        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
 25
 26    def test_search_star_plus(self):
 27        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
 28        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
 29        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
 30        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
 31        self.assertEqual(re.search('x', 'aaa'), None)
 32        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
 33        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
 34        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
 35        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
 36        self.assertEqual(re.match('a+', 'xxx'), None)
 37
 38    def bump_num(self, matchobj):
 39        int_value = int(matchobj.group(0))
 40        return str(int_value + 1)
 41
 42    def test_basic_re_sub(self):
 43        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
 44        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
 45                         '9.3 -3 24x100y')
 46        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
 47                         '9.3 -3 23x99y')
 48
 49        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
 50        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
 51
 52        s = r"\1\1"
 53        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
 54        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
 55        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
 56
 57        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
 58        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
 59        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
 60        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
 61
 62        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
 63                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
 64        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
 65        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
 66                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
 67
 68        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
 69
 70    def test_bug_449964(self):
 71        # fails for group followed by other escape
 72        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
 73                         'xx\bxx\b')
 74
 75    def test_bug_449000(self):
 76        # Test for sub() on escaped characters
 77        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
 78                         'abc\ndef\n')
 79        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
 80                         'abc\ndef\n')
 81        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
 82                         'abc\ndef\n')
 83        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
 84                         'abc\ndef\n')
 85
 86    def test_bug_1140(self):
 87        # re.sub(x, y, u'') should return u'', not '', and
 88        # re.sub(x, y, '') should return '', not u''.
 89        # Also:
 90        # re.sub(x, y, unicode(x)) should return unicode(y), and
 91        # re.sub(x, y, str(x)) should return
 92        #     str(y) if isinstance(y, str) else unicode(y).
 93        for x in 'x', u'x':
 94            for y in 'y', u'y':
 95                z = re.sub(x, y, u'')
 96                self.assertEqual(z, u'')
 97                self.assertEqual(type(z), unicode)
 98                #
 99                z = re.sub(x, y, '')
100                self.assertEqual(z, '')
101                self.assertEqual(type(z), str)
102                #
103                z = re.sub(x, y, unicode(x))
104                self.assertEqual(z, y)
105                self.assertEqual(type(z), unicode)
106                #
107                z = re.sub(x, y, str(x))
108                self.assertEqual(z, y)
109                self.assertEqual(type(z), type(y))
110
111    def test_bug_1661(self):
112        # Verify that flags do not get silently ignored with compiled patterns
113        pattern = re.compile('.')
114        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117        self.assertRaises(ValueError, re.compile, pattern, re.I)
118
119    def test_bug_3629(self):
120        # A regex that triggered a bug in the sre-code validator
121        re.compile("(?P<quote>)(?(quote))")
122
123    def test_sub_template_numeric_escape(self):
124        # bug 776311 and friends
125        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
126        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
127        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
128        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
129        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
130        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
131        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
132
133        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
134        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
135
136        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
137        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
138        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
139        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
140        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
141
142        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
143        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
144
145        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
146        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
147        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
148        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
149        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
150        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
151        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
152        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
153        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
154        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
155        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
156        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
157
158        # in python2.3 (etc), these loop endlessly in sre_parser.py
159        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
160        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
161                         'xz8')
162        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
163                         'xza')
164
165    def test_qualified_re_sub(self):
166        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
167        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
168
169    def test_bug_114660(self):
170        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
171                         'hello there')
172
173    def test_bug_462270(self):
174        # Test for empty sub() behaviour, see SF bug #462270
175        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
176        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
177
178    def test_symbolic_refs(self):
179        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
180        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
181        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
182        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
183        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
184        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
185        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
186        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
187        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
188
189    def test_re_subn(self):
190        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
191        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
192        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
193        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
194        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
195
196    def test_re_split(self):
197        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
198        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
199        self.assertEqual(re.split("(:*)", ":a:b::c"),
200                         ['', ':', 'a', ':', 'b', '::', 'c'])
201        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
202        self.assertEqual(re.split("(:)*", ":a:b::c"),
203                         ['', ':', 'a', ':', 'b', ':', 'c'])
204        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
205                         ['', ':', 'a', ':b::', 'c'])
206        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
207                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
208                          None, '::', 'c'])
209        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
210                         ['', 'a', '', '', 'c'])
211
212    def test_qualified_re_split(self):
213        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
214        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
215        self.assertEqual(re.split("(:)", ":a:b::c", 2),
216                         ['', ':', 'a', ':', 'b::c'])
217        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
218                         ['', ':', 'a', ':', 'b::c'])
219
220    def test_re_findall(self):
221        self.assertEqual(re.findall(":+", "abc"), [])
222        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
223        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
224        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
225                                                               (":", ":"),
226                                                               (":", "::")])
227
228    def test_bug_117612(self):
229        self.assertEqual(re.findall(r"(a|(b))", "aba"),
230                         [("a", ""),("b", "b"),("a", "")])
231
232    def test_re_match(self):
233        self.assertEqual(re.match('a', 'a').groups(), ())
234        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
235        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
236        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
237        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
238
239        pat = re.compile('((a)|(b))(c)?')
240        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
241        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
242        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
243        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
244        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
245
246        # A single group
247        m = re.match('(a)', 'a')
248        self.assertEqual(m.group(0), 'a')
249        self.assertEqual(m.group(0), 'a')
250        self.assertEqual(m.group(1), 'a')
251        self.assertEqual(m.group(1, 1), ('a', 'a'))
252
253        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
254        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
255        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
256                         (None, 'b', None))
257        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
258
259    def test_re_groupref_exists(self):
260        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
261                         ('(', 'a'))
262        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
263                         (None, 'a'))
264        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
265        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
266        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
267                         ('a', 'b'))
268        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
269                         (None, 'd'))
270        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
271                         (None, 'd'))
272        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
273                         ('a', ''))
274
275        # Tests for bug #1177831: exercise groups other than the first group
276        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
277        self.assertEqual(p.match('abc').groups(),
278                         ('a', 'b', 'c'))
279        self.assertEqual(p.match('ad').groups(),
280                         ('a', None, 'd'))
281        self.assertEqual(p.match('abd'), None)
282        self.assertEqual(p.match('ac'), None)
283
284
285    def test_re_groupref(self):
286        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
287                         ('|', 'a'))
288        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
289                         (None, 'a'))
290        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
291        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
292        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
293                         ('a', 'a'))
294        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
295                         (None, None))
296
297    def test_groupdict(self):
298        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
299                                  'first second').groupdict(),
300                         {'first':'first', 'second':'second'})
301
302    def test_expand(self):
303        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
304                                  "first second")
305                                  .expand(r"\2 \1 \g<second> \g<first>"),
306                         "second first second first")
307
308    def test_repeat_minmax(self):
309        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
310        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
311        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
312        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
313
314        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
315        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
316        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
317        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
318        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
319        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
320        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
321        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
322
323        self.assertEqual(re.match("^x{1}$", "xxx"), None)
324        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
325        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
326        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
327
328        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
329        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
330        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
331        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
332        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
333        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
334        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
335        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
336
337        self.assertEqual(re.match("^x{}$", "xxx"), None)
338        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
339
340    def test_getattr(self):
341        self.assertEqual(re.match("(a)", "a").pos, 0)
342        self.assertEqual(re.match("(a)", "a").endpos, 1)
343        self.assertEqual(re.match("(a)", "a").string, "a")
344        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
345        self.assertNotEqual(re.match("(a)", "a").re, None)
346
347    def test_special_escapes(self):
348        self.assertEqual(re.search(r"\b(b.)\b",
349                                   "abcd abc bcd bx").group(1), "bx")
350        self.assertEqual(re.search(r"\B(b.)\B",
351                                   "abc bcd bc abxd").group(1), "bx")
352        self.assertEqual(re.search(r"\b(b.)\b",
353                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
354        self.assertEqual(re.search(r"\B(b.)\B",
355                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
356        self.assertEqual(re.search(r"\b(b.)\b",
357                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
358        self.assertEqual(re.search(r"\B(b.)\B",
359                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
360        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
361        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
362        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
363        self.assertEqual(re.search(r"\b(b.)\b",
364                                   u"abcd abc bcd bx").group(1), "bx")
365        self.assertEqual(re.search(r"\B(b.)\B",
366                                   u"abc bcd bc abxd").group(1), "bx")
367        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
368        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
369        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
370        self.assertEqual(re.search(r"\d\D\w\W\s\S",
371                                   "1aa! a").group(0), "1aa! a")
372        self.assertEqual(re.search(r"\d\D\w\W\s\S",
373                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
374        self.assertEqual(re.search(r"\d\D\w\W\s\S",
375                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
376
377    def test_bigcharset(self):
378        self.assertEqual(re.match(u"([\u2222\u2223])",
379                                  u"\u2222").group(1), u"\u2222")
380        self.assertEqual(re.match(u"([\u2222\u2223])",
381                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
382
383    def test_anyall(self):
384        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
385                         "a\nb")
386        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
387                         "a\n\nb")
388
389    def test_non_consuming(self):
390        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
391        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
392        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
393        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
394        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
395        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
396        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
397
398        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
399        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
400        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
401        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
402
403    def test_ignore_case(self):
404        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
405        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
406        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
407        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
408        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
409        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
410        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
411        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
412        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
413        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
414
415    def test_category(self):
416        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
417
418    def test_getlower(self):
419        import _sre
420        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
421        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
422        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
423
424        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
425        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
426
427    def test_not_literal(self):
428        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
429        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
430
431    def test_search_coverage(self):
432        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
433        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
434
435    def test_re_escape(self):
436        p=""
437        for i in range(0, 256):
438            p = p + chr(i)
439            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
440                             True)
441            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
442
443        pat=re.compile(re.escape(p))
444        self.assertEqual(pat.match(p) is not None, True)
445        self.assertEqual(pat.match(p).span(), (0,256))
446
447    def test_pickling(self):
448        import pickle
449        self.pickle_test(pickle)
450        import cPickle
451        self.pickle_test(cPickle)
452        # old pickles expect the _compile() reconstructor in sre module
453        import warnings
454        with warnings.catch_warnings():
455            warnings.filterwarnings("ignore", "The sre module is deprecated",
456                                    DeprecationWarning)
457            from sre import _compile
458
459    def pickle_test(self, pickle):
460        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
461        s = pickle.dumps(oldpat)
462        newpat = pickle.loads(s)
463        self.assertEqual(oldpat, newpat)
464
465    def test_constants(self):
466        self.assertEqual(re.I, re.IGNORECASE)
467        self.assertEqual(re.L, re.LOCALE)
468        self.assertEqual(re.M, re.MULTILINE)
469        self.assertEqual(re.S, re.DOTALL)
470        self.assertEqual(re.X, re.VERBOSE)
471
472    def test_flags(self):
473        for flag in [re.I, re.M, re.X, re.S, re.L]:
474            self.assertNotEqual(re.compile('^pattern$', flag), None)
475
476    def test_sre_character_literals(self):
477        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
478            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
479            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
480            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
481            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
482            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
483            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
484        self.assertRaises(re.error, re.match, "\911", "")
485
486    def test_sre_character_class_literals(self):
487        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
488            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
489            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
490            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
491            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
492            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
493            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
494        self.assertRaises(re.error, re.match, "[\911]", "")
495
496    def test_bug_113254(self):
497        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
498        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
499        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
500
501    def test_bug_527371(self):
502        # bug described in patches 527371/672491
503        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
504        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
505        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
506        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
507        self.assertEqual(re.match("((a))", "a").lastindex, 1)
508
509    def test_bug_545855(self):
510        # bug 545855 -- This pattern failed to cause a compile error as it
511        # should, instead provoking a TypeError.
512        self.assertRaises(re.error, re.compile, 'foo[a-')
513
514    def test_bug_418626(self):
515        # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
516        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
517        # pattern '*?' on a long string.
518        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
519        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
520                         20003)
521        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
522        # non-simple '*?' still used to hit the recursion limit, before the
523        # non-recursive scheme was implemented.
524        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
525
526    def test_bug_612074(self):
527        pat=u"["+re.escape(u"\u2039")+u"]"
528        self.assertEqual(re.compile(pat) and 1, 1)
529
530    def test_stack_overflow(self):
531        # nasty cases that used to overflow the straightforward recursive
532        # implementation of repeated groups.
533        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
534        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
535        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
536
537    def test_scanner(self):
538        def s_ident(scanner, token): return token
539        def s_operator(scanner, token): return "op%s" % token
540        def s_float(scanner, token): return float(token)
541        def s_int(scanner, token): return int(token)
542
543        scanner = Scanner([
544            (r"[a-zA-Z_]\w*", s_ident),
545            (r"\d+\.\d*", s_float),
546            (r"\d+", s_int),
547            (r"=|\+|-|\*|/", s_operator),
548            (r"\s+", None),
549            ])
550
551        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
552
553        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
554                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
555                           'op+', 'bar'], ''))
556
557    def test_bug_448951(self):
558        # bug 448951 (similar to 429357, but with single char match)
559        # (Also test greedy matches.)
560        for op in '','?','*':
561            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
562                             (None, None))
563            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
564                             ('a:', 'a'))
565
566    def test_bug_725106(self):
567        # capturing groups in alternatives in repeats
568        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
569                         ('b', 'a'))
570        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
571                         ('c', 'b'))
572        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
573                         ('b', None))
574        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
575                         ('b', None))
576        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
577                         ('b', 'a'))
578        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
579                         ('c', 'b'))
580        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
581                         ('b', None))
582        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
583                         ('b', None))
584
585    def test_bug_725149(self):
586        # mark_stack_base restoring before restoring marks
587        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
588                         ('a', None))
589        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
590                         ('a', None, None))
591
592    def test_bug_764548(self):
593        # bug 764548, re.compile() barfs on str/unicode subclasses
594        try:
595            unicode
596        except NameError:
597            return  # no problem if we have no unicode
598        class my_unicode(unicode): pass
599        pat = re.compile(my_unicode("abc"))
600        self.assertEqual(pat.match("xyz"), None)
601
602    def test_finditer(self):
603        iter = re.finditer(r":+", "a:b::c:::d")
604        self.assertEqual([item.group(0) for item in iter],
605                         [":", "::", ":::"])
606
607    def test_bug_926075(self):
608        try:
609            unicode
610        except NameError:
611            return # no problem if we have no unicode
612        self.assert_(re.compile('bug_926075') is not
613                     re.compile(eval("u'bug_926075'")))
614
615    def test_bug_931848(self):
616        try:
617            unicode
618        except NameError:
619            pass
620        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
621        self.assertEqual(re.compile(pattern).split("a.b.c"),
622                         ['a','b','c'])
623
624    def test_bug_581080(self):
625        iter = re.finditer(r"\s", "a b")
626        self.assertEqual(iter.next().span(), (1,2))
627        self.assertRaises(StopIteration, iter.next)
628
629        scanner = re.compile(r"\s").scanner("a b")
630        self.assertEqual(scanner.search().span(), (1, 2))
631        self.assertEqual(scanner.search(), None)
632
633    def test_bug_817234(self):
634        iter = re.finditer(r".*", "asdf")
635        self.assertEqual(iter.next().span(), (0, 4))
636        self.assertEqual(iter.next().span(), (4, 4))
637        self.assertRaises(StopIteration, iter.next)
638
639    def test_empty_array(self):
640        # SF buf 1647541
641        import array
642        for typecode in 'cbBuhHiIlLfd':
643            a = array.array(typecode)
644            self.assertEqual(re.compile("bla").match(a), None)
645            self.assertEqual(re.compile("").match(a).groups(), ())
646
647    def test_inline_flags(self):
648        # Bug #1700
649        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
650        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
651
652        p = re.compile(upper_char, re.I | re.U)
653        q = p.match(lower_char)
654        self.assertNotEqual(q, None)
655
656        p = re.compile(lower_char, re.I | re.U)
657        q = p.match(upper_char)
658        self.assertNotEqual(q, None)
659
660        p = re.compile('(?i)' + upper_char, re.U)
661        q = p.match(lower_char)
662        self.assertNotEqual(q, None)
663
664        p = re.compile('(?i)' + lower_char, re.U)
665        q = p.match(upper_char)
666        self.assertNotEqual(q, None)
667
668        p = re.compile('(?iu)' + upper_char)
669        q = p.match(lower_char)
670        self.assertNotEqual(q, None)
671
672        p = re.compile('(?iu)' + lower_char)
673        q = p.match(upper_char)
674        self.assertNotEqual(q, None)
675
676    def test_dollar_matches_twice(self):
677        "$ matches the end of string, and just before the terminating \n"
678        pattern = re.compile('$')
679        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
680        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
681        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
682
683        pattern = re.compile('$', re.MULTILINE)
684        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
685        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
686        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
687
688
def _group_text(result, key):
    """Return group `key` of the match `result` as a value safe to concatenate.

    A group that took no part in the match is returned as the string "None"
    and a group the pattern does not define as the string "Error", so that
    the string expressions evaluated by run_re_tests() do not blow up.
    """
    try:
        value = result.group(key)
    except IndexError:
        return "Error"
    if value is None:
        # Special hack because else the string concat fails.
        return "None"
    return value


def run_re_tests():
    """Run the table-driven tests from test.re_tests and print any failure.

    Each entry of `tests` is either (pattern, s, outcome) or
    (pattern, s, outcome, repl, expected).  The pattern is compiled and
    searched for in `s`; for entries expected to succeed, `repl` is evaluated
    against the match's groups and compared with `expected`, and the search
    is then repeated with unicode input, a unicode pattern, a limited search
    range and the IGNORECASE, LOCALE and UNICODE flags.

    Problems are reported by printing a line starting with '===' or '***';
    nothing is returned.  Every print() call takes a single pre-formatted
    string, so the output is the same whether print is a statement or a
    function.

    Raises:
        ValueError: if a test tuple has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A syntax error is only worth reporting when it was unexpected.
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error: %s' % (t,))
            continue
        except KeyboardInterrupt:
            # Bare raise keeps the original traceback.
            raise
        except:
            # Deliberately broad: any other failure to compile is reported
            # and the run moves on to the next entry.
            print('*** Unexpected error *** %s' % (t,))
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception %s %r' % (t, msg))
            # There is no result to inspect; carrying on would use an
            # unbound name or the match left over from the previous entry.
            continue

        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly %s' % (t,))
            continue
        if outcome != SUCCEED:
            # Includes SYNTAX_ERROR: this should have been a syntax error;
            # forget it.
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            # NOTE(review): 'groups' is result.group(), i.e. group 0, not
            # result.groups(); left untouched since the expressions in
            # test.re_tests may depend on it -- confirm.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_text(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_text(result, name)
            # eval() is fed expressions from the test table itself (trusted
            # test data); never route external input through here.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error %s %r should be %r'
                      % (t, repl, expected))
        else:
            print('=== Failed incorrectly %s' % (t,))

        # Try the match on a unicode string, and check that it
        # still succeeds.
        try:
            result = obj.search(unicode(s, "latin-1"))
            if result is None:
                print('=== Fails on unicode match %s' % (t,))
        except NameError:
            continue # 1.5.2
        except TypeError:
            continue # unicode test case

        # Try the match on a unicode pattern, and check that it
        # still succeeds.
        obj = re.compile(unicode(pattern, "latin-1"))
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode pattern match %s' % (t,))

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                       and result is not None:
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match %s' % (t,))

        # Try the match with IGNORECASE, LOCALE and UNICODE enabled in
        # turn, and check that it still succeeds each time.
        flag_variants = ((re.IGNORECASE, 'case-insensitive'),
                         (re.LOCALE, 'locale-sensitive'),
                         (re.UNICODE, 'unicode-sensitive'))
        for flag, label in flag_variants:
            if re.compile(pattern, flag).search(s) is None:
                print('=== Fails on %s match %s' % (label, t))
812
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests run."""
    run_unittest(ReTests)
    run_re_tests()
816
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()