PageRenderTime 284ms CodeModel.GetById 101ms app.highlight 152ms RepoModel.GetById 18ms app.codeStats 1ms

/pypy/module/_sre/test/test_app_sre.py

https://bitbucket.org/evelyn559/pypy
Python | 918 lines | 916 code | 1 blank | 1 comment | 0 complexity | 9aaf9a47bb95e865dd92057e176a52b4 MD5 | raw file
  1"""Regular expression tests specific to _sre.py and accumulated during TDD."""
  2import autopath
  3import py
  4from py.test import raises, skip
  5from pypy.interpreter.gateway import app2interp_temp
  6from pypy.conftest import gettestobjspace, option
  7
  8def init_globals_hack(space):
  9    space.appexec([space.wrap(autopath.this_dir)], """(this_dir):
 10    import __builtin__ as b
 11    import sys, os.path
 12    # Uh-oh, ugly hack
 13    sys.path.insert(0, this_dir)
 14    import support_test_app_sre
 15    b.s = support_test_app_sre
 16    sys.path.pop(0)
 17    """)
 18
 19class AppTestSrePy:
 20
 21    def test_magic(self):
 22        import _sre, sre_constants
 23        assert sre_constants.MAGIC == _sre.MAGIC
 24
 25    def test_codesize(self):
 26        import _sre
 27        assert _sre.getcodesize() == _sre.CODESIZE
 28
 29
 30class AppTestSrePattern:
 31
 32    def test_copy(self):
 33        # copy support is disabled by default in _sre.c
 34        import re
 35        p = re.compile("b")
 36        raises(TypeError, p.__copy__)        # p.__copy__() should raise
 37        raises(TypeError, p.__deepcopy__)    # p.__deepcopy__() should raise
 38
 39    def test_creation_attributes(self):
 40        import re
 41        pattern_string = "(b)l(?P<g>a)"
 42        p = re.compile(pattern_string, re.I | re.M)
 43        assert pattern_string == p.pattern
 44        assert re.I | re.M == p.flags
 45        assert 2 == p.groups
 46        assert {"g": 2} == p.groupindex
 47
 48    def test_match_none(self):
 49        import re
 50        p = re.compile("bla")
 51        none_matches = ["b", "bl", "blub", "jupidu"]
 52        for string in none_matches:
 53            assert None == p.match(string)
 54
 55    def test_pos_endpos(self):
 56        import re
 57        # XXX maybe fancier tests here
 58        p = re.compile("bl(a)")
 59        tests = [("abla", 0, 4), ("abla", 1, 4), ("ablaa", 1, 4)]
 60        for string, pos, endpos in tests:
 61            assert p.search(string, pos, endpos)
 62        tests = [("abla", 0, 3), ("abla", 2, 4)]
 63        for string, pos, endpos in tests:
 64            assert not p.search(string, pos, endpos)
 65
 66    def test_findall(self):
 67        import re
 68        assert ["b"] == re.findall("b", "bla")
 69        assert ["a", "u"] == re.findall("b(.)", "abalbus")
 70        assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
 71        assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
 72
 73    def test_finditer(self):
 74        import re
 75        it = re.finditer("b(.)", "brabbel")
 76        assert "br" == it.next().group(0)
 77        assert "bb" == it.next().group(0)
 78        raises(StopIteration, it.next)
 79
 80    def test_split(self):
 81        import re
 82        assert ["a", "o", "u", ""] == re.split("b", "abobub")
 83        assert ["a", "o", "ub"] == re.split("b", "abobub", 2)
 84        assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa")
 85        assert ['', 'a', None, 'l', 'u', None, 'lla'] == (
 86            re.split("b([ua]|(s))", "balbulla"))
 87
 88    def test_weakref(self):
 89        import re, _weakref
 90        _weakref.ref(re.compile(r""))
 91
 92
 93class AppTestSreMatch:
 94    def setup_class(cls):
 95        cls.space = gettestobjspace(usemodules=('array', ))
 96        
 97    def test_copy(self):
 98        import re
 99        # copy support is disabled by default in _sre.c
100        m = re.match("bla", "bla")
101        raises(TypeError, m.__copy__)
102        raises(TypeError, m.__deepcopy__)
103
104    def test_match_attributes(self):
105        import re
106        c = re.compile("bla")
107        m = c.match("blastring")
108        assert "blastring" == m.string
109        assert c == m.re
110        assert 0 == m.pos
111        assert 9 == m.endpos
112        assert None == m.lastindex
113        assert None == m.lastgroup
114        assert ((0, 3),) == m.regs
115
116    def test_match_attributes_with_groups(self):
117        import re
118        m = re.search("a(b)(?P<name>c)", "aabcd")
119        assert 0 == m.pos
120        assert 5 == m.endpos
121        assert 2 == m.lastindex
122        assert "name" == m.lastgroup
123        assert ((1, 4), (2, 3), (3, 4)) == m.regs
124
125    def test_regs_overlapping_groups(self):
126        import re
127        m = re.match("a((b)c)", "abc")
128        assert ((0, 3), (1, 3), (1, 2)) == m.regs
129
130    def test_start_end_span(self):
131        import re
132        m = re.search("a((b)c)", "aabcd")
133        assert (1, 4) == (m.start(), m.end())
134        assert (1, 4) == m.span()
135        assert (2, 4) == (m.start(1), m.end(1))
136        assert (2, 4) == m.span(1)
137        assert (2, 3) == (m.start(2), m.end(2))
138        assert (2, 3) == m.span(2)
139        raises(IndexError, m.start, 3)
140        raises(IndexError, m.end, 3)
141        raises(IndexError, m.span, 3)
142        raises(IndexError, m.start, -1)
143
144    def test_groups(self):
145        import re
146        m = re.search("a((.).)", "aabcd")
147        assert ("ab", "a") == m.groups()
148        assert ("ab", "a") == m.groups(True)
149        m = re.search("a((\d)|(\s))", "aa1b")
150        assert ("1", "1", None) == m.groups()
151        assert ("1", "1", True) == m.groups(True)
152        m = re.search("a((\d)|(\s))", "a ")
153        assert (" ", None, " ") == m.groups()
154        m = re.match("(a)", "a")
155        assert ("a",) == m.groups()
156
157    def test_groupdict(self):
158        import re
159        m = re.search("a((.).)", "aabcd")
160        assert {} == m.groupdict()
161        m = re.search("a((?P<first>.).)", "aabcd")
162        assert {"first": "a"} == m.groupdict()
163        m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
164        assert {"first": "1", "second": None} == m.groupdict()
165        assert {"first": "1", "second": True} == m.groupdict(True)
166
167    def test_group(self):
168        import re
169        m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
170        assert "a1" == m.group()
171        assert ("1", "1", None) == m.group(1, 2, 3)
172        assert ("1", None) == m.group("first", "second")
173        raises(IndexError, m.group, 1, 4)
174
175    def test_expand(self):
176        import re
177        m = re.search("a(..)(?P<name>..)", "ab1bc")
178        assert "b1bcbc" == m.expand(r"\1\g<name>\2")
179
180    def test_sub(self):
181        import re
182        assert "bbbbb" == re.sub("a", "b", "ababa")
183        assert ("bbbbb", 3) == re.subn("a", "b", "ababa")
184        assert "dddd" == re.sub("[abc]", "d", "abcd")
185        assert ("dddd", 3) == re.subn("[abc]", "d", "abcd")
186        assert "rbd\nbr\n" == re.sub("a(.)", r"b\1\n", "radar")
187        assert ("rbd\nbr\n", 2) == re.subn("a(.)", r"b\1\n", "radar")
188        assert ("bbbba", 2) == re.subn("a", "b", "ababa", 2)
189
190    def test_sub_unicode(self):
191        import re
192        assert isinstance(re.sub(u"a", u"b", u""), unicode)
193        # the input is returned unmodified if no substitution is performed,
194        # which (if interpreted literally, as CPython does) gives the
195        # following strangeish rules:
196        assert isinstance(re.sub(u"a", u"b", "diwoiioamoi"), unicode)
197        assert isinstance(re.sub(u"a", u"b", "diwoiiobmoi"), str)
198        assert isinstance(re.sub(u'x', 'y', 'x'), str)
199
200    def test_sub_callable(self):
201        import re
202        def call_me(match):
203            ret = ""
204            for char in match.group():
205                ret += chr(ord(char) + 1)
206            return ret
207        assert ("bbbbb", 3) == re.subn("a", call_me, "ababa")
208
209    def test_sub_callable_returns_none(self):
210        import re
211        def call_me(match):
212            return None
213        assert "acd" == re.sub("b", call_me, "abcd")
214
215    def test_sub_callable_suddenly_unicode(self):
216        import re
217        def call_me(match):
218            if match.group() == 'A':
219                return unichr(0x3039)
220            return ''
221        assert (u"bb\u3039b", 2) == re.subn("[aA]", call_me, "babAb")
222
223    def test_match_array(self):
224        import re, array
225        a = array.array('c', 'hello')
226        m = re.match('hel+', a)
227        assert m.end() == 4
228
229    def test_match_typeerror(self):
230        import re
231        raises(TypeError, re.match, 'hel+', list('hello'))
232
233    def test_group_bugs(self):
234        import re
235        r = re.compile(r"""
236            \&(?:
237              (?P<escaped>\&) |
238              (?P<named>[_a-z][_a-z0-9]*)      |
239              {(?P<braced>[_a-z][_a-z0-9]*)}   |
240              (?P<invalid>)
241            )
242        """, re.IGNORECASE | re.VERBOSE)
243        matches = list(r.finditer('this &gift is for &{who} &&'))
244        assert len(matches) == 3
245        assert matches[0].groupdict() == {'escaped': None,
246                                          'named': 'gift',
247                                          'braced': None,
248                                          'invalid': None}
249        assert matches[1].groupdict() == {'escaped': None,
250                                          'named': None,
251                                          'braced': 'who',
252                                          'invalid': None}
253        assert matches[2].groupdict() == {'escaped': '&',
254                                          'named': None,
255                                          'braced': None,
256                                          'invalid': None}
257        matches = list(r.finditer('&who likes &{what)'))   # note the ')'
258        assert len(matches) == 2
259        assert matches[0].groupdict() == {'escaped': None,
260                                          'named': 'who',
261                                          'braced': None,
262                                          'invalid': None}
263        assert matches[1].groupdict() == {'escaped': None,
264                                          'named': None,
265                                          'braced': None,
266                                          'invalid': ''}
267
268    def test_sub_typecheck(self):
269        import re
270        KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
271        raises(TypeError, KEYCRE.sub, "hello", {"%(": 1})
272
273
class AppTestSreScanner:
    """App-level tests for the scanner objects returned by ``pattern.scanner()``."""

    def test_scanner_attributes(self):
        import re
        compiled = re.compile("bla")
        scanner = compiled.scanner("blablubla")
        assert scanner.pattern == compiled

    def test_scanner_match(self):
        import re
        scanner = re.compile(".").scanner("bla")
        # the three calls are evaluated left to right
        assert ("b", "l", "a") == (scanner.match().group(0),
                                    scanner.match().group(0),
                                    scanner.match().group(0))
        assert scanner.match() is None

    def test_scanner_match_detail(self):
        import re
        scanner = re.compile("a").scanner("aaXaa")
        assert scanner.match().group(0) == "a"
        assert scanner.match().group(0) == "a"
        assert scanner.match() is None
        assert scanner.match().group(0) == "a"
        assert scanner.match().group(0) == "a"
        assert scanner.match() is None
        assert scanner.match() is None
        assert scanner.match() is None

    def test_scanner_search(self):
        import re
        scanner = re.compile(r"\d").scanner("bla23c5a")
        assert ("2", "3", "5") == (scanner.search().group(0),
                                    scanner.search().group(0),
                                    scanner.search().group(0))
        assert scanner.search() is None

    def test_scanner_zero_width_match(self):
        import re, sys
        # not checked on Python 2.3
        if sys.version_info[:2] == (2, 3):
            return
        scanner = re.compile(".*").scanner("bla")
        assert ("bla", "") == (scanner.search().group(0),
                               scanner.search().group(0))
        assert scanner.search() is None
315
316
class AppTestGetlower:
    """App-level tests driving ``s.assert_lower_equal`` with different flags."""

    def setup_class(cls):
        # Prefer a space with '_locale'; fall back to '_rawffi' when
        # requesting the first one is skipped.
        try:
            space = gettestobjspace(usemodules=('_locale',))
        except py.test.skip.Exception:
            space = gettestobjspace(usemodules=('_rawffi',))
        cls.space = space
        # This imports support_test_app_sre as the global "s"
        init_globals_hack(space)

    def setup_method(self, method):
        # Reset the locale before every test.
        import locale
        default_locale = (None, None)
        locale.setlocale(locale.LC_ALL, default_locale)

    def teardown_method(self, method):
        # Reset the locale again once the test is done.
        import locale
        default_locale = (None, None)
        locale.setlocale(locale.LC_ALL, default_locale)

    def test_getlower_no_flags(self):
        upper_ae = "\xc4"
        pairs = [
            ("a", "a"),
            ("A", "a"),
            (upper_ae, upper_ae),
            (u"\u00c4", u"\u00c4"),
            (u"\u4444", u"\u4444"),
        ]
        s.assert_lower_equal(pairs, 0)

    def test_getlower_locale(self):
        import locale, sre_constants
        upper_ae = "\xc4"
        lower_ae = "\xe4"
        upper_pi = u"\u03a0"
        pairs = [
            ("a", "a"),
            ("A", "a"),
            (upper_ae, lower_ae),
            (u"\u00c4", u"\u00e4"),
            (upper_pi, upper_pi),
        ]
        try:
            locale.setlocale(locale.LC_ALL, "de_DE")
            s.assert_lower_equal(pairs, sre_constants.SRE_FLAG_LOCALE)
        except locale.Error:
            # skip test
            skip("unsupported locale de_DE")

    def test_getlower_unicode(self):
        import sre_constants
        upper_ae = "\xc4"
        lower_ae = "\xe4"
        upper_pi = u"\u03a0"
        lower_pi = u"\u03c0"
        pairs = [
            ("a", "a"),
            ("A", "a"),
            (upper_ae, lower_ae),
            (u"\u00c4", u"\u00e4"),
            (upper_pi, lower_pi),
            (u"\u4444", u"\u4444"),
        ]
        s.assert_lower_equal(pairs, sre_constants.SRE_FLAG_UNICODE)
363        
364
class AppTestSimpleSearches:
    """App-level ``re.search`` / ``re.match`` checks, one regex feature per test."""

    def test_search_simple_literal(self):
        import re
        search = re.search
        assert search("bla", "bla")
        assert search("bla", "blab")
        assert not search("bla", "blu")

    def test_search_simple_ats(self):
        import re
        search = re.search
        assert search("^bla", "bla")
        assert search("^bla", "blab")
        assert not search("^bla", "bbla")
        assert search("bla$", "abla")
        assert search("bla$", "bla\n")
        assert not search("bla$", "blaa")

    def test_search_simple_boundaries(self):
        import re
        search = re.search
        with_pi = u"bla%s" % u"\u03a0"
        assert search(r"bla\b", "bla")
        assert search(r"bla\b", "bla ja")
        assert search(r"bla\b", with_pi)
        assert not search(r"bla\b", "blano")
        assert not search(r"bla\b", with_pi, re.UNICODE)

    def test_search_simple_categories(self):
        import re
        search = re.search
        lower_pi = u"\u03c0"
        indian_digit = u"\u0966"
        em_space = u"\u2001"
        lower_ae = "\xe4"
        assert search(r"bla\d\s\w", "bla3 b")
        assert search(r"b\d", u"b%s" % indian_digit, re.UNICODE)
        assert not search(r"b\D", u"b%s" % indian_digit, re.UNICODE)
        assert search(r"b\s", u"b%s" % em_space, re.UNICODE)
        assert not search(r"b\S", u"b%s" % em_space, re.UNICODE)
        assert search(r"b\w", u"b%s" % lower_pi, re.UNICODE)
        assert not search(r"b\W", u"b%s" % lower_pi, re.UNICODE)
        assert search(r"b\w", "b%s" % lower_ae, re.UNICODE)

    def test_search_simple_any(self):
        import re
        search = re.search
        assert search(r"b..a", "jboaas")
        assert not search(r"b..a", "jbo\nas")
        assert search(r"b..a", "jbo\nas", re.DOTALL)

    def test_search_simple_in(self):
        import re
        search = re.search
        upper_pi = u"\u03a0"
        lower_pi = u"\u03c0"
        em_space = u"\u2001"
        line_sep = u"\u2028"
        assert search(r"b[\da-z]a", "bb1a")
        assert search(r"b[\da-z]a", "bbsa")
        assert not search(r"b[\da-z]a", "bbSa")
        assert search(r"b[^okd]a", "bsa")
        assert not search(r"b[^okd]a", "bda")
        non_ascii_set = u"b[%s%s%s]a" % (lower_pi, upper_pi, em_space)
        assert search(non_ascii_set, u"b%sa" % upper_pi) # bigcharset
        assert search(non_ascii_set, u"b%sa" % em_space)
        assert not search(non_ascii_set, u"b%sa" % line_sep)

    def test_search_simple_literal_ignore(self):
        import re
        upper_pi = u"\u03a0"
        lower_pi = u"\u03c0"
        assert re.search(r"ba", "ba", re.IGNORECASE)
        assert re.search(r"ba", "BA", re.IGNORECASE)
        flags = re.IGNORECASE | re.UNICODE
        assert re.search(u"b%s" % upper_pi, u"B%s" % lower_pi, flags)

    def test_search_simple_in_ignore(self):
        import re
        search = re.search
        upper_pi = u"\u03a0"
        lower_pi = u"\u03c0"
        assert search(r"ba[A-C]", "bac", re.IGNORECASE)
        assert search(r"ba[a-c]", "baB", re.IGNORECASE)
        assert search(u"ba[%s]" % upper_pi, "ba%s" % lower_pi,
                      re.IGNORECASE | re.UNICODE)
        assert search(r"ba[^A-C]", "bar", re.IGNORECASE)
        assert not search(r"ba[^A-C]", "baA", re.IGNORECASE)
        assert not search(r"ba[^A-C]", "baa", re.IGNORECASE)

    def test_search_simple_branch(self):
        import re
        for text in ["adeb", "abbb"]:
            assert re.search(r"a(bb|d[ef])b", text)

    def test_search_simple_repeat_one(self):
        import re
        search = re.search
        assert search(r"aa+", "aa") # empty tail
        assert search(r"aa+ab", "aaaab") # backtracking
        assert search(r"aa*ab", "aab") # empty match
        assert search(r"a[bc]+", "abbccb")
        assert search(r"a.+b", "abbcb\nb").group() == "abbcb"
        assert search(r"a.+b", "abbcb\nb", re.DOTALL).group() == "abbcb\nb"
        assert search(r"ab+c", "aBbBbBc", re.IGNORECASE)
        assert not search(r"aa{2,3}", "aa") # string too short
        assert not search(r"aa{2,3}b", "aab") # too few repetitions
        assert not search(r"aa+b", "aaaac") # tail doesn't match

    def test_search_simple_min_repeat_one(self):
        import re
        search = re.search
        assert search(r"aa+?", "aa") # empty tail
        assert search(r"aa+?ab", "aaaab") # forward tracking
        assert search(r"a[bc]+?", "abbccb")
        assert search(r"a.+?b", "abbcb\nb").group() == "abb"
        assert search(r"a.+b", "a\nbbc", re.DOTALL).group() == "a\nbb"
        assert search(r"ab+?c", "aBbBbBc", re.IGNORECASE)
        assert not search(r"aa+?", "a") # string too short
        assert not search(r"aa{2,3}?b", "aab") # too few repetitions
        assert not search(r"aa+?b", "aaaac") # tail doesn't match
        assert re.match(".*?cd", "abcabcde").end(0) == 7

    def test_search_simple_repeat_maximizing(self):
        import re
        search = re.search
        assert not search(r"(ab){3,5}", "abab")
        assert not search(r"(ab){3,5}", "ababa")
        assert search(r"(ab){3,5}", "ababab")
        assert search(r"(ab){3,5}", "abababababab").end(0) == 10
        assert search(r"(a.)*", "abacad").group(1) == "ad"
        assert search(r"(ab(c.)*)+", "ababcecfabcg").groups() == ("abcg", "cg")
        assert search(r"(ab|(c.))+", "abcg").groups() == ("cg", "cg")
        assert search(r"((c.)|ab)+", "cfab").groups() == ("ab", "cf")
        assert search(r".*", "")

    def test_search_simple_repeat_minimizing(self):
        import re
        search = re.search
        assert not search(r"(ab){3,5}?", "abab")
        assert search(r"(ab){3,5}?", "ababab")
        assert search(r"b(a){3,5}?b", "baaaaab")
        assert not search(r"b(a){3,5}?b", "baaaaaab")
        assert search(r"a(b(.)+?)*", "abdbebb")

    def test_search_simple_groupref(self):
        import re
        upper_pi = u"\u03a0"
        lower_pi = u"\u03c0"
        assert re.match(r"((ab)+)c\1", "ababcabab")
        assert not re.match(r"((ab)+)c\1", "ababcab")
        assert not re.search(r"(a|(b))\2", "aa")
        assert re.match(r"((ab)+)c\1", "aBAbcAbaB", re.IGNORECASE)
        subject = u"a%sca%s" % (upper_pi, lower_pi)
        assert re.match(r"((a.)+)c\1", subject, re.IGNORECASE | re.UNICODE)

    def test_search_simple_groupref_exists(self):
        import re, sys
        # nothing is checked on Python 2.3
        if sys.version_info[:2] == (2, 3):
            return
        assert re.search(r"(<)?bla(?(1)>)", "<bla>")
        assert re.search(r"(<)?bla(?(1)>)", "bla")
        assert not re.match(r"(<)?bla(?(1)>)", "<bla")
        assert re.search(r"(<)?bla(?(1)>|u)", "blau")

    def test_search_simple_assert(self):
        import re
        search = re.search
        assert search(r"b(?=\d\d).{3,}", "b23a")
        assert not search(r"b(?=\d\d).{3,}", "b2aa")
        assert search(r"b(?<=\d.)a", "2ba")
        assert not search(r"b(?<=\d.)a", "ba")

    def test_search_simple_assert_not(self):
        import re
        search = re.search
        assert search(r"b(?<!\d.)a", "aba")
        assert search(r"b(?<!\d.)a", "ba")
        assert not search(r"b(?<!\d.)a", "11ba")

    def test_bug_725149(self):
        # mark_stack_base restoring before restoring marks
        # test copied from CPython test
        import re
        first = re.match('(a)(?:(?=(b)*)c)*', 'abb')
        assert first.groups() == ('a', None)
        second = re.match('(a)((?!(b)*))*', 'abb')
        assert second.groups() == ('a', None, None)
543
544
class AppTestMarksStack:
    """Checks that groups tried on an abandoned path come back as ``None``.

    Every pattern can enter a group on a path that is later given up; the
    tests check the group values and ``lastindex`` of the final match.
    """

    def test_mark_stack_branch(self):
        import re
        match = re.match("b(.)a|b.b", "bob")
        assert match.group(1) is None
        assert match.lastindex is None

    def test_mark_stack_repeat_one(self):
        import re
        match = re.match(r"\d+1((2)|(3))4", "2212413")
        assert match.group(1, 2, 3) == ("2", "2", None)
        assert match.lastindex == 1

    def test_mark_stack_min_repeat_one(self):
        import re
        match = re.match(r"\d+?1((2)|(3))44", "221341244")
        assert match.group(1, 2, 3) == ("2", "2", None)
        assert match.lastindex == 1

    def test_mark_stack_max_until(self):
        import re
        match = re.match(r"(\d)+1((2)|(3))4", "2212413")
        assert match.group(2, 3, 4) == ("2", "2", None)
        assert match.lastindex == 2

    def test_mark_stack_min_until(self):
        import re
        match = re.match(r"(\d)+?1((2)|(3))44", "221341244")
        assert match.group(2, 3, 4) == ("2", "2", None)
        assert match.lastindex == 2
576        
577
578class AppTestOpcodes:
579
580    def setup_class(cls):
581        try:
582            cls.space = gettestobjspace(usemodules=('_locale',))
583        except py.test.skip.Exception:
584            cls.space = gettestobjspace(usemodules=('_rawffi',))
585        # This imports support_test_sre as the global "s"
586        init_globals_hack(cls.space)
587
588    def test_length_optimization(self):
589        pattern = "bla"
590        opcodes = [s.OPCODES["info"], 3, 3, len(pattern)] \
591            + s.encode_literal(pattern) + [s.OPCODES["success"]]
592        s.assert_no_match(opcodes, ["b", "bl", "ab"])
593
594    def test_literal(self):
595        opcodes = s.encode_literal("bla") + [s.OPCODES["success"]]
596        s.assert_no_match(opcodes, ["bl", "blu"])
597        s.assert_match(opcodes, ["bla", "blab", "cbla", "bbla"])
598
599    def test_not_literal(self):
600        opcodes = s.encode_literal("b") \
601            + [s.OPCODES["not_literal"], ord("a"), s.OPCODES["success"]]
602        s.assert_match(opcodes, ["bx", "ababy"])
603        s.assert_no_match(opcodes, ["ba", "jabadu"])
604
605    def test_unknown(self):
606        raises(RuntimeError, s.search, [55555], "b")
607
608    def test_at_beginning(self):
609        for atname in ["at_beginning", "at_beginning_string"]:
610            opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
611                + s.encode_literal("bla") + [s.OPCODES["success"]]
612            s.assert_match(opcodes, "bla")
613            s.assert_no_match(opcodes, "abla")
614
615    def test_at_beginning_line(self):
616        opcodes = [s.OPCODES["at"], s.ATCODES["at_beginning_line"]] \
617            + s.encode_literal("bla") + [s.OPCODES["success"]]
618        s.assert_match(opcodes, ["bla", "x\nbla"])
619        s.assert_no_match(opcodes, ["abla", "abla\nubla"])
620
621    def test_at_end(self):
622        opcodes = s.encode_literal("bla") \
623            + [s.OPCODES["at"], s.ATCODES["at_end"], s.OPCODES["success"]]
624        s.assert_match(opcodes, ["bla", "bla\n"])
625        s.assert_no_match(opcodes, ["blau", "abla\nblau"])
626
627    def test_at_end_line(self):
628        opcodes = s.encode_literal("bla") \
629            + [s.OPCODES["at"], s.ATCODES["at_end_line"], s.OPCODES["success"]]
630        s.assert_match(opcodes, ["bla\n", "bla\nx", "bla"])
631        s.assert_no_match(opcodes, ["blau"])
632
633    def test_at_end_string(self):
634        opcodes = s.encode_literal("bla") \
635            + [s.OPCODES["at"], s.ATCODES["at_end_string"], s.OPCODES["success"]]
636        s.assert_match(opcodes, "bla")
637        s.assert_no_match(opcodes, ["blau", "bla\n"])
638
639    def test_at_boundary(self):
640        for atname in "at_boundary", "at_loc_boundary", "at_uni_boundary":
641            opcodes = s.encode_literal("bla") \
642                + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
643            s.assert_match(opcodes, ["bla", "bla ha", "bla,x"])
644            s.assert_no_match(opcodes, ["blaja", ""])
645            opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
646                + s.encode_literal("bla") + [s.OPCODES["success"]]
647            s.assert_match(opcodes, "bla")
648            s.assert_no_match(opcodes, "")
649
650    def test_at_non_boundary(self):
651        for atname in "at_non_boundary", "at_loc_non_boundary", "at_uni_non_boundary":
652            opcodes = s.encode_literal("bla") \
653                + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
654            s.assert_match(opcodes, "blan")
655            s.assert_no_match(opcodes, ["bla ja", "bla"])
656
657    def test_at_loc_boundary(self):
658        import locale
659        try:
660            s.void_locale()
661            opcodes1 = s.encode_literal("bla") \
662                + [s.OPCODES["at"], s.ATCODES["at_loc_boundary"], s.OPCODES["success"]]
663            opcodes2 = s.encode_literal("bla") \
664                + [s.OPCODES["at"], s.ATCODES["at_loc_non_boundary"], s.OPCODES["success"]]
665            s.assert_match(opcodes1, "bla\xFC")
666            s.assert_no_match(opcodes2, "bla\xFC")
667            oldlocale = locale.setlocale(locale.LC_ALL)
668            locale.setlocale(locale.LC_ALL, "de_DE")
669            s.assert_no_match(opcodes1, "bla\xFC")
670            s.assert_match(opcodes2, "bla\xFC")
671            locale.setlocale(locale.LC_ALL, oldlocale)
672        except locale.Error:
673            # skip test
674            skip("locale error")
675
676    def test_at_uni_boundary(self):
677        UPPER_PI = u"\u03a0"
678        LOWER_PI = u"\u03c0"
679        opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
680            s.ATCODES["at_uni_boundary"], s.OPCODES["success"]]
681        s.assert_match(opcodes, ["bla ha", u"bl%s ja" % UPPER_PI])
682        s.assert_no_match(opcodes, [u"bla%s" % LOWER_PI])
683        opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
684            s.ATCODES["at_uni_non_boundary"], s.OPCODES["success"]]
685        s.assert_match(opcodes, ["blaha", u"bl%sja" % UPPER_PI])
686
687    def test_category_loc_word(self):
688        import locale
689        try:
690            s.void_locale()
691            opcodes1 = s.encode_literal("b") \
692                + [s.OPCODES["category"], s.CHCODES["category_loc_word"], s.OPCODES["success"]]
693            opcodes2 = s.encode_literal("b") \
694                + [s.OPCODES["category"], s.CHCODES["category_loc_not_word"], s.OPCODES["success"]]
695            s.assert_no_match(opcodes1, "b\xFC")
696            s.assert_no_match(opcodes1, u"b\u00FC")
697            s.assert_match(opcodes2, "b\xFC")
698            locale.setlocale(locale.LC_ALL, "de_DE")
699            s.assert_match(opcodes1, "b\xFC")
700            s.assert_no_match(opcodes1, u"b\u00FC")
701            s.assert_no_match(opcodes2, "b\xFC")
702            s.void_locale()
703        except locale.Error:
704            # skip test
705            skip("locale error")
706
707    def test_any(self):
708        opcodes = s.encode_literal("b") + [s.OPCODES["any"]] \
709            + s.encode_literal("a") + [s.OPCODES["success"]]
710        s.assert_match(opcodes, ["b a", "bla", "bboas"])
711        s.assert_no_match(opcodes, ["b\na", "oba", "b"])
712
713    def test_any_all(self):
714        opcodes = s.encode_literal("b") + [s.OPCODES["any_all"]] \
715            + s.encode_literal("a") + [s.OPCODES["success"]]
716        s.assert_match(opcodes, ["b a", "bla", "bboas", "b\na"])
717        s.assert_no_match(opcodes, ["oba", "b"])
718
719    def test_in_failure(self):
720        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 2, s.OPCODES["failure"]] \
721            + s.encode_literal("a") + [s.OPCODES["success"]]
722        s.assert_no_match(opcodes, ["ba", "bla"])
723
724    def test_in_literal(self):
725        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7] \
726            + s.encode_literal("la") + [s.OPCODES["failure"], s.OPCODES["failure"]] \
727            + s.encode_literal("a") + [s.OPCODES["success"]]
728        s.assert_match(opcodes, ["bla", "baa", "blbla"])
729        s.assert_no_match(opcodes, ["ba", "bja", "blla"])
730
731    def test_in_category(self):
732        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 6, s.OPCODES["category"],
733            s.CHCODES["category_digit"], s.OPCODES["category"], s.CHCODES["category_space"],
734            s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
735        s.assert_match(opcodes, ["b1a", "b a", "b4b\tas"])
736        s.assert_no_match(opcodes, ["baa", "b5"])
737
738    def test_in_charset_ucs2(self):
739        import _sre
740        if _sre.CODESIZE != 2:
741            return
742        # charset bitmap for characters "l" and "h"
743        bitmap = 6 * [0] + [4352] + 9 * [0]
744        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 19, s.OPCODES["charset"]] \
745            + bitmap + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
746        s.assert_match(opcodes, ["bla", "bha", "blbha"])
747        s.assert_no_match(opcodes, ["baa", "bl"])
748
749    def _test_in_bigcharset_ucs2(self):
750        # disabled because this actually only works on big-endian machines
751        if _sre.CODESIZE != 2:
752            return
753        # constructing bigcharset for lowercase pi (\u03c0)
754        UPPER_PI = u"\u03a0"
755        LOWER_PI = u"\u03c0"
756        bitmap = 6 * [0] + [4352] + 9 * [0]
757        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 164, s.OPCODES["bigcharset"], 2] \
758            + [0, 1] + 126 * [0] \
759            + 16 * [0] \
760            + 12 * [0] + [1] + 3 * [0] \
761            + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
762        s.assert_match(opcodes, [u"b%sa" % LOWER_PI])
763        s.assert_no_match(opcodes, [u"b%sa" % UPPER_PI])
764
765    # XXX bigcharset test for ucs4 missing here
766
767    def test_in_range(self):
768        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 5, s.OPCODES["range"],
769            ord("1"), ord("9"), s.OPCODES["failure"]] \
770            + s.encode_literal("a") + [s.OPCODES["success"]]
771        s.assert_match(opcodes, ["b1a", "b56b7aa"])
772        s.assert_no_match(opcodes, ["baa", "b5"])
773
774    def test_in_negate(self):
775        opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7, s.OPCODES["negate"]] \
776            + s.encode_literal("la") + [s.OPCODES["failure"]] \
777            + s.encode_literal("a") + [s.OPCODES["success"]]
778        s.assert_match(opcodes, ["b1a", "bja", "bubua"])
779        s.assert_no_match(opcodes, ["bla", "baa", "blbla"])
780
781    def test_literal_ignore(self):
782        opcodes = s.encode_literal("b") \
783            + [s.OPCODES["literal_ignore"], ord("a"), s.OPCODES["success"]]
784        s.assert_match(opcodes, ["ba", "bA"])
785        s.assert_no_match(opcodes, ["bb", "bu"])
786
787    def test_not_literal_ignore(self):
788        UPPER_PI = u"\u03a0"
789        opcodes = s.encode_literal("b") \
790            + [s.OPCODES["not_literal_ignore"], ord("a"), s.OPCODES["success"]]
791        s.assert_match(opcodes, ["bb", "bu", u"b%s" % UPPER_PI])
792        s.assert_no_match(opcodes, ["ba", "bA"])
793
794    def test_in_ignore(self):
795        opcodes = s.encode_literal("b") + [s.OPCODES["in_ignore"], 8] \
796            + s.encode_literal("abc") + [s.OPCODES["failure"]] \
797            + s.encode_literal("a") + [s.OPCODES["success"]]
798        s.assert_match(opcodes, ["baa", "bAa", "bbbBa"])
799        s.assert_no_match(opcodes, ["ba", "bja", "blla"])
800
801    def test_in_jump_info(self):
802        for opname in "jump", "info":
803            opcodes = s.encode_literal("b") \
804                + [s.OPCODES[opname], 3, s.OPCODES["failure"], s.OPCODES["failure"]] \
805                + s.encode_literal("a") + [s.OPCODES["success"]]
806            s.assert_match(opcodes, "ba")
807
    def _test_mark(self):
        # Checks the state left behind by two "mark" opcodes after a search.
        # The leading underscore in the name presumably keeps the test runner
        # from collecting it, i.e. this test is currently disabled.
        # NOTE(review): `_sre` is not imported inside this method and
        # `create_state` is not defined in the visible part of the class --
        # confirm both before re-enabling.
        # XXX need to rewrite this implementation-independent
        opcodes = s.encode_literal("a") + [s.OPCODES["mark"], 0] \
            + s.encode_literal("b") + [s.OPCODES["mark"], 1, s.OPCODES["success"]]
        state = self.create_state("abc")
        _sre._sre_search(state, opcodes)
        assert 1 == state.lastindex
        assert 1 == state.lastmark
        # NB: the following are indexes from the start of the match
        assert [1, 2] == state.marks
818
819    def test_branch(self):
820        opcodes = [s.OPCODES["branch"], 7] + s.encode_literal("ab") \
821            + [s.OPCODES["jump"], 9, 7] + s.encode_literal("cd") \
822            + [s.OPCODES["jump"], 2, s.OPCODES["failure"], s.OPCODES["success"]]
823        s.assert_match(opcodes, ["ab", "cd"])
824        s.assert_no_match(opcodes, ["aacas", "ac", "bla"])
825
826    def test_repeat_one(self):
827        opcodes = [s.OPCODES["repeat_one"], 6, 1, 65535] + s.encode_literal("a") \
828            + [s.OPCODES["success"]] + s.encode_literal("ab") + [s.OPCODES["success"]]
829        s.assert_match(opcodes, ["aab", "aaaab"])
830        s.assert_no_match(opcodes, ["ab", "a"])
831
832    def test_min_repeat_one(self):
833        opcodes = [s.OPCODES["min_repeat_one"], 5, 1, 65535, s.OPCODES["any"]] \
834            + [s.OPCODES["success"]] + s.encode_literal("b") + [s.OPCODES["success"]]
835        s.assert_match(opcodes, ["aab", "ardb", "bb"])
836        s.assert_no_match(opcodes, ["b"])
837
838    def test_repeat_maximizing(self):
839        opcodes = [s.OPCODES["repeat"], 5, 1, 65535] + s.encode_literal("a") \
840            + [s.OPCODES["max_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
841        s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
842        s.assert_no_match(opcodes, ["aaa", "", "ac"])
843
844    def test_max_until_zero_width_match(self):
845        # re.compile won't compile prospective zero-with matches (all of them?),
846        # so we can only produce an example by directly constructing bytecodes.
847        # CPython 2.3 fails with a recursion limit exceeded error here.
848        import sys
849        if not sys.version_info[:2] == (2, 3):
850            opcodes = [s.OPCODES["repeat"], 10, 1, 65535, s.OPCODES["repeat_one"],
851                6, 0, 65535] + s.encode_literal("a") + [s.OPCODES["success"],
852                s.OPCODES["max_until"], s.OPCODES["success"]]
853            s.assert_match(opcodes, ["ab", "bb"])
854            assert "" == s.search(opcodes, "bb").group(0)
855
856    def test_repeat_minimizing(self):
857        opcodes = [s.OPCODES["repeat"], 4, 1, 65535, s.OPCODES["any"],
858            s.OPCODES["min_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
859        s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
860        s.assert_no_match(opcodes, ["b"])
861        assert "aab" == s.search(opcodes, "aabb").group(0)
862
863    def test_groupref(self):
864        opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
865            + s.encode_literal("a") + [s.OPCODES["groupref"], 0, s.OPCODES["success"]]
866        s.assert_match(opcodes, ["bab", "aaa", "dad"])
867        s.assert_no_match(opcodes, ["ba", "bad", "baad"])
868
869    def test_groupref_ignore(self):
870        opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
871            + s.encode_literal("a") + [s.OPCODES["groupref_ignore"], 0, s.OPCODES["success"]]
872        s.assert_match(opcodes, ["bab", "baB", "Dad"])
873        s.assert_no_match(opcodes, ["ba", "bad", "baad"])
874
875    def test_assert(self):
876        opcodes = s.encode_literal("a") + [s.OPCODES["assert"], 4, 0] \
877            + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
878        assert "a" == s.search(opcodes, "ab").group(0)
879        s.assert_no_match(opcodes, ["a", "aa"])
880
881    def test_assert_not(self):
882        opcodes = s.encode_literal("a") + [s.OPCODES["assert_not"], 4, 0] \
883            + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
884        assert "a" == s.search(opcodes, "ac").group(0)
885        s.assert_match(opcodes, ["a"])
886        s.assert_no_match(opcodes, ["ab"])
887
888    def test_bug(self):
889        import re
890        assert re.sub('=\w{2}', 'x', '=CA') == 'x'
891
class AppTestOptimizations:
    """These tests try to trigger optimized edge cases."""

    def test_match_length_optimization(self):
        """re.match of "bla" against "blub" must find nothing."""
        import re
        assert re.match("bla", "blub") is None

    def test_fast_search(self):
        """Search and findall with the plain literal prefix "bl"."""
        import re
        assert re.search("bl", "abaub") is None
        assert re.search("bl", "b") is None
        assert re.findall("bl", "blbl") == ["bl", "bl"]
        assert re.findall("bl(.)", "blablu") == ["a", "u"]

    def test_branch_literal_shortcut(self):
        """An alternation of literals must not match a text without them."""
        import re
        assert re.search("bl|a|c", "hello") is None

    def test_literal_search(self):
        """Search for a literal "b" followed by a captured digit."""
        import re
        # Raw strings: "\d" is not a valid escape sequence in a plain literal.
        assert re.search(r"b(\d)", "ababbbab1")
        assert re.search(r"b(\d)", "ababbbab") is None

    def test_repeat_one_literal_tail(self):
        """Search for ".+" followed by the literal tail "ab"."""
        import re
        assert re.search(".+ab", "wowowowawoabwowo")
        assert re.search(".+ab", "wowowaowowo") is None