test_re.py - This is a comprehensive test suite for the `re…

/Lib/test/test_re.py

https://bitbucket.org/mirror/cpython/ · Python · 1869 lines · 1584 code · 178 blank · 107 comment · 117 complexity · a90bba6abd28161c18061a8cb0794575 MD5 · raw file
Large files are truncated click here to view the full file

from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
        cpython_only, captured_stdout
import io
import locale
import re
from re import Scanner
import sre_compile
import sre_constants
import sys
import string
import traceback
import unittest
from weakref import proxy

# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.

class S(str):
    def __getitem__(self, index):
        return S(super().__getitem__(index))

class B(bytes):
    def __getitem__(self, index):
        return B(super().__getitem__(index))

class ReTests(unittest.TestCase):

    def assertTypedEqual(self, actual, expect, msg=None):
        self.assertEqual(actual, expect, msg)
        def recurse(actual, expect):
            if isinstance(expect, (tuple, list)):
                for x, y in zip(actual, expect):
                    recurse(x, y)
            else:
                self.assertIs(type(actual), type(expect), msg)
        recurse(actual, expect)

    def checkPatternError(self, pattern, errmsg, pos=None):
        with self.assertRaises(re.error) as cm:
            re.compile(pattern)
        with self.subTest(pattern=pattern):
            err = cm.exception
            self.assertEqual(err.msg, errmsg)
            if pos is not None:
                self.assertEqual(err.pos, pos)

    def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
        with self.assertRaises(re.error) as cm:
            re.sub(pattern, repl, string)
        with self.subTest(pattern=pattern, repl=repl):
            err = cm.exception
            self.assertEqual(err.msg, errmsg)
            if pos is not None:
                self.assertEqual(err.pos, pos)

    def test_keep_buffer(self):
        # See bug 14212
        b = bytearray(b'x')
        it = re.finditer(b'a', b)
        with self.assertRaises(BufferError):
            b.extend(b'x'*400)
        list(it)
        del it
        gc_collect()
        b.extend(b'x'*400)

    def test_weakref(self):
        s = 'QabbbcR'
        x = re.compile('ab+c')
        y = proxy(x)
        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))

    def test_search_star_plus(self):
        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
        self.assertIsNone(re.search('x', 'aaa'))
        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
        self.assertIsNone(re.match('a+', 'xxx'))

    def bump_num(self, matchobj):
        int_value = int(matchobj.group(0))
        return str(int_value + 1)

    def test_basic_re_sub(self):
        self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
        self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
        self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
        self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
        self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
        self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
        for y in ("\xe0", "\u0430", "\U0001d49c"):
            self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')

        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
                         '9.3 -3 23x99y')

        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')

        self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
        for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
            with self.subTest(c):
                with self.assertRaises(re.error):
                    self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)

        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')

    def test_bug_449964(self):
        # fails for group followed by other escape
        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
                         'xx\bxx\b')

    def test_bug_449000(self):
        # Test for sub() on escaped characters
        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')

    def test_bug_1661(self):
        # Verify that flags do not get silently ignored with compiled patterns
        pattern = re.compile('.')
        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.compile, pattern, re.I)

    def test_bug_3629(self):
        # A regex that triggered a bug in the sre-code validator
        re.compile("(?P<quote>)(?(quote))")

    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
        self.assertEqual(re.sub('x', r'\377', 'x'), '\377')

        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        self.checkTemplateError('x', r'\400', 'x',
                                r'octal escape value \400 outside of '
                                r'range 0-0o377', 0)
        self.checkTemplateError('x', r'\777', 'x',
                                r'octal escape value \777 outside of '
                                r'range 0-0o377', 0)

        self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
        self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
        self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
        self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')

    def test_qualified_re_sub(self):
        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
        self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')

    def test_bug_114660(self):
        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
                         'hello there')

    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')

    def test_symbolic_groups(self):
        re.compile('(?P<a>x)(?P=a)(?(a)y)')
        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
        re.compile('(?P<a1>x)\1(?(1)y)')
        self.checkPatternError('(?P<a>)(?P<a>)',
                               "redefinition of group name 'a' as group 2; "
                               "was group 1")
        self.checkPatternError('(?P<a>(?P=a))',
                               "cannot refer to an open group", 10)
        self.checkPatternError('(?Pxy)', 'unknown extension ?Px')
        self.checkPatternError('(?P<a>)(?P=a', 'missing ), unterminated name', 11)
        self.checkPatternError('(?P=', 'missing group name', 4)
        self.checkPatternError('(?P=)', 'missing group name', 4)
        self.checkPatternError('(?P=1)', "bad character in group name '1'", 4)
        self.checkPatternError('(?P=a)', "unknown group name 'a'")
        self.checkPatternError('(?P=a1)', "unknown group name 'a1'")
        self.checkPatternError('(?P=a.)', "bad character in group name 'a.'", 4)
        self.checkPatternError('(?P<)', 'missing >, unterminated name', 4)
        self.checkPatternError('(?P<a', 'missing >, unterminated name', 4)
        self.checkPatternError('(?P<', 'missing group name', 4)
        self.checkPatternError('(?P<>)', 'missing group name', 4)
        self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
        self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
        self.checkPatternError(r'(?(', 'missing group name', 3)
        self.checkPatternError(r'(?())', 'missing group name', 3)
        self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
        self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
        self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
        self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
        # New valid/invalid identifiers in Python 3
        re.compile('(?P<Âľ>x)(?P=Âľ)(?(Âľ)y)')
        re.compile('(?P<đđŤđŚđ đŹđĄđ˘>x)(?P=đđŤđŚđ đŹđĄđ˘)(?(đđŤđŚđ đŹđĄđ˘)y)')
        self.checkPatternError('(?P<ÂŠ>x)', "bad character in group name 'ÂŠ'", 4)
        # Support > 100 groups.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        pat = '(?:%s)(?(200)z|t)' % pat
        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

    def test_symbolic_refs(self):
        self.checkTemplateError('(?P<a>x)', '\g<a', 'xx',
                                'missing >, unterminated name', 3)
        self.checkTemplateError('(?P<a>x)', '\g<', 'xx',
                                'missing group name', 3)
        self.checkTemplateError('(?P<a>x)', '\g', 'xx', 'missing <', 2)
        self.checkTemplateError('(?P<a>x)', '\g<a a>', 'xx',
                                "bad character in group name 'a a'", 3)
        self.checkTemplateError('(?P<a>x)', '\g<>', 'xx',
                                'missing group name', 3)
        self.checkTemplateError('(?P<a>x)', '\g<1a1>', 'xx',
                                "bad character in group name '1a1'", 3)
        self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
                                'invalid group reference')
        self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
                                'invalid group reference')
        with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
            re.sub('(?P<a>x)', '\g<ab>', 'xx')
        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
        self.checkTemplateError('(?P<a>x)', '\g<-1>', 'xx',
                                "bad character in group name '-1'", 3)
        # New valid/invalid identifiers in Python 3
        self.assertEqual(re.sub('(?P<Âľ>x)', r'\g<Âľ>', 'xx'), 'xx')
        self.assertEqual(re.sub('(?P<đđŤđŚđ đŹđĄđ˘>x)', r'\g<đđŤđŚđ đŹđĄđ˘>', 'xx'), 'xx')
        self.checkTemplateError('(?P<a>x)', '\g<ÂŠ>', 'xx',
                                "bad character in group name 'ÂŠ'", 3)
        # Support > 100 groups.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')

    def test_re_subn(self):
        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
        self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))

    def test_re_split(self):
        for string in ":a:b::c", S(":a:b::c"):
            self.assertTypedEqual(re.split(":", string),
                                  ['', 'a', 'b', '', 'c'])
            self.assertTypedEqual(re.split(":+", string),
                                  ['', 'a', 'b', 'c'])
            self.assertTypedEqual(re.split("(:+)", string),
                                  ['', ':', 'a', ':', 'b', '::', 'c'])
        for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                       memoryview(b":a:b::c")):
            self.assertTypedEqual(re.split(b":", string),
                                  [b'', b'a', b'b', b'', b'c'])
            self.assertTypedEqual(re.split(b":+", string),
                                  [b'', b'a', b'b', b'c'])
            self.assertTypedEqual(re.split(b"(:+)", string),
                                  [b'', b':', b'a', b':', b'b', b'::', b'c'])
        for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                        "\U0001d49c\U0001d49e\U0001d4b5"):
            string = ":%s:%s::%s" % (a, b, c)
            self.assertEqual(re.split(":", string), ['', a, b, '', c])
            self.assertEqual(re.split(":+", string), ['', a, b, c])
            self.assertEqual(re.split("(:+)", string),
                             ['', ':', a, ':', b, '::', c])

        self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:)+", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])

        for sep, expected in [
            (':*', ['', 'a', 'b', 'c']),
            ('(?::*)', ['', 'a', 'b', 'c']),
            ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
            ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
        ]:
            with self.subTest(sep=sep), self.assertWarns(FutureWarning):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

        for sep, expected in [
            ('', [':a:b::c']),
            (r'\b', [':a:b::c']),
            (r'(?=:)', [':a:b::c']),
            (r'(?<=:)', [':a:b::c']),
        ]:
            with self.subTest(sep=sep), self.assertRaises(ValueError):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

    def test_qualified_re_split(self):
        self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
        self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
                         ['', ':', 'a', ':', 'b::c'])
        with self.assertWarns(FutureWarning):
            self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
                             ['', ':', 'a', ':', 'b::c'])

    def test_re_findall(self):
        self.assertEqual(re.findall(":+", "abc"), [])
        for string in "a:b::c:::d", S("a:b::c:::d"):
            self.assertTypedEqual(re.findall(":+", string),
                                  [":", "::", ":::"])
            self.assertTypedEqual(re.findall("(:+)", string),
                                  [":", "::", ":::"])
            self.assertTypedEqual(re.findall("(:)(:*)", string),
                                  [(":", ""), (":", ":"), (":", "::")])
        for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                       memoryview(b"a:b::c:::d")):
            self.assertTypedEqual(re.findall(b":+", string),
                                  [b":", b"::", b":::"])
            self.assertTypedEqual(re.findall(b"(:+)", string),
                                  [b":", b"::", b":::"])
            self.assertTypedEqual(re.findall(b"(:)(:*)", string),
                                  [(b":", b""), (b":", b":"), (b":", b"::")])
        for x in ("\xe0", "\u0430", "\U0001d49c"):
            xx = x * 2
            xxx = x * 3
            string = "a%sb%sc%sd" % (x, xx, xxx)
            self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
            self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
            self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
                             [(x, ""), (x, x), (x, xx)])

    def test_bug_117612(self):
        self.assertEqual(re.findall(r"(a|(b))", "aba"),
                         [("a", ""),("b", "b"),("a", "")])

    def test_re_match(self):
        for string in 'a', S('a'):
            self.assertEqual(re.match('a', string).groups(), ())
            self.assertEqual(re.match('(a)', string).groups(), ('a',))
            self.assertEqual(re.match('(a)', string).group(0), 'a')
            self.assertEqual(re.match('(a)', string).group(1), 'a')
            self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
        for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
            self.assertEqual(re.match(b'a', string).groups(), ())
            self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
            self.assertEqual(re.match(b'(a)', string).group(0), b'a')
            self.assertEqual(re.match(b'(a)', string).group(1), b'a')
            self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
        for a in ("\xe0", "\u0430", "\U0001d49c"):
            self.assertEqual(re.match(a, a).groups(), ())
            self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
            self.assertEqual(re.match('(%s)' % a, a).group(0), a)
            self.assertEqual(re.match('(%s)' % a, a).group(1), a)
            self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))

        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))

    def test_group(self):
        class Index:
            def __init__(self, value):
                self.value = value
            def __index__(self):
                return self.value
        # A single group
        m = re.match('(a)(b)', 'ab')
        self.assertEqual(m.group(), 'ab')
        self.assertEqual(m.group(0), 'ab')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(Index(1)), 'a')
        self.assertRaises(IndexError, m.group, -1)
        self.assertRaises(IndexError, m.group, 3)
        self.assertRaises(IndexError, m.group, 1<<1000)
        self.assertRaises(IndexError, m.group, Index(1<<1000))
        self.assertRaises(IndexError, m.group, 'x')
        # Multiple groups
        self.assertEqual(m.group(2, 1), ('b', 'a'))
        self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))

    def test_re_fullmatch(self):
        # Issue 16203: Proposal: add re.fullmatch() method.
        self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
        for string in "ab", S("ab"):
            self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
        for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
            self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
        for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
            r = r"%s|%s" % (a, a + b)
            self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
        self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
        self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
        self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
        self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
        self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
        self.assertIsNone(re.fullmatch(r"a+", "ab"))
        self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
        self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
        self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
        self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
        self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
        self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))

        self.assertEqual(
            re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
        self.assertEqual(
            re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
        self.assertEqual(
            re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))

    def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
        self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertIsNone(p.match('abd'))
        self.assertIsNone(p.match('ac'))

        # Support > 100 groups.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        pat = '(?:%s)(?(200)z)' % pat
        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

        self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
        self.checkPatternError(r'()(?(1)a|b',
                               'missing ), unterminated subpattern', 2)
        self.checkPatternError(r'()(?(1)a|b|c)',
                               'conditional backref with more than '
                               'two branches', 10)

    def test_re_groupref_overflow(self):
        self.checkTemplateError('()', '\g<%s>' % sre_constants.MAXGROUPS, 'xx',
                                'invalid group reference', 3)
        self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
                               'invalid group reference', 10)

    def test_re_groupref(self):
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                         ('|', 'a'))
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                         (None, 'a'))
        self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
        self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                         ('a', 'a'))
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                         (None, None))

        self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)

    def test_groupdict(self):
        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
                                  'first second').groupdict(),
                         {'first':'first', 'second':'second'})

    def test_expand(self):
        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
                                  "first second")
                                  .expand(r"\2 \1 \g<second> \g<first>"),
                         "second first second first")
        self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
                                  "first")
                                  .expand(r"\2 \g<second>"),
                         " ")

    def test_repeat_minmax(self):
        self.assertIsNone(re.match("^(\w){1}$", "abc"))
        self.assertIsNone(re.match("^(\w){1}?$", "abc"))
        self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
        self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))

        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")

        self.assertIsNone(re.match("^x{1}$", "xxx"))
        self.assertIsNone(re.match("^x{1}?$", "xxx"))
        self.assertIsNone(re.match("^x{1,2}$", "xxx"))
        self.assertIsNone(re.match("^x{1,2}?$", "xxx"))

        self.assertTrue(re.match("^x{3}$", "xxx"))
        self.assertTrue(re.match("^x{1,3}$", "xxx"))
        self.assertTrue(re.match("^x{3,3}$", "xxx"))
        self.assertTrue(re.match("^x{1,4}$", "xxx"))
        self.assertTrue(re.match("^x{3,4}?$", "xxx"))
        self.assertTrue(re.match("^x{3}?$", "xxx"))
        self.assertTrue(re.match("^x{1,3}?$", "xxx"))
        self.assertTrue(re.match("^x{1,4}?$", "xxx"))
        self.assertTrue(re.match("^x{3,4}?$", "xxx"))

        self.assertIsNone(re.match("^x{}$", "xxx"))
        self.assertTrue(re.match("^x{}$", "x{}"))

        self.checkPatternError(r'x{2,1}',
                               'min repeat greater than max repeat', 2)

    def test_getattr(self):
        self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
        self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
        self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
        self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
        self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
                         {'first': 1, 'other': 2})

        self.assertEqual(re.match("(a)", "a").pos, 0)
        self.assertEqual(re.match("(a)", "a").endpos, 1)
        self.assertEqual(re.match("(a)", "a").string, "a")
        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
        self.assertTrue(re.match("(a)", "a").re)

        # Issue 14260. groupindex should be non-modifiable mapping.
        p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
        self.assertEqual(sorted(p.groupindex), ['first', 'other'])
        self.assertEqual(p.groupindex['other'], 2)
        with self.assertRaises(TypeError):
            p.groupindex['other'] = 0
        self.assertEqual(p.groupindex['other'], 2)

    def test_special_escapes(self):
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.ASCII).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.ASCII).group(1), "bx")
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
        self.assertEqual(re.search(br"\b(b.)\b",
                                   b"abcd abc bcd bx").group(1), b"bx")
        self.assertEqual(re.search(br"\B(b.)\B",
                                   b"abc bcd bc abxd").group(1), b"bx")
        self.assertEqual(re.search(br"\b(b.)\b",
                                   b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
        self.assertEqual(re.search(br"\B(b.)\B",
                                   b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
        self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
        self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
        self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(br"\d\D\w\W\s\S",
                                   b"1aa! a").group(0), b"1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.ASCII).group(0), "1aa! a")
        self.assertEqual(re.search(br"\d\D\w\W\s\S",
                                   b"1aa! a", re.LOCALE).group(0), b"1aa! a")

    def test_other_escapes(self):
        self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
        self.assertEqual(re.match(r"\(", '(').group(), '(')
        self.assertIsNone(re.match(r"\(", ')'))
        self.assertEqual(re.match(r"\\", '\\').group(), '\\')
        self.assertEqual(re.match(r"[\]]", ']').group(), ']')
        self.assertIsNone(re.match(r"[\]]", '['))
        self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
        self.assertIsNone(re.match(r"[a\-c]", 'b'))
        self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
        self.assertIsNone(re.match(r"[\^a]+", 'b'))
        re.purge()  # for warnings
        for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
            with self.subTest(c):
                self.assertRaises(re.error, re.compile, '\\%c' % c)
        for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
            with self.subTest(c):
                self.assertRaises(re.error, re.compile, '[\\%c]' % c)

    def test_string_boundaries(self):
        # See http://bugs.python.org/issue10713
        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
                         "abc")
        # There's a word boundary at the start of a string.
        self.assertTrue(re.match(r"\b", "abc"))
        # A non-empty string includes a non-boundary zero-length match.
        self.assertTrue(re.search(r"\B", "abc"))
        # There is no non-boundary match at the start of a string.
        self.assertFalse(re.match(r"\B", "abc"))
        # However, an empty string contains no word boundaries, and also no
        # non-boundaries.
        self.assertIsNone(re.search(r"\B", ""))
        # This one is questionable and different from the perlre behaviour,
        # but describes current behavior.
        self.assertIsNone(re.search(r"\b", ""))
        # A single word-character string has two boundaries, but no
        # non-boundary gaps.
        self.assertEqual(len(re.findall(r"\b", "a")), 2)
        self.assertEqual(len(re.findall(r"\B", "a")), 0)
        # If there are no words, there are no boundaries
        self.assertEqual(len(re.findall(r"\b", " ")), 0)
        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
        # Can match around the whitespace.
        self.assertEqual(len(re.findall(r"\B", " ")), 2)

    def test_bigcharset(self):
        self.assertEqual(re.match("([\u2222\u2223])",
                                  "\u2222").group(1), "\u2222")
        r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
        self.assertEqual(re.match(r, "\uff01").group(), "\uff01")

    def test_big_codesize(self):
        # Issue #1160
        r = re.compile('|'.join(('%d'%x for x in range(10000))))
        self.assertTrue(r.match('1000'))
        self.assertTrue(r.match('9999'))

    def test_anyall(self):
        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                         "a\nb")
        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
                         "a\n\nb")

    def test_lookahead(self):
        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")

        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")

        # Group reference.
        self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
        self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
        # Conditional group reference.
        self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
        self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
        self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
        self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
        self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
        # Group used before defined.
        self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
        self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
        self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))

    def test_lookbehind(self):
        self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
        self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
        self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
        self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
        # Group reference.
        self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
        self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
        self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
        self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
        # Conditional group reference.
        self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
        self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
        self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
        self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
        self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
        # Group used before defined.
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
        self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
        self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
        # Group defined in the same lookbehind pattern
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')

    def test_ignore_case(self):
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

        assert '\u212a'.lower() == 'k' # 'âŞ'
        self.assertTrue(re.match(r'K', '\u212a', re.I))
        self.assertTrue(re.match(r'k', '\u212a', re.I))
        self.assertTrue(re.match(r'\u212a', 'K', re.I))
        self.assertTrue(re.match(r'\u212a', 'k', re.I))
        assert '\u017f'.upper() == 'S' # 'Ĺż'
        self.assertTrue(re.match(r'S', '\u017f', re.I))
        self.assertTrue(re.match(r's', '\u017f', re.I))
        self.assertTrue(re.match(r'\u017f', 'S', re.I))
        self.assertTrue(re.match(r'\u017f', 's', re.I))
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ďŹ', 'ďŹ'
        self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
        self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))

    def test_ignore_case_set(self):
        self.assertTrue(re.match(r'[19A]', 'A', re.I))
        self.assertTrue(re.match(r'[19a]', 'a', re.I))
        self.assertTrue(re.match(r'[19a]', 'A', re.I))
        self.assertTrue(re.match(r'[19A]', 'a', re.I))
        self.assertTrue(re.match(br'[19A]', b'A', re.I))
        self.assertTrue(re.match(br'[19a]', b'a', re.I))
        self.assertTrue(re.match(br'[19a]', b'A', re.I))
        self.assertTrue(re.match(br'[19A]', b'a', re.I))
        assert '\u212a'.lower() == 'k' # 'âŞ'
        self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
        self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
        self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
        self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
        assert '\u017f'.upper() == 'S' # 'Ĺż'
        self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
        self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
        self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
        self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ďŹ', 'ďŹ'
        self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
        self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))

    def test_ignore_case_range(self):
        # Issues #3511, #17381.
        self.assertTrue(re.match(r'[9-a]', '_', re.I))
        self.assertIsNone(re.match(r'[9-A]', '_', re.I))
        self.assertTrue(re.match(br'[9-a]', b'_', re.I))
        self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
        self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
        self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
        self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
        self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
        self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
        self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
        self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
        self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
        self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
        self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
        self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
        self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))

        assert '\u212a'.lower() == 'k' # 'âŞ'
        self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
        self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
        self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
        self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
        assert '\u017f'.upper() == 'S' # 'Ĺż'
        self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
        self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
        self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
        self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ďŹ', 'ďŹ'
        self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
        self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))

    def test_category(self):
        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")

    def test_getlower(self):
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))

        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
        self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
        self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")

    def test_not_literal(self):
        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")

    def test_search_coverage(self):
        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
        self.assertEqual(re.search("a\s", "a ").group(0), "a ")

    def assertMatch(self, pattern, text, match=None, span=None,
                    matcher=re.match):
        if match is None and span is None:
            # the pattern matches the whole text
            match = text
            span = (0, len(text))
        elif match is None or span is None:
            raise ValueError('If match is not None, span should be specified '
                             '(and vice versa).')
        m = matcher(pattern, text)
        self.assertTrue(m)
        self.assertEqual(m.group(), match)
        self.assertEqual(m.span(), span)

    def test_re_escape(self):
        alnum_chars = string.ascii_letters + string.digits + '_'
        p = ''.join(chr(i) for i in range(256))
        for c in p:
            if c in alnum_chars:
                self.assertEqual(re.escape(c), c)
            elif c == '\x00':
                self.assertEqual(re.escape(c), '\\000')
            else:
                self.assertEqual(re.escape(c), '\\' + c)
            self.assertMatch(re.escape(c), c)
        self.assertMatch(re.escape(p), p)

    def test_re_escape_byte(self):
        alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
        p = bytes(range(256))
        for i in p:
            b = bytes([i])
            if b in alnum_chars:
                self.assertEqual(re.escape(b), b)
            elif i == 0:
                self.assertEqual(re.escape(b), b'\\000')
            else:
                self.assertEqual(re.escape(b), b'\\' + b)
            self.assertMatch(re.escape(b), b)
        self.assertMatch(re.escape(p), p)

    def test_re_escape_non_ascii(self):
        s = 'xxx\u2620\u2620\u2620xxx'
        s_escaped = re.escape(s)
        self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
        self.assertMatch(s_escaped, s)
        self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                         'x\u2620\u2620\u2620x', (2, 7), re.search)

    def test_re_escape_non_ascii_bytes(self):
        b = 'y\u2620y\u2620y'.encode('utf-8')
        b_escaped = re.escape(b)
        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
        self.assertMatch(b_escaped, b)
        res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
        self.assertEqual(len(res), 2)

    def test_pickling(self):
        import pickle
        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            pickled = pickle.dumps(oldpat, proto)
            newpat = pickle.loads(pickled)
            self.assertEqual(newpat, oldpat)
        # current pickle expects the _compile() reconstructor in re module
        from re import _compile

    def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE)

    def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
            self.assertTrue(re.compile('^pattern$', flag))
        for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
            self.assertTrue(re.compile(b'^pattern$', flag))

    def test_sre_character_literals(self):
        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
            if i < 256:
                self.assertTrue(re.match(r"\%03o" % i, chr(i)))
                self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
                self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
                self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
                self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
                self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
            if i < 0x10000:
                self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
                self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
                self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
            self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
            self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
        self.assertTrue(re.match(r"\0", "\000"))
        self.assertTrue(re.match(r"\08", "\0008"))
        self.assertTrue(re.match(r"\01", "\001"))
        self.assertTrue(re.match(r"\018", "\0018"))
        self.checkPatternError(r"\567",
                               r'octal escape value \567 outside of '
                               r'range 0-0o377', 0)
        self.checkPatternError(r"\911", 'invalid group reference', 0)
        self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
        self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
        self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
        self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
        self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
        self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
        self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)

    def test_sre_character_class_literals(self):
        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
            if i < 256:
                self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
                self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
                self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
                self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)…
Summary ✨

This is a comprehensive test suite for the re (regular expression) module in Python, covering various aspects such as syntax errors, search functionality, grouping, and locale sensitivity. It includes benchmarks to measure performance and tests with different input patterns and strings to ensure correct behavior under various conditions. The code uses the unittest framework to run and report on the test results.
Tech Fingerprint

Alerts (73)

'def' Ensure functions have docstrings for documentation
31 33 41 50 59 70 76 88 92 132 137 148 156 160 207 211 215 220 258 289 296 349 360 386 390 423 444 474 511 517 531 536 546 582 604 636 655 681 687 693 699 727 753 779 802 835 838 850 854 858 872 885 899 907 915 925 932 938 970
'isinstance(' Overuse may indicate design issues; consider polymorphism
34
'type(' Use isinstance() for type checking instead of type()
38
'list(' Avoid unnecessary list conversions; use generators where possible
65
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
66
'lambda' Avoid complex 'lambda' functions; prefer named functions for clarity and debugging
108 114
Complexity hotspot; lines 125 to 127 (total complexity: 3)
125 126 127
Complexity hotspot; line 860 (total complexity: 3)
860
Complexity hotspot; line 864 (total complexity: 3)
864
Complexity hotspot; lines 874 to 876 (total complexity: 3)
874 875 876