PageRenderTime 56ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/Lib/test/test_re.py

https://bitbucket.org/mirror/cpython/
Python | 1869 lines | 1817 code | 34 blank | 18 comment | 18 complexity | a90bba6abd28161c18061a8cb0794575 MD5 | raw file
Possible License(s): Unlicense, 0BSD, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
  2. cpython_only, captured_stdout
  3. import io
  4. import locale
  5. import re
  6. from re import Scanner
  7. import sre_compile
  8. import sre_constants
  9. import sys
  10. import string
  11. import traceback
  12. import unittest
  13. from weakref import proxy
  14. # Misc tests from Tim Peters' re.doc
  15. # WARNING: Don't change details in these tests if you don't know
  16. # what you're doing. Some of these tests were carefully modeled to
  17. # cover most of the code.
  18. class S(str):
  19. def __getitem__(self, index):
  20. return S(super().__getitem__(index))
  21. class B(bytes):
  22. def __getitem__(self, index):
  23. return B(super().__getitem__(index))
  24. class ReTests(unittest.TestCase):
  25. def assertTypedEqual(self, actual, expect, msg=None):
  26. self.assertEqual(actual, expect, msg)
  27. def recurse(actual, expect):
  28. if isinstance(expect, (tuple, list)):
  29. for x, y in zip(actual, expect):
  30. recurse(x, y)
  31. else:
  32. self.assertIs(type(actual), type(expect), msg)
  33. recurse(actual, expect)
  34. def checkPatternError(self, pattern, errmsg, pos=None):
  35. with self.assertRaises(re.error) as cm:
  36. re.compile(pattern)
  37. with self.subTest(pattern=pattern):
  38. err = cm.exception
  39. self.assertEqual(err.msg, errmsg)
  40. if pos is not None:
  41. self.assertEqual(err.pos, pos)
  42. def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
  43. with self.assertRaises(re.error) as cm:
  44. re.sub(pattern, repl, string)
  45. with self.subTest(pattern=pattern, repl=repl):
  46. err = cm.exception
  47. self.assertEqual(err.msg, errmsg)
  48. if pos is not None:
  49. self.assertEqual(err.pos, pos)
    def test_keep_buffer(self):
        # See bug 14212
        # A pending finditer() iterator over a bytearray must keep the buffer
        # exported, so resizing the bytearray is refused while it is alive.
        b = bytearray(b'x')
        it = re.finditer(b'a', b)
        with self.assertRaises(BufferError):
            b.extend(b'x'*400)
        # Exhaust the iterator, then drop it and collect garbage; after that
        # the same resize is expected to go through without raising.
        list(it)
        del it
        gc_collect()
        b.extend(b'x'*400)
  60. def test_weakref(self):
  61. s = 'QabbbcR'
  62. x = re.compile('ab+c')
  63. y = proxy(x)
  64. self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
  65. def test_search_star_plus(self):
  66. self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
  67. self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
  68. self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
  69. self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
  70. self.assertIsNone(re.search('x', 'aaa'))
  71. self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
  72. self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
  73. self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
  74. self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
  75. self.assertIsNone(re.match('a+', 'xxx'))
  76. def bump_num(self, matchobj):
  77. int_value = int(matchobj.group(0))
  78. return str(int_value + 1)
  79. def test_basic_re_sub(self):
  80. self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
  81. self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
  82. self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
  83. self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
  84. self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
  85. self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
  86. for y in ("\xe0", "\u0430", "\U0001d49c"):
  87. self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
  88. self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
  89. self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
  90. '9.3 -3 24x100y')
  91. self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
  92. '9.3 -3 23x99y')
  93. self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
  94. self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
  95. s = r"\1\1"
  96. self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
  97. self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
  98. self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
  99. self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
  100. self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
  101. self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
  102. self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
  103. self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
  104. self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
  105. self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
  106. (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
  107. for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
  108. with self.subTest(c):
  109. with self.assertRaises(re.error):
  110. self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
  111. self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
  112. def test_bug_449964(self):
  113. # fails for group followed by other escape
  114. self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
  115. 'xx\bxx\b')
  116. def test_bug_449000(self):
  117. # Test for sub() on escaped characters
  118. self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
  119. 'abc\ndef\n')
  120. self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
  121. 'abc\ndef\n')
  122. self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
  123. 'abc\ndef\n')
  124. self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
  125. 'abc\ndef\n')
  126. def test_bug_1661(self):
  127. # Verify that flags do not get silently ignored with compiled patterns
  128. pattern = re.compile('.')
  129. self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
  130. self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
  131. self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
  132. self.assertRaises(ValueError, re.compile, pattern, re.I)
  133. def test_bug_3629(self):
  134. # A regex that triggered a bug in the sre-code validator
  135. re.compile("(?P<quote>)(?(quote))")
  136. def test_sub_template_numeric_escape(self):
  137. # bug 776311 and friends
  138. self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
  139. self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
  140. self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
  141. self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
  142. self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
  143. self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
  144. self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
  145. self.assertEqual(re.sub('x', r'\377', 'x'), '\377')
  146. self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
  147. self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
  148. self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
  149. self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
  150. self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
  151. self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
  152. self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
  153. self.checkTemplateError('x', r'\400', 'x',
  154. r'octal escape value \400 outside of '
  155. r'range 0-0o377', 0)
  156. self.checkTemplateError('x', r'\777', 'x',
  157. r'octal escape value \777 outside of '
  158. r'range 0-0o377', 0)
  159. self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
  160. self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
  161. self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
  162. self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
  163. self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
  164. self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
  165. self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
  166. self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
  167. self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
  168. self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
  169. self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
  170. self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
  171. # in python2.3 (etc), these loop endlessly in sre_parser.py
  172. self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
  173. self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
  174. 'xz8')
  175. self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
  176. 'xza')
  177. def test_qualified_re_sub(self):
  178. self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
  179. self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
  180. def test_bug_114660(self):
  181. self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
  182. 'hello there')
  183. def test_bug_462270(self):
  184. # Test for empty sub() behaviour, see SF bug #462270
  185. self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
  186. self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
  187. def test_symbolic_groups(self):
  188. re.compile('(?P<a>x)(?P=a)(?(a)y)')
  189. re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
  190. re.compile('(?P<a1>x)\1(?(1)y)')
  191. self.checkPatternError('(?P<a>)(?P<a>)',
  192. "redefinition of group name 'a' as group 2; "
  193. "was group 1")
  194. self.checkPatternError('(?P<a>(?P=a))',
  195. "cannot refer to an open group", 10)
  196. self.checkPatternError('(?Pxy)', 'unknown extension ?Px')
  197. self.checkPatternError('(?P<a>)(?P=a', 'missing ), unterminated name', 11)
  198. self.checkPatternError('(?P=', 'missing group name', 4)
  199. self.checkPatternError('(?P=)', 'missing group name', 4)
  200. self.checkPatternError('(?P=1)', "bad character in group name '1'", 4)
  201. self.checkPatternError('(?P=a)', "unknown group name 'a'")
  202. self.checkPatternError('(?P=a1)', "unknown group name 'a1'")
  203. self.checkPatternError('(?P=a.)', "bad character in group name 'a.'", 4)
  204. self.checkPatternError('(?P<)', 'missing >, unterminated name', 4)
  205. self.checkPatternError('(?P<a', 'missing >, unterminated name', 4)
  206. self.checkPatternError('(?P<', 'missing group name', 4)
  207. self.checkPatternError('(?P<>)', 'missing group name', 4)
  208. self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
  209. self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
  210. self.checkPatternError(r'(?(', 'missing group name', 3)
  211. self.checkPatternError(r'(?())', 'missing group name', 3)
  212. self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
  213. self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
  214. self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
  215. self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
  216. # New valid/invalid identifiers in Python 3
  217. re.compile('(?P<Âľ>x)(?P=Âľ)(?(Âľ)y)')
  218. re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
  219. self.checkPatternError('(?P<Š>x)', "bad character in group name 'Š'", 4)
  220. # Support > 100 groups.
  221. pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
  222. pat = '(?:%s)(?(200)z|t)' % pat
  223. self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
  224. def test_symbolic_refs(self):
  225. self.checkTemplateError('(?P<a>x)', '\g<a', 'xx',
  226. 'missing >, unterminated name', 3)
  227. self.checkTemplateError('(?P<a>x)', '\g<', 'xx',
  228. 'missing group name', 3)
  229. self.checkTemplateError('(?P<a>x)', '\g', 'xx', 'missing <', 2)
  230. self.checkTemplateError('(?P<a>x)', '\g<a a>', 'xx',
  231. "bad character in group name 'a a'", 3)
  232. self.checkTemplateError('(?P<a>x)', '\g<>', 'xx',
  233. 'missing group name', 3)
  234. self.checkTemplateError('(?P<a>x)', '\g<1a1>', 'xx',
  235. "bad character in group name '1a1'", 3)
  236. self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
  237. 'invalid group reference')
  238. self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
  239. 'invalid group reference')
  240. with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
  241. re.sub('(?P<a>x)', '\g<ab>', 'xx')
  242. self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
  243. self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
  244. self.checkTemplateError('(?P<a>x)', '\g<-1>', 'xx',
  245. "bad character in group name '-1'", 3)
  246. # New valid/invalid identifiers in Python 3
  247. self.assertEqual(re.sub('(?P<Âľ>x)', r'\g<Âľ>', 'xx'), 'xx')
  248. self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
  249. self.checkTemplateError('(?P<a>x)', '\g<Š>', 'xx',
  250. "bad character in group name 'Š'", 3)
  251. # Support > 100 groups.
  252. pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
  253. self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
  254. def test_re_subn(self):
  255. self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
  256. self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
  257. self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
  258. self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
  259. self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
    def test_re_split(self):
        # str subjects (including the S subclass): the pieces must compare
        # equal to the expected list and have the same element types.
        for string in ":a:b::c", S(":a:b::c"):
            self.assertTypedEqual(re.split(":", string),
                                  ['', 'a', 'b', '', 'c'])
            self.assertTypedEqual(re.split(":+", string),
                                  ['', 'a', 'b', 'c'])
            self.assertTypedEqual(re.split("(:+)", string),
                                  ['', ':', 'a', ':', 'b', '::', 'c'])
        # The same checks for bytes-like subjects; plain bytes are expected.
        for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                       memoryview(b":a:b::c")):
            self.assertTypedEqual(re.split(b":", string),
                                  [b'', b'a', b'b', b'', b'c'])
            self.assertTypedEqual(re.split(b":+", string),
                                  [b'', b'a', b'b', b'c'])
            self.assertTypedEqual(re.split(b"(:+)", string),
                                  [b'', b':', b'a', b':', b'b', b'::', b'c'])
        # Fields made of non-ASCII characters from three code point ranges.
        for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                        "\U0001d49c\U0001d49e\U0001d4b5"):
            string = ":%s:%s::%s" % (a, b, c)
            self.assertEqual(re.split(":", string), ['', a, b, '', c])
            self.assertEqual(re.split(":+", string), ['', a, b, c])
            self.assertEqual(re.split("(:+)", string),
                             ['', ':', a, ':', b, '::', c])

        # Capturing groups in the separator add their text (or None when the
        # group did not participate) to the result; non-capturing ones do not.
        self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:)+", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])

        # Separators using '*' are expected to emit a FutureWarning while
        # still producing the listed result.
        for sep, expected in [
            (':*', ['', 'a', 'b', 'c']),
            ('(?::*)', ['', 'a', 'b', 'c']),
            ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
            ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
        ]:
            with self.subTest(sep=sep), self.assertWarns(FutureWarning):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

        # These separators are expected to make split() raise ValueError, so
        # the inner comparison is not reached when the test passes.
        for sep, expected in [
            ('', [':a:b::c']),
            (r'\b', [':a:b::c']),
            (r'(?=:)', [':a:b::c']),
            (r'(?<=:)', [':a:b::c']),
        ]:
            with self.subTest(sep=sep), self.assertRaises(ValueError):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
  309. def test_qualified_re_split(self):
  310. self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
  311. self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
  312. self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
  313. ['', ':', 'a', ':', 'b::c'])
  314. self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
  315. ['', ':', 'a', ':', 'b::c'])
  316. with self.assertWarns(FutureWarning):
  317. self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
  318. ['', ':', 'a', ':', 'b::c'])
  319. def test_re_findall(self):
  320. self.assertEqual(re.findall(":+", "abc"), [])
  321. for string in "a:b::c:::d", S("a:b::c:::d"):
  322. self.assertTypedEqual(re.findall(":+", string),
  323. [":", "::", ":::"])
  324. self.assertTypedEqual(re.findall("(:+)", string),
  325. [":", "::", ":::"])
  326. self.assertTypedEqual(re.findall("(:)(:*)", string),
  327. [(":", ""), (":", ":"), (":", "::")])
  328. for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
  329. memoryview(b"a:b::c:::d")):
  330. self.assertTypedEqual(re.findall(b":+", string),
  331. [b":", b"::", b":::"])
  332. self.assertTypedEqual(re.findall(b"(:+)", string),
  333. [b":", b"::", b":::"])
  334. self.assertTypedEqual(re.findall(b"(:)(:*)", string),
  335. [(b":", b""), (b":", b":"), (b":", b"::")])
  336. for x in ("\xe0", "\u0430", "\U0001d49c"):
  337. xx = x * 2
  338. xxx = x * 3
  339. string = "a%sb%sc%sd" % (x, xx, xxx)
  340. self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
  341. self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
  342. self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
  343. [(x, ""), (x, x), (x, xx)])
  344. def test_bug_117612(self):
  345. self.assertEqual(re.findall(r"(a|(b))", "aba"),
  346. [("a", ""),("b", "b"),("a", "")])
  347. def test_re_match(self):
  348. for string in 'a', S('a'):
  349. self.assertEqual(re.match('a', string).groups(), ())
  350. self.assertEqual(re.match('(a)', string).groups(), ('a',))
  351. self.assertEqual(re.match('(a)', string).group(0), 'a')
  352. self.assertEqual(re.match('(a)', string).group(1), 'a')
  353. self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
  354. for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
  355. self.assertEqual(re.match(b'a', string).groups(), ())
  356. self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
  357. self.assertEqual(re.match(b'(a)', string).group(0), b'a')
  358. self.assertEqual(re.match(b'(a)', string).group(1), b'a')
  359. self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
  360. for a in ("\xe0", "\u0430", "\U0001d49c"):
  361. self.assertEqual(re.match(a, a).groups(), ())
  362. self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
  363. self.assertEqual(re.match('(%s)' % a, a).group(0), a)
  364. self.assertEqual(re.match('(%s)' % a, a).group(1), a)
  365. self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
  366. pat = re.compile('((a)|(b))(c)?')
  367. self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
  368. self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
  369. self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
  370. self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
  371. self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
  372. pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
  373. self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
  374. self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
  375. (None, 'b', None))
  376. self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
  377. def test_group(self):
  378. class Index:
  379. def __init__(self, value):
  380. self.value = value
  381. def __index__(self):
  382. return self.value
  383. # A single group
  384. m = re.match('(a)(b)', 'ab')
  385. self.assertEqual(m.group(), 'ab')
  386. self.assertEqual(m.group(0), 'ab')
  387. self.assertEqual(m.group(1), 'a')
  388. self.assertEqual(m.group(Index(1)), 'a')
  389. self.assertRaises(IndexError, m.group, -1)
  390. self.assertRaises(IndexError, m.group, 3)
  391. self.assertRaises(IndexError, m.group, 1<<1000)
  392. self.assertRaises(IndexError, m.group, Index(1<<1000))
  393. self.assertRaises(IndexError, m.group, 'x')
  394. # Multiple groups
  395. self.assertEqual(m.group(2, 1), ('b', 'a'))
  396. self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))
  397. def test_re_fullmatch(self):
  398. # Issue 16203: Proposal: add re.fullmatch() method.
  399. self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
  400. for string in "ab", S("ab"):
  401. self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
  402. for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
  403. self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
  404. for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
  405. r = r"%s|%s" % (a, a + b)
  406. self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
  407. self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
  408. self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
  409. self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
  410. self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
  411. self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
  412. self.assertIsNone(re.fullmatch(r"a+", "ab"))
  413. self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
  414. self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
  415. self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
  416. self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
  417. self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
  418. self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
  419. self.assertEqual(
  420. re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
  421. self.assertEqual(
  422. re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
  423. self.assertEqual(
  424. re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
  425. def test_re_groupref_exists(self):
  426. self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
  427. ('(', 'a'))
  428. self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
  429. (None, 'a'))
  430. self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
  431. self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
  432. self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
  433. ('a', 'b'))
  434. self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
  435. (None, 'd'))
  436. self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
  437. (None, 'd'))
  438. self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
  439. ('a', ''))
  440. # Tests for bug #1177831: exercise groups other than the first group
  441. p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
  442. self.assertEqual(p.match('abc').groups(),
  443. ('a', 'b', 'c'))
  444. self.assertEqual(p.match('ad').groups(),
  445. ('a', None, 'd'))
  446. self.assertIsNone(p.match('abd'))
  447. self.assertIsNone(p.match('ac'))
  448. # Support > 100 groups.
  449. pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
  450. pat = '(?:%s)(?(200)z)' % pat
  451. self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
  452. self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
  453. self.checkPatternError(r'()(?(1)a|b',
  454. 'missing ), unterminated subpattern', 2)
  455. self.checkPatternError(r'()(?(1)a|b|c)',
  456. 'conditional backref with more than '
  457. 'two branches', 10)
  458. def test_re_groupref_overflow(self):
  459. self.checkTemplateError('()', '\g<%s>' % sre_constants.MAXGROUPS, 'xx',
  460. 'invalid group reference', 3)
  461. self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
  462. 'invalid group reference', 10)
  463. def test_re_groupref(self):
  464. self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
  465. ('|', 'a'))
  466. self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
  467. (None, 'a'))
  468. self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
  469. self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
  470. self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
  471. ('a', 'a'))
  472. self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
  473. (None, None))
  474. self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
  475. def test_groupdict(self):
  476. self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
  477. 'first second').groupdict(),
  478. {'first':'first', 'second':'second'})
  479. def test_expand(self):
  480. self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
  481. "first second")
  482. .expand(r"\2 \1 \g<second> \g<first>"),
  483. "second first second first")
  484. self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
  485. "first")
  486. .expand(r"\2 \g<second>"),
  487. " ")
  488. def test_repeat_minmax(self):
  489. self.assertIsNone(re.match("^(\w){1}$", "abc"))
  490. self.assertIsNone(re.match("^(\w){1}?$", "abc"))
  491. self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
  492. self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
  493. self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
  494. self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
  495. self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
  496. self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
  497. self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
  498. self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
  499. self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
  500. self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
  501. self.assertIsNone(re.match("^x{1}$", "xxx"))
  502. self.assertIsNone(re.match("^x{1}?$", "xxx"))
  503. self.assertIsNone(re.match("^x{1,2}$", "xxx"))
  504. self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
  505. self.assertTrue(re.match("^x{3}$", "xxx"))
  506. self.assertTrue(re.match("^x{1,3}$", "xxx"))
  507. self.assertTrue(re.match("^x{3,3}$", "xxx"))
  508. self.assertTrue(re.match("^x{1,4}$", "xxx"))
  509. self.assertTrue(re.match("^x{3,4}?$", "xxx"))
  510. self.assertTrue(re.match("^x{3}?$", "xxx"))
  511. self.assertTrue(re.match("^x{1,3}?$", "xxx"))
  512. self.assertTrue(re.match("^x{1,4}?$", "xxx"))
  513. self.assertTrue(re.match("^x{3,4}?$", "xxx"))
  514. self.assertIsNone(re.match("^x{}$", "xxx"))
  515. self.assertTrue(re.match("^x{}$", "x{}"))
  516. self.checkPatternError(r'x{2,1}',
  517. 'min repeat greater than max repeat', 2)
  518. def test_getattr(self):
  519. self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
  520. self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
  521. self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
  522. self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
  523. self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
  524. {'first': 1, 'other': 2})
  525. self.assertEqual(re.match("(a)", "a").pos, 0)
  526. self.assertEqual(re.match("(a)", "a").endpos, 1)
  527. self.assertEqual(re.match("(a)", "a").string, "a")
  528. self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
  529. self.assertTrue(re.match("(a)", "a").re)
  530. # Issue 14260. groupindex should be non-modifiable mapping.
  531. p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
  532. self.assertEqual(sorted(p.groupindex), ['first', 'other'])
  533. self.assertEqual(p.groupindex['other'], 2)
  534. with self.assertRaises(TypeError):
  535. p.groupindex['other'] = 0
  536. self.assertEqual(p.groupindex['other'], 2)
  537. def test_special_escapes(self):
  538. self.assertEqual(re.search(r"\b(b.)\b",
  539. "abcd abc bcd bx").group(1), "bx")
  540. self.assertEqual(re.search(r"\B(b.)\B",
  541. "abc bcd bc abxd").group(1), "bx")
  542. self.assertEqual(re.search(r"\b(b.)\b",
  543. "abcd abc bcd bx", re.ASCII).group(1), "bx")
  544. self.assertEqual(re.search(r"\B(b.)\B",
  545. "abc bcd bc abxd", re.ASCII).group(1), "bx")
  546. self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
  547. self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
  548. self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
  549. self.assertEqual(re.search(br"\b(b.)\b",
  550. b"abcd abc bcd bx").group(1), b"bx")
  551. self.assertEqual(re.search(br"\B(b.)\B",
  552. b"abc bcd bc abxd").group(1), b"bx")
  553. self.assertEqual(re.search(br"\b(b.)\b",
  554. b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
  555. self.assertEqual(re.search(br"\B(b.)\B",
  556. b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
  557. self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
  558. self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
  559. self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
  560. self.assertEqual(re.search(r"\d\D\w\W\s\S",
  561. "1aa! a").group(0), "1aa! a")
  562. self.assertEqual(re.search(br"\d\D\w\W\s\S",
  563. b"1aa! a").group(0), b"1aa! a")
  564. self.assertEqual(re.search(r"\d\D\w\W\s\S",
  565. "1aa! a", re.ASCII).group(0), "1aa! a")
  566. self.assertEqual(re.search(br"\d\D\w\W\s\S",
  567. b"1aa! a", re.LOCALE).group(0), b"1aa! a")
  568. def test_other_escapes(self):
  569. self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
  570. self.assertEqual(re.match(r"\(", '(').group(), '(')
  571. self.assertIsNone(re.match(r"\(", ')'))
  572. self.assertEqual(re.match(r"\\", '\\').group(), '\\')
  573. self.assertEqual(re.match(r"[\]]", ']').group(), ']')
  574. self.assertIsNone(re.match(r"[\]]", '['))
  575. self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
  576. self.assertIsNone(re.match(r"[a\-c]", 'b'))
  577. self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
  578. self.assertIsNone(re.match(r"[\^a]+", 'b'))
  579. re.purge() # for warnings
  580. for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
  581. with self.subTest(c):
  582. self.assertRaises(re.error, re.compile, '\\%c' % c)
  583. for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
  584. with self.subTest(c):
  585. self.assertRaises(re.error, re.compile, '[\\%c]' % c)
  586. def test_string_boundaries(self):
  587. # See http://bugs.python.org/issue10713
  588. self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
  589. "abc")
  590. # There's a word boundary at the start of a string.
  591. self.assertTrue(re.match(r"\b", "abc"))
  592. # A non-empty string includes a non-boundary zero-length match.
  593. self.assertTrue(re.search(r"\B", "abc"))
  594. # There is no non-boundary match at the start of a string.
  595. self.assertFalse(re.match(r"\B", "abc"))
  596. # However, an empty string contains no word boundaries, and also no
  597. # non-boundaries.
  598. self.assertIsNone(re.search(r"\B", ""))
  599. # This one is questionable and different from the perlre behaviour,
  600. # but describes current behavior.
  601. self.assertIsNone(re.search(r"\b", ""))
  602. # A single word-character string has two boundaries, but no
  603. # non-boundary gaps.
  604. self.assertEqual(len(re.findall(r"\b", "a")), 2)
  605. self.assertEqual(len(re.findall(r"\B", "a")), 0)
  606. # If there are no words, there are no boundaries
  607. self.assertEqual(len(re.findall(r"\b", " ")), 0)
  608. self.assertEqual(len(re.findall(r"\b", " ")), 0)
  609. # Can match around the whitespace.
  610. self.assertEqual(len(re.findall(r"\B", " ")), 2)
  611. def test_bigcharset(self):
  612. self.assertEqual(re.match("([\u2222\u2223])",
  613. "\u2222").group(1), "\u2222")
  614. r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
  615. self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
  616. def test_big_codesize(self):
  617. # Issue #1160
  618. r = re.compile('|'.join(('%d'%x for x in range(10000))))
  619. self.assertTrue(r.match('1000'))
  620. self.assertTrue(r.match('9999'))
  621. def test_anyall(self):
  622. self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
  623. "a\nb")
  624. self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
  625. "a\n\nb")
  626. def test_lookahead(self):
  627. self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
  628. self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
  629. self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
  630. self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
  631. self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
  632. self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
  633. self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
  634. self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
  635. self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
  636. self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
  637. self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
  638. # Group reference.
  639. self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
  640. self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
  641. # Conditional group reference.
  642. self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
  643. self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
  644. self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
  645. self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
  646. self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
  647. # Group used before defined.
  648. self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
  649. self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
  650. self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
  651. def test_lookbehind(self):
  652. self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
  653. self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
  654. self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
  655. self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
  656. # Group reference.
  657. self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
  658. self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
  659. self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
  660. self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
  661. # Conditional group reference.
  662. self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
  663. self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
  664. self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
  665. self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
  666. self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
  667. # Group used before defined.
  668. self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
  669. self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
  670. self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
  671. # Group defined in the same lookbehind pattern
  672. self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
  673. self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
  674. self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
  675. self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
  676. def test_ignore_case(self):
  677. self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
  678. self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
  679. self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
  680. self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
  681. self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
  682. self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
  683. self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
  684. self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
  685. self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
  686. self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
  687. assert '\u212a'.lower() == 'k' # 'K'
  688. self.assertTrue(re.match(r'K', '\u212a', re.I))
  689. self.assertTrue(re.match(r'k', '\u212a', re.I))
  690. self.assertTrue(re.match(r'\u212a', 'K', re.I))
  691. self.assertTrue(re.match(r'\u212a', 'k', re.I))
  692. assert '\u017f'.upper() == 'S' # 'Ĺż'
  693. self.assertTrue(re.match(r'S', '\u017f', re.I))
  694. self.assertTrue(re.match(r's', '\u017f', re.I))
  695. self.assertTrue(re.match(r'\u017f', 'S', re.I))
  696. self.assertTrue(re.match(r'\u017f', 's', re.I))
  697. assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
  698. self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
  699. self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
  700. def test_ignore_case_set(self):
  701. self.assertTrue(re.match(r'[19A]', 'A', re.I))
  702. self.assertTrue(re.match(r'[19a]', 'a', re.I))
  703. self.assertTrue(re.match(r'[19a]', 'A', re.I))
  704. self.assertTrue(re.match(r'[19A]', 'a', re.I))
  705. self.assertTrue(re.match(br'[19A]', b'A', re.I))
  706. self.assertTrue(re.match(br'[19a]', b'a', re.I))
  707. self.assertTrue(re.match(br'[19a]', b'A', re.I))
  708. self.assertTrue(re.match(br'[19A]', b'a', re.I))
  709. assert '\u212a'.lower() == 'k' # 'K'
  710. self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
  711. self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
  712. self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
  713. self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
  714. assert '\u017f'.upper() == 'S' # 'Ĺż'
  715. self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
  716. self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
  717. self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
  718. self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
  719. assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
  720. self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
  721. self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
  722. def test_ignore_case_range(self):
  723. # Issues #3511, #17381.
  724. self.assertTrue(re.match(r'[9-a]', '_', re.I))
  725. self.assertIsNone(re.match(r'[9-A]', '_', re.I))
  726. self.assertTrue(re.match(br'[9-a]', b'_', re.I))
  727. self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
  728. self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
  729. self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
  730. self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
  731. self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
  732. self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
  733. self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
  734. self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
  735. self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
  736. self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
  737. self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
  738. self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
  739. self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
  740. assert '\u212a'.lower() == 'k' # 'K'
  741. self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
  742. self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
  743. self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
  744. self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
  745. assert '\u017f'.upper() == 'S' # 'Ĺż'
  746. self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
  747. self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
  748. self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
  749. self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
  750. assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
  751. self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
  752. self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
  753. def test_category(self):
  754. self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
  755. def test_getlower(self):
  756. import _sre
  757. self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
  758. self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
  759. self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
  760. self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
  761. self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
  762. self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
  763. self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
  764. self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
  765. def test_not_literal(self):
  766. self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
  767. self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
  768. def test_search_coverage(self):
  769. self.assertEqual(re.search("\s(b)", " b").group(1), "b")
  770. self.assertEqual(re.search("a\s", "a ").group(0), "a ")
  771. def assertMatch(self, pattern, text, match=None, span=None,
  772. matcher=re.match):
  773. if match is None and span is None:
  774. # the pattern matches the whole text
  775. match = text
  776. span = (0, len(text))
  777. elif match is None or span is None:
  778. raise ValueError('If match is not None, span should be specified '
  779. '(and vice versa).')
  780. m = matcher(pattern, text)
  781. self.assertTrue(m)
  782. self.assertEqual(m.group(), match)
  783. self.assertEqual(m.span(), span)
  784. def test_re_escape(self):
  785. alnum_chars = string.ascii_letters + string.digits + '_'
  786. p = ''.join(chr(i) for i in range(256))
  787. for c in p:
  788. if c in alnum_chars:
  789. self.assertEqual(re.escape(c), c)
  790. elif c == '\x00':
  791. self.assertEqual(re.escape(c), '\\000')
  792. else:
  793. self.assertEqual(re.escape(c), '\\' + c)
  794. self.assertMatch(re.escape(c), c)
  795. self.assertMatch(re.escape(p), p)
  796. def test_re_escape_byte(self):
  797. alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
  798. p = bytes(range(256))
  799. for i in p:
  800. b = bytes([i])
  801. if b in alnum_chars:
  802. self.assertEqual(re.escape(b), b)
  803. elif i == 0:
  804. self.assertEqual(re.escape(b), b'\\000')
  805. else:
  806. self.assertEqual(re.escape(b), b'\\' + b)
  807. self.assertMatch(re.escape(b), b)
  808. self.assertMatch(re.escape(p), p)
  809. def test_re_escape_non_ascii(self):
  810. s = 'xxx\u2620\u2620\u2620xxx'
  811. s_escaped = re.escape(s)
  812. self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
  813. self.assertMatch(s_escaped, s)
  814. self.assertMatch('.%s+.' % re.escape('\u2620'), s,
  815. 'x\u2620\u2620\u2620x', (2, 7), re.search)
  816. def test_re_escape_non_ascii_bytes(self):
  817. b = 'y\u2620y\u2620y'.encode('utf-8')
  818. b_escaped = re.escape(b)
  819. self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
  820. self.assertMatch(b_escaped, b)
  821. res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
  822. self.assertEqual(len(res), 2)
  823. def test_pickling(self):
  824. import pickle
  825. oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
  826. for proto in range(pickle.HIGHEST_PROTOCOL + 1):
  827. pickled = pickle.dumps(oldpat, proto)
  828. newpat = pickle.loads(pickled)
  829. self.assertEqual(newpat, oldpat)
  830. # current pickle expects the _compile() reconstructor in re module
  831. from re import _compile
  832. def test_constants(self):
  833. self.assertEqual(re.I, re.IGNORECASE)
  834. self.assertEqual(re.L, re.LOCALE)
  835. self.assertEqual(re.M, re.MULTILINE)
  836. self.assertEqual(re.S, re.DOTALL)
  837. self.assertEqual(re.X, re.VERBOSE)
  838. def test_flags(self):
  839. for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
  840. self.assertTrue(re.compile('^pattern$', flag))
  841. for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
  842. self.assertTrue(re.compile(b'^pattern$', flag))
  843. def test_sre_character_literals(self):
  844. for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
  845. if i < 256:
  846. self.assertTrue(re.match(r"\%03o" % i, chr(i)))
  847. self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
  848. self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
  849. self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
  850. self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
  851. self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
  852. if i < 0x10000:
  853. self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
  854. self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
  855. self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
  856. self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
  857. self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
  858. self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
  859. self.assertTrue(re.match(r"\0", "\000"))
  860. self.assertTrue(re.match(r"\08", "\0008"))
  861. self.assertTrue(re.match(r"\01", "\001"))
  862. self.assertTrue(re.match(r"\018", "\0018"))
  863. self.checkPatternError(r"\567",
  864. r'octal escape value \567 outside of '
  865. r'range 0-0o377', 0)
  866. self.checkPatternError(r"\911", 'invalid group reference', 0)
  867. self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
  868. self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
  869. self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
  870. self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
  871. self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
  872. self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
  873. self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)
  874. def test_sre_character_class_literals(self):
  875. for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
  876. if i < 256:
  877. self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
  878. self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
  879. self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
  880. self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)

Large files are truncated, but you can click here to view the full file