PageRenderTime 75ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/_sre/test/test_app_sre.py

https://bitbucket.org/ltratt/pypy
Python | 1001 lines | 986 code | 3 blank | 12 comment | 1 complexity | a8421dddfc1c4fea371797928df4a345 MD5 | raw file
Possible License(s): Apache-2.0, AGPL-3.0, BSD-3-Clause
  1. """Regular expression tests specific to _sre.py and accumulated during TDD."""
  2. import os
  3. import py
  4. from py.test import raises, skip
  5. from pypy.interpreter.gateway import app2interp_temp
  6. def init_app_test(cls, space):
  7. cls.w_s = space.appexec(
  8. [space.wrap(os.path.realpath(os.path.dirname(__file__)))],
  9. """(this_dir):
  10. import sys
  11. # Uh-oh, ugly hack
  12. sys.path.insert(0, this_dir)
  13. try:
  14. import support_test_app_sre
  15. return support_test_app_sre
  16. finally:
  17. sys.path.pop(0)
  18. """)
  19. class AppTestSrePy:
  20. def test_magic(self):
  21. import _sre, sre_constants
  22. assert sre_constants.MAGIC == _sre.MAGIC
  23. def test_codesize(self):
  24. import _sre
  25. assert _sre.getcodesize() == _sre.CODESIZE
  26. class AppTestSrePattern:
  27. def setup_class(cls):
  28. # This imports support_test_sre as the global "s"
  29. init_app_test(cls, cls.space)
  30. def test_copy(self):
  31. # copy support is disabled by default in _sre.c
  32. import re
  33. p = re.compile("b")
  34. raises(TypeError, p.__copy__) # p.__copy__() should raise
  35. raises(TypeError, p.__deepcopy__) # p.__deepcopy__() should raise
  36. def test_creation_attributes(self):
  37. import re
  38. pattern_string = "(b)l(?P<g>a)"
  39. p = re.compile(pattern_string, re.I | re.M)
  40. assert pattern_string == p.pattern
  41. assert re.I | re.M == p.flags
  42. assert 2 == p.groups
  43. assert {"g": 2} == p.groupindex
  44. def test_repeat_minmax_overflow(self):
  45. import re
  46. string = "x" * 100000
  47. assert re.match(r".{%d}" % (self.s.MAXREPEAT - 1), string) is None
  48. assert re.match(r".{,%d}" % (self.s.MAXREPEAT - 1), string).span() == (0, 100000)
  49. assert re.match(r".{%d,}?" % (self.s.MAXREPEAT - 1), string) is None
  50. raises(OverflowError, re.compile, r".{%d}" % self.s.MAXREPEAT)
  51. raises(OverflowError, re.compile, r".{,%d}" % self.s.MAXREPEAT)
  52. raises(OverflowError, re.compile, r".{%d,}?" % self.s.MAXREPEAT)
  53. def test_match_none(self):
  54. import re
  55. p = re.compile("bla")
  56. none_matches = ["b", "bl", "blub", "jupidu"]
  57. for string in none_matches:
  58. assert None == p.match(string)
  59. def test_pos_endpos(self):
  60. import re
  61. # XXX maybe fancier tests here
  62. p = re.compile("bl(a)")
  63. tests = [("abla", 0, 4), ("abla", 1, 4), ("ablaa", 1, 4)]
  64. for string, pos, endpos in tests:
  65. assert p.search(string, pos, endpos)
  66. tests = [("abla", 0, 3), ("abla", 2, 4)]
  67. for string, pos, endpos in tests:
  68. assert not p.search(string, pos, endpos)
  69. def test_findall(self):
  70. import re
  71. assert ["b"] == re.findall("b", "bla")
  72. assert ["a", "u"] == re.findall("b(.)", "abalbus")
  73. assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
  74. assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
  75. def test_finditer(self):
  76. import re
  77. it = re.finditer("b(.)", "brabbel")
  78. assert "br" == it.next().group(0)
  79. assert "bb" == it.next().group(0)
  80. raises(StopIteration, it.next)
  81. def test_split(self):
  82. import re
  83. assert ["a", "o", "u", ""] == re.split("b", "abobub")
  84. assert ["a", "o", "ub"] == re.split("b", "abobub", 2)
  85. assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa")
  86. assert ['', 'a', None, 'l', 'u', None, 'lla'] == (
  87. re.split("b([ua]|(s))", "balbulla"))
  88. def test_weakref(self):
  89. import re, _weakref
  90. _weakref.ref(re.compile(r""))
  91. class AppTestSreMatch:
  92. spaceconfig = dict(usemodules=('array', ))
  93. def test_copy(self):
  94. import re
  95. # copy support is disabled by default in _sre.c
  96. m = re.match("bla", "bla")
  97. raises(TypeError, m.__copy__)
  98. raises(TypeError, m.__deepcopy__)
  99. def test_match_attributes(self):
  100. import re
  101. c = re.compile("bla")
  102. m = c.match("blastring")
  103. assert "blastring" == m.string
  104. assert c == m.re
  105. assert 0 == m.pos
  106. assert 9 == m.endpos
  107. assert None == m.lastindex
  108. assert None == m.lastgroup
  109. assert ((0, 3),) == m.regs
  110. def test_match_attributes_with_groups(self):
  111. import re
  112. m = re.search("a(b)(?P<name>c)", "aabcd")
  113. assert 0 == m.pos
  114. assert 5 == m.endpos
  115. assert 2 == m.lastindex
  116. assert "name" == m.lastgroup
  117. assert ((1, 4), (2, 3), (3, 4)) == m.regs
  118. def test_regs_overlapping_groups(self):
  119. import re
  120. m = re.match("a((b)c)", "abc")
  121. assert ((0, 3), (1, 3), (1, 2)) == m.regs
  122. def test_start_end_span(self):
  123. import re
  124. m = re.search("a((b)c)", "aabcd")
  125. assert (1, 4) == (m.start(), m.end())
  126. assert (1, 4) == m.span()
  127. assert (2, 4) == (m.start(1), m.end(1))
  128. assert (2, 4) == m.span(1)
  129. assert (2, 3) == (m.start(2), m.end(2))
  130. assert (2, 3) == m.span(2)
  131. raises(IndexError, m.start, 3)
  132. raises(IndexError, m.end, 3)
  133. raises(IndexError, m.span, 3)
  134. raises(IndexError, m.start, -1)
  135. def test_groups(self):
  136. import re
  137. m = re.search("a((.).)", "aabcd")
  138. assert ("ab", "a") == m.groups()
  139. assert ("ab", "a") == m.groups(True)
  140. m = re.search("a((\d)|(\s))", "aa1b")
  141. assert ("1", "1", None) == m.groups()
  142. assert ("1", "1", True) == m.groups(True)
  143. m = re.search("a((\d)|(\s))", "a ")
  144. assert (" ", None, " ") == m.groups()
  145. m = re.match("(a)", "a")
  146. assert ("a",) == m.groups()
  147. def test_groupdict(self):
  148. import re
  149. m = re.search("a((.).)", "aabcd")
  150. assert {} == m.groupdict()
  151. m = re.search("a((?P<first>.).)", "aabcd")
  152. assert {"first": "a"} == m.groupdict()
  153. m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
  154. assert {"first": "1", "second": None} == m.groupdict()
  155. assert {"first": "1", "second": True} == m.groupdict(True)
  156. def test_group(self):
  157. import re
  158. m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
  159. assert "a1" == m.group()
  160. assert ("1", "1", None) == m.group(1, 2, 3)
  161. assert ("1", None) == m.group("first", "second")
  162. raises(IndexError, m.group, 1, 4)
  163. assert ("1", None) == m.group(1, "second")
  164. raises(IndexError, m.group, 'foobarbaz')
  165. raises(IndexError, m.group, 'first', 'foobarbaz')
  166. def test_group_takes_long(self):
  167. import re
  168. import sys
  169. if sys.version_info < (2, 7, 9):
  170. skip()
  171. assert re.match("(foo)", "foo").group(1L) == "foo"
  172. exc = raises(IndexError, re.match("", "").group, sys.maxint + 1)
  173. assert str(exc.value) == "no such group"
  174. def test_expand(self):
  175. import re
  176. m = re.search("a(..)(?P<name>..)", "ab1bc")
  177. assert "b1bcbc" == m.expand(r"\1\g<name>\2")
  178. def test_sub(self):
  179. import re
  180. assert "bbbbb" == re.sub("a", "b", "ababa")
  181. assert ("bbbbb", 3) == re.subn("a", "b", "ababa")
  182. assert "dddd" == re.sub("[abc]", "d", "abcd")
  183. assert ("dddd", 3) == re.subn("[abc]", "d", "abcd")
  184. assert "rbd\nbr\n" == re.sub("a(.)", r"b\1\n", "radar")
  185. assert ("rbd\nbr\n", 2) == re.subn("a(.)", r"b\1\n", "radar")
  186. assert ("bbbba", 2) == re.subn("a", "b", "ababa", 2)
  187. def test_sub_unicode(self):
  188. import re
  189. assert isinstance(re.sub(u"a", u"b", u""), unicode)
  190. # the input is returned unmodified if no substitution is performed,
  191. # which (if interpreted literally, as CPython does) gives the
  192. # following strangeish rules:
  193. assert isinstance(re.sub(u"a", u"b", "diwoiioamoi"), unicode)
  194. assert isinstance(re.sub(u"a", u"b", "diwoiiobmoi"), str)
  195. assert isinstance(re.sub(u'x', 'y', 'x'), str)
  196. def test_sub_callable(self):
  197. import re
  198. def call_me(match):
  199. ret = ""
  200. for char in match.group():
  201. ret += chr(ord(char) + 1)
  202. return ret
  203. assert ("bbbbb", 3) == re.subn("a", call_me, "ababa")
  204. def test_sub_callable_returns_none(self):
  205. import re
  206. def call_me(match):
  207. return None
  208. assert "acd" == re.sub("b", call_me, "abcd")
  209. def test_sub_callable_suddenly_unicode(self):
  210. import re
  211. def call_me(match):
  212. if match.group() == 'A':
  213. return unichr(0x3039)
  214. return ''
  215. assert (u"bb\u3039b", 2) == re.subn("[aA]", call_me, "babAb")
  216. def test_sub_subclass_of_str(self):
  217. import re
  218. class MyString(str):
  219. pass
  220. class MyUnicode(unicode):
  221. pass
  222. s1 = MyString('zz')
  223. s2 = re.sub('aa', 'bb', s1)
  224. assert s2 == s1
  225. assert type(s2) is str # and not MyString
  226. s2 = re.sub(u'aa', u'bb', s1)
  227. assert s2 == s1
  228. assert type(s2) is str # and not MyString
  229. u1 = MyUnicode(u'zz')
  230. u2 = re.sub(u'aa', u'bb', u1)
  231. assert u2 == u1
  232. assert type(u2) is unicode # and not MyUnicode
  233. def test_sub_bug(self):
  234. import re
  235. assert re.sub('=\w{2}', 'x', '=CA') == 'x'
  236. def test_match_array(self):
  237. import re, array
  238. a = array.array('c', 'hello')
  239. m = re.match('hel+', a)
  240. assert m.end() == 4
  241. def test_match_typeerror(self):
  242. import re
  243. raises(TypeError, re.match, 'hel+', list('hello'))
  244. def test_group_bugs(self):
  245. import re
  246. r = re.compile(r"""
  247. \&(?:
  248. (?P<escaped>\&) |
  249. (?P<named>[_a-z][_a-z0-9]*) |
  250. {(?P<braced>[_a-z][_a-z0-9]*)} |
  251. (?P<invalid>)
  252. )
  253. """, re.IGNORECASE | re.VERBOSE)
  254. matches = list(r.finditer('this &gift is for &{who} &&'))
  255. assert len(matches) == 3
  256. assert matches[0].groupdict() == {'escaped': None,
  257. 'named': 'gift',
  258. 'braced': None,
  259. 'invalid': None}
  260. assert matches[1].groupdict() == {'escaped': None,
  261. 'named': None,
  262. 'braced': 'who',
  263. 'invalid': None}
  264. assert matches[2].groupdict() == {'escaped': '&',
  265. 'named': None,
  266. 'braced': None,
  267. 'invalid': None}
  268. matches = list(r.finditer('&who likes &{what)')) # note the ')'
  269. assert len(matches) == 2
  270. assert matches[0].groupdict() == {'escaped': None,
  271. 'named': 'who',
  272. 'braced': None,
  273. 'invalid': None}
  274. assert matches[1].groupdict() == {'escaped': None,
  275. 'named': None,
  276. 'braced': None,
  277. 'invalid': ''}
  278. def test_sub_typecheck(self):
  279. import re
  280. KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
  281. raises(TypeError, KEYCRE.sub, "hello", {"%(": 1})
  282. class AppTestSreScanner:
  283. def test_scanner_attributes(self):
  284. import re
  285. p = re.compile("bla")
  286. s = p.scanner("blablubla")
  287. assert p == s.pattern
  288. def test_scanner_match(self):
  289. import re
  290. p = re.compile(".").scanner("bla")
  291. assert ("b", "l", "a") == (p.match().group(0),
  292. p.match().group(0), p.match().group(0))
  293. assert None == p.match()
  294. def test_scanner_match_detail(self):
  295. import re
  296. p = re.compile("a").scanner("aaXaa")
  297. assert "a" == p.match().group(0)
  298. assert "a" == p.match().group(0)
  299. assert None == p.match()
  300. assert "a" == p.match().group(0)
  301. assert "a" == p.match().group(0)
  302. assert None == p.match()
  303. assert None == p.match()
  304. assert None == p.match()
  305. def test_scanner_search(self):
  306. import re
  307. p = re.compile("\d").scanner("bla23c5a")
  308. assert ("2", "3", "5") == (p.search().group(0),
  309. p.search().group(0), p.search().group(0))
  310. assert None == p.search()
  311. def test_scanner_zero_width_match(self):
  312. import re, sys
  313. if sys.version_info[:2] == (2, 3):
  314. skip("2.3 is different here")
  315. p = re.compile(".*").scanner("bla")
  316. assert ("bla", "") == (p.search().group(0), p.search().group(0))
  317. assert None == p.search()
  318. class AppTestGetlower:
  319. spaceconfig = dict(usemodules=('_locale',))
  320. def setup_class(cls):
  321. # This imports support_test_sre as the global "s"
  322. init_app_test(cls, cls.space)
  323. def setup_method(self, method):
  324. import locale
  325. locale.setlocale(locale.LC_ALL, (None, None))
  326. def teardown_method(self, method):
  327. import locale
  328. locale.setlocale(locale.LC_ALL, (None, None))
  329. def test_getlower_no_flags(self):
  330. s = self.s
  331. UPPER_AE = "\xc4"
  332. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, UPPER_AE),
  333. (u"\u00c4", u"\u00c4"), (u"\u4444", u"\u4444")], 0)
  334. def test_getlower_locale(self):
  335. s = self.s
  336. import locale, sre_constants
  337. UPPER_AE = "\xc4"
  338. LOWER_AE = "\xe4"
  339. UPPER_PI = u"\u03a0"
  340. try:
  341. locale.setlocale(locale.LC_ALL, "de_DE")
  342. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, LOWER_AE),
  343. (u"\u00c4", u"\u00e4"), (UPPER_PI, UPPER_PI)],
  344. sre_constants.SRE_FLAG_LOCALE)
  345. except locale.Error:
  346. # skip test
  347. skip("unsupported locale de_DE")
  348. def test_getlower_unicode(self):
  349. s = self.s
  350. import sre_constants
  351. UPPER_AE = "\xc4"
  352. LOWER_AE = "\xe4"
  353. UPPER_PI = u"\u03a0"
  354. LOWER_PI = u"\u03c0"
  355. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, LOWER_AE),
  356. (u"\u00c4", u"\u00e4"), (UPPER_PI, LOWER_PI),
  357. (u"\u4444", u"\u4444")], sre_constants.SRE_FLAG_UNICODE)
  358. class AppTestSimpleSearches:
  359. spaceconfig = {"usemodules": ['array']}
  360. def test_search_simple_literal(self):
  361. import re
  362. assert re.search("bla", "bla")
  363. assert re.search("bla", "blab")
  364. assert not re.search("bla", "blu")
  365. def test_search_simple_ats(self):
  366. import re
  367. assert re.search("^bla", "bla")
  368. assert re.search("^bla", "blab")
  369. assert not re.search("^bla", "bbla")
  370. assert re.search("bla$", "abla")
  371. assert re.search("bla$", "bla\n")
  372. assert not re.search("bla$", "blaa")
  373. def test_search_simple_boundaries(self):
  374. import re
  375. UPPER_PI = u"\u03a0"
  376. assert re.search(r"bla\b", "bla")
  377. assert re.search(r"bla\b", "bla ja")
  378. assert re.search(r"bla\b", u"bla%s" % UPPER_PI)
  379. assert not re.search(r"bla\b", "blano")
  380. assert not re.search(r"bla\b", u"bla%s" % UPPER_PI, re.UNICODE)
  381. def test_search_simple_categories(self):
  382. import re
  383. LOWER_PI = u"\u03c0"
  384. INDIAN_DIGIT = u"\u0966"
  385. EM_SPACE = u"\u2001"
  386. LOWER_AE = "\xe4"
  387. assert re.search(r"bla\d\s\w", "bla3 b")
  388. assert re.search(r"b\d", u"b%s" % INDIAN_DIGIT, re.UNICODE)
  389. assert not re.search(r"b\D", u"b%s" % INDIAN_DIGIT, re.UNICODE)
  390. assert re.search(r"b\s", u"b%s" % EM_SPACE, re.UNICODE)
  391. assert not re.search(r"b\S", u"b%s" % EM_SPACE, re.UNICODE)
  392. assert re.search(r"b\w", u"b%s" % LOWER_PI, re.UNICODE)
  393. assert not re.search(r"b\W", u"b%s" % LOWER_PI, re.UNICODE)
  394. assert re.search(r"b\w", "b%s" % LOWER_AE, re.UNICODE)
  395. def test_search_simple_any(self):
  396. import re
  397. assert re.search(r"b..a", "jboaas")
  398. assert not re.search(r"b..a", "jbo\nas")
  399. assert re.search(r"b..a", "jbo\nas", re.DOTALL)
  400. def test_search_simple_in(self):
  401. import re
  402. UPPER_PI = u"\u03a0"
  403. LOWER_PI = u"\u03c0"
  404. EM_SPACE = u"\u2001"
  405. LINE_SEP = u"\u2028"
  406. assert re.search(r"b[\da-z]a", "bb1a")
  407. assert re.search(r"b[\da-z]a", "bbsa")
  408. assert not re.search(r"b[\da-z]a", "bbSa")
  409. assert re.search(r"b[^okd]a", "bsa")
  410. assert not re.search(r"b[^okd]a", "bda")
  411. assert re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  412. u"b%sa" % UPPER_PI) # bigcharset
  413. assert re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  414. u"b%sa" % EM_SPACE)
  415. assert not re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  416. u"b%sa" % LINE_SEP)
  417. def test_search_simple_literal_ignore(self):
  418. import re
  419. UPPER_PI = u"\u03a0"
  420. LOWER_PI = u"\u03c0"
  421. assert re.search(r"ba", "ba", re.IGNORECASE)
  422. assert re.search(r"ba", "BA", re.IGNORECASE)
  423. assert re.search(u"b%s" % UPPER_PI, u"B%s" % LOWER_PI,
  424. re.IGNORECASE | re.UNICODE)
  425. def test_search_simple_in_ignore(self):
  426. import re
  427. UPPER_PI = u"\u03a0"
  428. LOWER_PI = u"\u03c0"
  429. assert re.search(r"ba[A-C]", "bac", re.IGNORECASE)
  430. assert re.search(r"ba[a-c]", "baB", re.IGNORECASE)
  431. assert re.search(u"ba[%s]" % UPPER_PI, "ba%s" % LOWER_PI,
  432. re.IGNORECASE | re.UNICODE)
  433. assert re.search(r"ba[^A-C]", "bar", re.IGNORECASE)
  434. assert not re.search(r"ba[^A-C]", "baA", re.IGNORECASE)
  435. assert not re.search(r"ba[^A-C]", "baa", re.IGNORECASE)
  436. def test_search_simple_branch(self):
  437. import re
  438. assert re.search(r"a(bb|d[ef])b", "adeb")
  439. assert re.search(r"a(bb|d[ef])b", "abbb")
  440. def test_search_simple_repeat_one(self):
  441. import re
  442. assert re.search(r"aa+", "aa") # empty tail
  443. assert re.search(r"aa+ab", "aaaab") # backtracking
  444. assert re.search(r"aa*ab", "aab") # empty match
  445. assert re.search(r"a[bc]+", "abbccb")
  446. assert "abbcb" == re.search(r"a.+b", "abbcb\nb").group()
  447. assert "abbcb\nb" == re.search(r"a.+b", "abbcb\nb", re.DOTALL).group()
  448. assert re.search(r"ab+c", "aBbBbBc", re.IGNORECASE)
  449. assert not re.search(r"aa{2,3}", "aa") # string too short
  450. assert not re.search(r"aa{2,3}b", "aab") # too few repetitions
  451. assert not re.search(r"aa+b", "aaaac") # tail doesn't match
  452. def test_search_simple_min_repeat_one(self):
  453. import re
  454. assert re.search(r"aa+?", "aa") # empty tail
  455. assert re.search(r"aa+?ab", "aaaab") # forward tracking
  456. assert re.search(r"a[bc]+?", "abbccb")
  457. assert "abb" == re.search(r"a.+?b", "abbcb\nb").group()
  458. assert "a\nbb" == re.search(r"a.+b", "a\nbbc", re.DOTALL).group()
  459. assert re.search(r"ab+?c", "aBbBbBc", re.IGNORECASE)
  460. assert not re.search(r"aa+?", "a") # string too short
  461. assert not re.search(r"aa{2,3}?b", "aab") # too few repetitions
  462. assert not re.search(r"aa+?b", "aaaac") # tail doesn't match
  463. assert re.match(".*?cd", "abcabcde").end(0) == 7
  464. def test_search_simple_repeat_maximizing(self):
  465. import re
  466. assert not re.search(r"(ab){3,5}", "abab")
  467. assert not re.search(r"(ab){3,5}", "ababa")
  468. assert re.search(r"(ab){3,5}", "ababab")
  469. assert re.search(r"(ab){3,5}", "abababababab").end(0) == 10
  470. assert "ad" == re.search(r"(a.)*", "abacad").group(1)
  471. assert ("abcg", "cg") == (
  472. re.search(r"(ab(c.)*)+", "ababcecfabcg").groups())
  473. assert ("cg", "cg") == (
  474. re.search(r"(ab|(c.))+", "abcg").groups())
  475. assert ("ab", "cf") == (
  476. re.search(r"((c.)|ab)+", "cfab").groups())
  477. assert re.search(r".*", "")
  478. def test_search_simple_repeat_minimizing(self):
  479. import re
  480. assert not re.search(r"(ab){3,5}?", "abab")
  481. assert re.search(r"(ab){3,5}?", "ababab")
  482. assert re.search(r"b(a){3,5}?b", "baaaaab")
  483. assert not re.search(r"b(a){3,5}?b", "baaaaaab")
  484. assert re.search(r"a(b(.)+?)*", "abdbebb")
  485. def test_search_simple_groupref(self):
  486. import re
  487. UPPER_PI = u"\u03a0"
  488. LOWER_PI = u"\u03c0"
  489. assert re.match(r"((ab)+)c\1", "ababcabab")
  490. assert not re.match(r"((ab)+)c\1", "ababcab")
  491. assert not re.search(r"(a|(b))\2", "aa")
  492. assert re.match(r"((ab)+)c\1", "aBAbcAbaB", re.IGNORECASE)
  493. assert re.match(r"((a.)+)c\1", u"a%sca%s" % (UPPER_PI, LOWER_PI),
  494. re.IGNORECASE | re.UNICODE)
  495. def test_search_simple_groupref_exists(self):
  496. import re, sys
  497. if not sys.version_info[:2] == (2, 3):
  498. assert re.search(r"(<)?bla(?(1)>)", "<bla>")
  499. assert re.search(r"(<)?bla(?(1)>)", "bla")
  500. assert not re.match(r"(<)?bla(?(1)>)", "<bla")
  501. assert re.search(r"(<)?bla(?(1)>|u)", "blau")
  502. def test_search_simple_assert(self):
  503. import re
  504. assert re.search(r"b(?=\d\d).{3,}", "b23a")
  505. assert not re.search(r"b(?=\d\d).{3,}", "b2aa")
  506. assert re.search(r"b(?<=\d.)a", "2ba")
  507. assert not re.search(r"b(?<=\d.)a", "ba")
  508. def test_search_simple_assert_not(self):
  509. import re
  510. assert re.search(r"b(?<!\d.)a", "aba")
  511. assert re.search(r"b(?<!\d.)a", "ba")
  512. assert not re.search(r"b(?<!\d.)a", "11ba")
  513. class AppTestMarksStack:
  514. def test_mark_stack_branch(self):
  515. import re
  516. m = re.match("b(.)a|b.b", "bob")
  517. assert None == m.group(1)
  518. assert None == m.lastindex
  519. def test_mark_stack_repeat_one(self):
  520. import re
  521. m = re.match("\d+1((2)|(3))4", "2212413")
  522. assert ("2", "2", None) == m.group(1, 2, 3)
  523. assert 1 == m.lastindex
  524. def test_mark_stack_min_repeat_one(self):
  525. import re
  526. m = re.match("\d+?1((2)|(3))44", "221341244")
  527. assert ("2", "2", None) == m.group(1, 2, 3)
  528. assert 1 == m.lastindex
  529. def test_mark_stack_max_until(self):
  530. import re
  531. m = re.match("(\d)+1((2)|(3))4", "2212413")
  532. assert ("2", "2", None) == m.group(2, 3, 4)
  533. assert 2 == m.lastindex
  534. def test_mark_stack_min_until(self):
  535. import re
  536. m = re.match("(\d)+?1((2)|(3))44", "221341244")
  537. assert ("2", "2", None) == m.group(2, 3, 4)
  538. assert 2 == m.lastindex
  539. def test_bug_725149(self):
  540. # mark_stack_base restoring before restoring marks
  541. # test copied from CPython test
  542. import re
  543. assert re.match('(a)(?:(?=(b)*)c)*', 'abb').groups() == ('a', None)
  544. assert re.match('(a)((?!(b)*))*', 'abb').groups() == ('a', None, None)
  545. class AppTestOpcodes:
  546. spaceconfig = dict(usemodules=('_locale',))
  547. def setup_class(cls):
  548. if cls.runappdirect:
  549. py.test.skip("can only be run on py.py: _sre opcodes don't match")
  550. # This imports support_test_sre as the global "s"
  551. init_app_test(cls, cls.space)
  552. def test_length_optimization(self):
  553. s = self.s
  554. pattern = "bla"
  555. opcodes = [s.OPCODES["info"], 3, 3, len(pattern)] \
  556. + s.encode_literal(pattern) + [s.OPCODES["success"]]
  557. s.assert_no_match(opcodes, ["b", "bl", "ab"])
  558. def test_literal(self):
  559. s = self.s
  560. opcodes = s.encode_literal("bla") + [s.OPCODES["success"]]
  561. s.assert_no_match(opcodes, ["bl", "blu"])
  562. s.assert_match(opcodes, ["bla", "blab", "cbla", "bbla"])
  563. def test_not_literal(self):
  564. s = self.s
  565. opcodes = s.encode_literal("b") \
  566. + [s.OPCODES["not_literal"], ord("a"), s.OPCODES["success"]]
  567. s.assert_match(opcodes, ["bx", "ababy"])
  568. s.assert_no_match(opcodes, ["ba", "jabadu"])
  569. def test_unknown(self):
  570. s = self.s
  571. raises(RuntimeError, s.search, [55555], "b")
  572. def test_at_beginning(self):
  573. s = self.s
  574. for atname in ["at_beginning", "at_beginning_string"]:
  575. opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
  576. + s.encode_literal("bla") + [s.OPCODES["success"]]
  577. s.assert_match(opcodes, "bla")
  578. s.assert_no_match(opcodes, "abla")
  579. def test_at_beginning_line(self):
  580. s = self.s
  581. opcodes = [s.OPCODES["at"], s.ATCODES["at_beginning_line"]] \
  582. + s.encode_literal("bla") + [s.OPCODES["success"]]
  583. s.assert_match(opcodes, ["bla", "x\nbla"])
  584. s.assert_no_match(opcodes, ["abla", "abla\nubla"])
  585. def test_at_end(self):
  586. s = self.s
  587. opcodes = s.encode_literal("bla") \
  588. + [s.OPCODES["at"], s.ATCODES["at_end"], s.OPCODES["success"]]
  589. s.assert_match(opcodes, ["bla", "bla\n"])
  590. s.assert_no_match(opcodes, ["blau", "abla\nblau"])
  591. def test_at_end_line(self):
  592. s = self.s
  593. opcodes = s.encode_literal("bla") \
  594. + [s.OPCODES["at"], s.ATCODES["at_end_line"], s.OPCODES["success"]]
  595. s.assert_match(opcodes, ["bla\n", "bla\nx", "bla"])
  596. s.assert_no_match(opcodes, ["blau"])
  597. def test_at_end_string(self):
  598. s = self.s
  599. opcodes = s.encode_literal("bla") \
  600. + [s.OPCODES["at"], s.ATCODES["at_end_string"], s.OPCODES["success"]]
  601. s.assert_match(opcodes, "bla")
  602. s.assert_no_match(opcodes, ["blau", "bla\n"])
  603. def test_at_boundary(self):
  604. s = self.s
  605. for atname in "at_boundary", "at_loc_boundary", "at_uni_boundary":
  606. opcodes = s.encode_literal("bla") \
  607. + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
  608. s.assert_match(opcodes, ["bla", "bla ha", "bla,x"])
  609. s.assert_no_match(opcodes, ["blaja", ""])
  610. opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
  611. + s.encode_literal("bla") + [s.OPCODES["success"]]
  612. s.assert_match(opcodes, "bla")
  613. s.assert_no_match(opcodes, "")
  614. def test_at_non_boundary(self):
  615. s = self.s
  616. for atname in "at_non_boundary", "at_loc_non_boundary", "at_uni_non_boundary":
  617. opcodes = s.encode_literal("bla") \
  618. + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
  619. s.assert_match(opcodes, "blan")
  620. s.assert_no_match(opcodes, ["bla ja", "bla"])
  621. def test_at_loc_boundary(self):
  622. s = self.s
  623. import locale
  624. try:
  625. s.void_locale()
  626. opcodes1 = s.encode_literal("bla") \
  627. + [s.OPCODES["at"], s.ATCODES["at_loc_boundary"], s.OPCODES["success"]]
  628. opcodes2 = s.encode_literal("bla") \
  629. + [s.OPCODES["at"], s.ATCODES["at_loc_non_boundary"], s.OPCODES["success"]]
  630. s.assert_match(opcodes1, "bla\xFC")
  631. s.assert_no_match(opcodes2, "bla\xFC")
  632. oldlocale = locale.setlocale(locale.LC_ALL)
  633. locale.setlocale(locale.LC_ALL, "de_DE")
  634. s.assert_no_match(opcodes1, "bla\xFC")
  635. s.assert_match(opcodes2, "bla\xFC")
  636. locale.setlocale(locale.LC_ALL, oldlocale)
  637. except locale.Error:
  638. # skip test
  639. skip("locale error")
  640. def test_at_uni_boundary(self):
  641. s = self.s
  642. UPPER_PI = u"\u03a0"
  643. LOWER_PI = u"\u03c0"
  644. opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
  645. s.ATCODES["at_uni_boundary"], s.OPCODES["success"]]
  646. s.assert_match(opcodes, ["bla ha", u"bl%s ja" % UPPER_PI])
  647. s.assert_no_match(opcodes, [u"bla%s" % LOWER_PI])
  648. opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
  649. s.ATCODES["at_uni_non_boundary"], s.OPCODES["success"]]
  650. s.assert_match(opcodes, ["blaha", u"bl%sja" % UPPER_PI])
  651. def test_category_loc_word(self):
  652. s = self.s
  653. import locale
  654. try:
  655. s.void_locale()
  656. opcodes1 = s.encode_literal("b") \
  657. + [s.OPCODES["category"], s.CHCODES["category_loc_word"], s.OPCODES["success"]]
  658. opcodes2 = s.encode_literal("b") \
  659. + [s.OPCODES["category"], s.CHCODES["category_loc_not_word"], s.OPCODES["success"]]
  660. s.assert_no_match(opcodes1, "b\xFC")
  661. s.assert_no_match(opcodes1, u"b\u00FC")
  662. s.assert_match(opcodes2, "b\xFC")
  663. locale.setlocale(locale.LC_ALL, "de_DE")
  664. s.assert_match(opcodes1, "b\xFC")
  665. s.assert_no_match(opcodes1, u"b\u00FC")
  666. s.assert_no_match(opcodes2, "b\xFC")
  667. s.void_locale()
  668. except locale.Error:
  669. # skip test
  670. skip("locale error")
  671. def test_any(self):
  672. s = self.s
  673. opcodes = s.encode_literal("b") + [s.OPCODES["any"]] \
  674. + s.encode_literal("a") + [s.OPCODES["success"]]
  675. s.assert_match(opcodes, ["b a", "bla", "bboas"])
  676. s.assert_no_match(opcodes, ["b\na", "oba", "b"])
  677. def test_any_all(self):
  678. s = self.s
  679. opcodes = s.encode_literal("b") + [s.OPCODES["any_all"]] \
  680. + s.encode_literal("a") + [s.OPCODES["success"]]
  681. s.assert_match(opcodes, ["b a", "bla", "bboas", "b\na"])
  682. s.assert_no_match(opcodes, ["oba", "b"])
  683. def test_in_failure(self):
  684. s = self.s
  685. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 2, s.OPCODES["failure"]] \
  686. + s.encode_literal("a") + [s.OPCODES["success"]]
  687. s.assert_no_match(opcodes, ["ba", "bla"])
  688. def test_in_literal(self):
  689. s = self.s
  690. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7] \
  691. + s.encode_literal("la") + [s.OPCODES["failure"], s.OPCODES["failure"]] \
  692. + s.encode_literal("a") + [s.OPCODES["success"]]
  693. s.assert_match(opcodes, ["bla", "baa", "blbla"])
  694. s.assert_no_match(opcodes, ["ba", "bja", "blla"])
  695. def test_in_category(self):
  696. s = self.s
  697. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 6, s.OPCODES["category"],
  698. s.CHCODES["category_digit"], s.OPCODES["category"], s.CHCODES["category_space"],
  699. s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  700. s.assert_match(opcodes, ["b1a", "b a", "b4b\tas"])
  701. s.assert_no_match(opcodes, ["baa", "b5"])
  702. def test_in_charset_ucs2(self):
  703. import _sre
  704. if _sre.CODESIZE != 2:
  705. return
  706. s = self.s
  707. # charset bitmap for characters "l" and "h"
  708. bitmap = 6 * [0] + [4352] + 9 * [0]
  709. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 19, s.OPCODES["charset"]] \
  710. + bitmap + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  711. s.assert_match(opcodes, ["bla", "bha", "blbha"])
  712. s.assert_no_match(opcodes, ["baa", "bl"])
  713. def _test_in_bigcharset_ucs2(self):
  714. # disabled because this actually only works on big-endian machines
  715. if _sre.CODESIZE != 2:
  716. return
  717. s = self.s
  718. # constructing bigcharset for lowercase pi (\u03c0)
  719. UPPER_PI = u"\u03a0"
  720. LOWER_PI = u"\u03c0"
  721. bitmap = 6 * [0] + [4352] + 9 * [0]
  722. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 164, s.OPCODES["bigcharset"], 2] \
  723. + [0, 1] + 126 * [0] \
  724. + 16 * [0] \
  725. + 12 * [0] + [1] + 3 * [0] \
  726. + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  727. s.assert_match(opcodes, [u"b%sa" % LOWER_PI])
  728. s.assert_no_match(opcodes, [u"b%sa" % UPPER_PI])
  729. # XXX bigcharset test for ucs4 missing here
  730. def test_in_range(self):
  731. s = self.s
  732. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 5, s.OPCODES["range"],
  733. ord("1"), ord("9"), s.OPCODES["failure"]] \
  734. + s.encode_literal("a") + [s.OPCODES["success"]]
  735. s.assert_match(opcodes, ["b1a", "b56b7aa"])
  736. s.assert_no_match(opcodes, ["baa", "b5"])
  737. def test_in_negate(self):
  738. s = self.s
  739. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7, s.OPCODES["negate"]] \
  740. + s.encode_literal("la") + [s.OPCODES["failure"]] \
  741. + s.encode_literal("a") + [s.OPCODES["success"]]
  742. s.assert_match(opcodes, ["b1a", "bja", "bubua"])
  743. s.assert_no_match(opcodes, ["bla", "baa", "blbla"])
  744. def test_literal_ignore(self):
  745. s = self.s
  746. opcodes = s.encode_literal("b") \
  747. + [s.OPCODES["literal_ignore"], ord("a"), s.OPCODES["success"]]
  748. s.assert_match(opcodes, ["ba", "bA"])
  749. s.assert_no_match(opcodes, ["bb", "bu"])
  750. def test_not_literal_ignore(self):
  751. s = self.s
  752. UPPER_PI = u"\u03a0"
  753. opcodes = s.encode_literal("b") \
  754. + [s.OPCODES["not_literal_ignore"], ord("a"), s.OPCODES["success"]]
  755. s.assert_match(opcodes, ["bb", "bu", u"b%s" % UPPER_PI])
  756. s.assert_no_match(opcodes, ["ba", "bA"])
  757. def test_in_ignore(self):
  758. s = self.s
  759. opcodes = s.encode_literal("b") + [s.OPCODES["in_ignore"], 8] \
  760. + s.encode_literal("abc") + [s.OPCODES["failure"]] \
  761. + s.encode_literal("a") + [s.OPCODES["success"]]
  762. s.assert_match(opcodes, ["baa", "bAa", "bbbBa"])
  763. s.assert_no_match(opcodes, ["ba", "bja", "blla"])
  764. def test_in_jump_info(self):
  765. s = self.s
  766. for opname in "jump", "info":
  767. opcodes = s.encode_literal("b") \
  768. + [s.OPCODES[opname], 3, s.OPCODES["failure"], s.OPCODES["failure"]] \
  769. + s.encode_literal("a") + [s.OPCODES["success"]]
  770. s.assert_match(opcodes, "ba")
    def _test_mark(self):
        # Checks the bookkeeping left behind by two MARK opcodes after a
        # search over "abc".
        # NOTE(review): the leading underscore presumably keeps the test
        # runner from collecting this method, i.e. the test is disabled.
        # NOTE(review): `_sre` is not imported inside this method and
        # `self.create_state` is not defined in the visible part of the file,
        # so this would presumably fail if re-enabled as is -- confirm.
        s = self.s
        # XXX need to rewrite this implementation-independent
        opcodes = s.encode_literal("a") + [s.OPCODES["mark"], 0] \
            + s.encode_literal("b") + [s.OPCODES["mark"], 1, s.OPCODES["success"]]
        state = self.create_state("abc")
        _sre._sre_search(state, opcodes)
        assert 1 == state.lastindex
        assert 1 == state.lastmark
        # NB: the following are indexes from the start of the match
        assert [1, 2] == state.marks
  782. def test_branch(self):
  783. s = self.s
  784. opcodes = [s.OPCODES["branch"], 7] + s.encode_literal("ab") \
  785. + [s.OPCODES["jump"], 9, 7] + s.encode_literal("cd") \
  786. + [s.OPCODES["jump"], 2, s.OPCODES["failure"], s.OPCODES["success"]]
  787. s.assert_match(opcodes, ["ab", "cd"])
  788. s.assert_no_match(opcodes, ["aacas", "ac", "bla"])
  789. def test_repeat_one(self):
  790. s = self.s
  791. opcodes = [s.OPCODES["repeat_one"], 6, 1, self.s.MAXREPEAT] + s.encode_literal("a") \
  792. + [s.OPCODES["success"]] + s.encode_literal("ab") + [s.OPCODES["success"]]
  793. s.assert_match(opcodes, ["aab", "aaaab"])
  794. s.assert_no_match(opcodes, ["ab", "a"])
  795. def test_min_repeat_one(self):
  796. s = self.s
  797. opcodes = [s.OPCODES["min_repeat_one"], 5, 1, self.s.MAXREPEAT, s.OPCODES["any"]] \
  798. + [s.OPCODES["success"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  799. s.assert_match(opcodes, ["aab", "ardb", "bb"])
  800. s.assert_no_match(opcodes, ["b"])
  801. def test_repeat_maximizing(self):
  802. s = self.s
  803. opcodes = [s.OPCODES["repeat"], 5, 1, self.s.MAXREPEAT] + s.encode_literal("a") \
  804. + [s.OPCODES["max_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  805. s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
  806. s.assert_no_match(opcodes, ["aaa", "", "ac"])
  807. def test_max_until_zero_width_match(self):
  808. # re.compile won't compile prospective zero-with matches (all of them?),
  809. # so we can only produce an example by directly constructing bytecodes.
  810. # CPython 2.3 fails with a recursion limit exceeded error here.
  811. import sys
  812. if not sys.version_info[:2] == (2, 3):
  813. s = self.s
  814. opcodes = [s.OPCODES["repeat"], 10, 1, self.s.MAXREPEAT, s.OPCODES["repeat_one"],
  815. 6, 0, self.s.MAXREPEAT] + s.encode_literal("a") + [s.OPCODES["success"],
  816. s.OPCODES["max_until"], s.OPCODES["success"]]
  817. s.assert_match(opcodes, ["ab", "bb"])
  818. assert "" == s.search(opcodes, "bb").group(0)
  819. def test_repeat_minimizing(self):
  820. s = self.s
  821. opcodes = [s.OPCODES["repeat"], 4, 1, self.s.MAXREPEAT, s.OPCODES["any"],
  822. s.OPCODES["min_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  823. s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
  824. s.assert_no_match(opcodes, ["b"])
  825. assert "aab" == s.search(opcodes, "aabb").group(0)
  826. def test_groupref(self):
  827. s = self.s
  828. opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
  829. + s.encode_literal("a") + [s.OPCODES["groupref"], 0, s.OPCODES["success"]]
  830. s.assert_match(opcodes, ["bab", "aaa", "dad"])
  831. s.assert_no_match(opcodes, ["ba", "bad", "baad"])
  832. def test_groupref_ignore(self):
  833. s = self.s
  834. opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
  835. + s.encode_literal("a") + [s.OPCODES["groupref_ignore"], 0, s.OPCODES["success"]]
  836. s.assert_match(opcodes, ["bab", "baB", "Dad"])
  837. s.assert_no_match(opcodes, ["ba", "bad", "baad"])
  838. def test_assert(self):
  839. s = self.s
  840. opcodes = s.encode_literal("a") + [s.OPCODES["assert"], 4, 0] \
  841. + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
  842. assert "a" == s.search(opcodes, "ab").group(0)
  843. s.assert_no_match(opcodes, ["a", "aa"])
  844. def test_assert_not(self):
  845. s = self.s
  846. opcodes = s.encode_literal("a") + [s.OPCODES["assert_not"], 4, 0] \
  847. + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
  848. assert "a" == s.search(opcodes, "ac").group(0)
  849. s.assert_match(opcodes, ["a"])
  850. s.assert_no_match(opcodes, ["ab"])
  851. class AppTestOptimizations:
    """These tests try to trigger optimized edge cases."""
  853. def test_match_length_optimization(self):
  854. import re
  855. assert None == re.match("bla", "blub")
  856. def test_fast_search(self):
  857. import re
  858. assert None == re.search("bl", "abaub")
  859. assert None == re.search("bl", "b")
  860. assert ["bl", "bl"] == re.findall("bl", "blbl")
  861. assert ["a", "u"] == re.findall("bl(.)", "blablu")
  862. def test_branch_literal_shortcut(self):
  863. import re
  864. assert None == re.search("bl|a|c", "hello")
  865. def test_literal_search(self):
  866. import re
  867. assert re.search("b(\d)", "ababbbab1")
  868. assert None == re.search("b(\d)", "ababbbab")
  869. def test_repeat_one_literal_tail(self):
  870. import re
  871. assert re.search(".+ab", "wowowowawoabwowo")
  872. assert None == re.search(".+ab", "wowowaowowo")