PageRenderTime 78ms CodeModel.GetById 39ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/module/_sre/test/test_app_sre.py

https://bitbucket.org/mpavone/pypy
Python | 974 lines | 961 code | 1 blank | 12 comment | 1 complexity | d7578a43bf47f3cd225c92faaaa3a3d5 MD5 | raw file
  1. """Regular expression tests specific to _sre.py and accumulated during TDD."""
  2. import autopath
  3. import py
  4. from py.test import raises, skip
  5. from pypy.interpreter.gateway import app2interp_temp
  6. def init_app_test(cls, space):
  7. cls.w_s = space.appexec([space.wrap(autopath.this_dir)],
  8. """(this_dir):
  9. import sys
  10. # Uh-oh, ugly hack
  11. sys.path.insert(0, this_dir)
  12. try:
  13. import support_test_app_sre
  14. return support_test_app_sre
  15. finally:
  16. sys.path.pop(0)
  17. """)
  18. class AppTestSrePy:
  19. def test_magic(self):
  20. import _sre, sre_constants
  21. assert sre_constants.MAGIC == _sre.MAGIC
  22. def test_codesize(self):
  23. import _sre
  24. assert _sre.getcodesize() == _sre.CODESIZE
  25. class AppTestSrePattern:
  26. def test_copy(self):
  27. # copy support is disabled by default in _sre.c
  28. import re
  29. p = re.compile("b")
  30. raises(TypeError, p.__copy__) # p.__copy__() should raise
  31. raises(TypeError, p.__deepcopy__) # p.__deepcopy__() should raise
  32. def test_creation_attributes(self):
  33. import re
  34. pattern_string = "(b)l(?P<g>a)"
  35. p = re.compile(pattern_string, re.I | re.M)
  36. assert pattern_string == p.pattern
  37. assert re.I | re.M == p.flags
  38. assert 2 == p.groups
  39. assert {"g": 2} == p.groupindex
  40. def test_match_none(self):
  41. import re
  42. p = re.compile("bla")
  43. none_matches = ["b", "bl", "blub", "jupidu"]
  44. for string in none_matches:
  45. assert None == p.match(string)
  46. def test_pos_endpos(self):
  47. import re
  48. # XXX maybe fancier tests here
  49. p = re.compile("bl(a)")
  50. tests = [("abla", 0, 4), ("abla", 1, 4), ("ablaa", 1, 4)]
  51. for string, pos, endpos in tests:
  52. assert p.search(string, pos, endpos)
  53. tests = [("abla", 0, 3), ("abla", 2, 4)]
  54. for string, pos, endpos in tests:
  55. assert not p.search(string, pos, endpos)
  56. def test_findall(self):
  57. import re
  58. assert ["b"] == re.findall("b", "bla")
  59. assert ["a", "u"] == re.findall("b(.)", "abalbus")
  60. assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
  61. assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
  62. def test_finditer(self):
  63. import re
  64. it = re.finditer("b(.)", "brabbel")
  65. assert "br" == it.next().group(0)
  66. assert "bb" == it.next().group(0)
  67. raises(StopIteration, it.next)
  68. def test_split(self):
  69. import re
  70. assert ["a", "o", "u", ""] == re.split("b", "abobub")
  71. assert ["a", "o", "ub"] == re.split("b", "abobub", 2)
  72. assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa")
  73. assert ['', 'a', None, 'l', 'u', None, 'lla'] == (
  74. re.split("b([ua]|(s))", "balbulla"))
  75. def test_weakref(self):
  76. import re, _weakref
  77. _weakref.ref(re.compile(r""))
  78. class AppTestSreMatch:
  79. spaceconfig = dict(usemodules=('array', ))
  80. def test_copy(self):
  81. import re
  82. # copy support is disabled by default in _sre.c
  83. m = re.match("bla", "bla")
  84. raises(TypeError, m.__copy__)
  85. raises(TypeError, m.__deepcopy__)
  86. def test_match_attributes(self):
  87. import re
  88. c = re.compile("bla")
  89. m = c.match("blastring")
  90. assert "blastring" == m.string
  91. assert c == m.re
  92. assert 0 == m.pos
  93. assert 9 == m.endpos
  94. assert None == m.lastindex
  95. assert None == m.lastgroup
  96. assert ((0, 3),) == m.regs
  97. def test_match_attributes_with_groups(self):
  98. import re
  99. m = re.search("a(b)(?P<name>c)", "aabcd")
  100. assert 0 == m.pos
  101. assert 5 == m.endpos
  102. assert 2 == m.lastindex
  103. assert "name" == m.lastgroup
  104. assert ((1, 4), (2, 3), (3, 4)) == m.regs
  105. def test_regs_overlapping_groups(self):
  106. import re
  107. m = re.match("a((b)c)", "abc")
  108. assert ((0, 3), (1, 3), (1, 2)) == m.regs
  109. def test_start_end_span(self):
  110. import re
  111. m = re.search("a((b)c)", "aabcd")
  112. assert (1, 4) == (m.start(), m.end())
  113. assert (1, 4) == m.span()
  114. assert (2, 4) == (m.start(1), m.end(1))
  115. assert (2, 4) == m.span(1)
  116. assert (2, 3) == (m.start(2), m.end(2))
  117. assert (2, 3) == m.span(2)
  118. raises(IndexError, m.start, 3)
  119. raises(IndexError, m.end, 3)
  120. raises(IndexError, m.span, 3)
  121. raises(IndexError, m.start, -1)
  122. def test_groups(self):
  123. import re
  124. m = re.search("a((.).)", "aabcd")
  125. assert ("ab", "a") == m.groups()
  126. assert ("ab", "a") == m.groups(True)
  127. m = re.search("a((\d)|(\s))", "aa1b")
  128. assert ("1", "1", None) == m.groups()
  129. assert ("1", "1", True) == m.groups(True)
  130. m = re.search("a((\d)|(\s))", "a ")
  131. assert (" ", None, " ") == m.groups()
  132. m = re.match("(a)", "a")
  133. assert ("a",) == m.groups()
  134. def test_groupdict(self):
  135. import re
  136. m = re.search("a((.).)", "aabcd")
  137. assert {} == m.groupdict()
  138. m = re.search("a((?P<first>.).)", "aabcd")
  139. assert {"first": "a"} == m.groupdict()
  140. m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
  141. assert {"first": "1", "second": None} == m.groupdict()
  142. assert {"first": "1", "second": True} == m.groupdict(True)
  143. def test_group(self):
  144. import re
  145. m = re.search("a((?P<first>\d)|(?P<second>\s))", "aa1b")
  146. assert "a1" == m.group()
  147. assert ("1", "1", None) == m.group(1, 2, 3)
  148. assert ("1", None) == m.group("first", "second")
  149. raises(IndexError, m.group, 1, 4)
  150. def test_expand(self):
  151. import re
  152. m = re.search("a(..)(?P<name>..)", "ab1bc")
  153. assert "b1bcbc" == m.expand(r"\1\g<name>\2")
  154. def test_sub(self):
  155. import re
  156. assert "bbbbb" == re.sub("a", "b", "ababa")
  157. assert ("bbbbb", 3) == re.subn("a", "b", "ababa")
  158. assert "dddd" == re.sub("[abc]", "d", "abcd")
  159. assert ("dddd", 3) == re.subn("[abc]", "d", "abcd")
  160. assert "rbd\nbr\n" == re.sub("a(.)", r"b\1\n", "radar")
  161. assert ("rbd\nbr\n", 2) == re.subn("a(.)", r"b\1\n", "radar")
  162. assert ("bbbba", 2) == re.subn("a", "b", "ababa", 2)
  163. def test_sub_unicode(self):
  164. import re
  165. assert isinstance(re.sub(u"a", u"b", u""), unicode)
  166. # the input is returned unmodified if no substitution is performed,
  167. # which (if interpreted literally, as CPython does) gives the
  168. # following strangeish rules:
  169. assert isinstance(re.sub(u"a", u"b", "diwoiioamoi"), unicode)
  170. assert isinstance(re.sub(u"a", u"b", "diwoiiobmoi"), str)
  171. assert isinstance(re.sub(u'x', 'y', 'x'), str)
  172. def test_sub_callable(self):
  173. import re
  174. def call_me(match):
  175. ret = ""
  176. for char in match.group():
  177. ret += chr(ord(char) + 1)
  178. return ret
  179. assert ("bbbbb", 3) == re.subn("a", call_me, "ababa")
  180. def test_sub_callable_returns_none(self):
  181. import re
  182. def call_me(match):
  183. return None
  184. assert "acd" == re.sub("b", call_me, "abcd")
  185. def test_sub_callable_suddenly_unicode(self):
  186. import re
  187. def call_me(match):
  188. if match.group() == 'A':
  189. return unichr(0x3039)
  190. return ''
  191. assert (u"bb\u3039b", 2) == re.subn("[aA]", call_me, "babAb")
  192. def test_sub_subclass_of_str(self):
  193. import re
  194. class MyString(str):
  195. pass
  196. class MyUnicode(unicode):
  197. pass
  198. s1 = MyString('zz')
  199. s2 = re.sub('aa', 'bb', s1)
  200. assert s2 == s1
  201. assert type(s2) is str # and not MyString
  202. s2 = re.sub(u'aa', u'bb', s1)
  203. assert s2 == s1
  204. assert type(s2) is str # and not MyString
  205. u1 = MyUnicode(u'zz')
  206. u2 = re.sub(u'aa', u'bb', u1)
  207. assert u2 == u1
  208. assert type(u2) is unicode # and not MyUnicode
  209. def test_match_array(self):
  210. import re, array
  211. a = array.array('c', 'hello')
  212. m = re.match('hel+', a)
  213. assert m.end() == 4
  214. def test_match_typeerror(self):
  215. import re
  216. raises(TypeError, re.match, 'hel+', list('hello'))
  217. def test_group_bugs(self):
  218. import re
  219. r = re.compile(r"""
  220. \&(?:
  221. (?P<escaped>\&) |
  222. (?P<named>[_a-z][_a-z0-9]*) |
  223. {(?P<braced>[_a-z][_a-z0-9]*)} |
  224. (?P<invalid>)
  225. )
  226. """, re.IGNORECASE | re.VERBOSE)
  227. matches = list(r.finditer('this &gift is for &{who} &&'))
  228. assert len(matches) == 3
  229. assert matches[0].groupdict() == {'escaped': None,
  230. 'named': 'gift',
  231. 'braced': None,
  232. 'invalid': None}
  233. assert matches[1].groupdict() == {'escaped': None,
  234. 'named': None,
  235. 'braced': 'who',
  236. 'invalid': None}
  237. assert matches[2].groupdict() == {'escaped': '&',
  238. 'named': None,
  239. 'braced': None,
  240. 'invalid': None}
  241. matches = list(r.finditer('&who likes &{what)')) # note the ')'
  242. assert len(matches) == 2
  243. assert matches[0].groupdict() == {'escaped': None,
  244. 'named': 'who',
  245. 'braced': None,
  246. 'invalid': None}
  247. assert matches[1].groupdict() == {'escaped': None,
  248. 'named': None,
  249. 'braced': None,
  250. 'invalid': ''}
  251. def test_sub_typecheck(self):
  252. import re
  253. KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
  254. raises(TypeError, KEYCRE.sub, "hello", {"%(": 1})
  255. class AppTestSreScanner:
  256. def test_scanner_attributes(self):
  257. import re
  258. p = re.compile("bla")
  259. s = p.scanner("blablubla")
  260. assert p == s.pattern
  261. def test_scanner_match(self):
  262. import re
  263. p = re.compile(".").scanner("bla")
  264. assert ("b", "l", "a") == (p.match().group(0),
  265. p.match().group(0), p.match().group(0))
  266. assert None == p.match()
  267. def test_scanner_match_detail(self):
  268. import re
  269. p = re.compile("a").scanner("aaXaa")
  270. assert "a" == p.match().group(0)
  271. assert "a" == p.match().group(0)
  272. assert None == p.match()
  273. assert "a" == p.match().group(0)
  274. assert "a" == p.match().group(0)
  275. assert None == p.match()
  276. assert None == p.match()
  277. assert None == p.match()
  278. def test_scanner_search(self):
  279. import re
  280. p = re.compile("\d").scanner("bla23c5a")
  281. assert ("2", "3", "5") == (p.search().group(0),
  282. p.search().group(0), p.search().group(0))
  283. assert None == p.search()
  284. def test_scanner_zero_width_match(self):
  285. import re, sys
  286. if sys.version_info[:2] == (2, 3):
  287. skip("2.3 is different here")
  288. p = re.compile(".*").scanner("bla")
  289. assert ("bla", "") == (p.search().group(0), p.search().group(0))
  290. assert None == p.search()
  291. class AppTestGetlower:
  292. spaceconfig = dict(usemodules=('_locale',))
  293. def setup_class(cls):
  294. # This imports support_test_sre as the global "s"
  295. init_app_test(cls, cls.space)
  296. def setup_method(self, method):
  297. import locale
  298. locale.setlocale(locale.LC_ALL, (None, None))
  299. def teardown_method(self, method):
  300. import locale
  301. locale.setlocale(locale.LC_ALL, (None, None))
  302. def test_getlower_no_flags(self):
  303. s = self.s
  304. UPPER_AE = "\xc4"
  305. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, UPPER_AE),
  306. (u"\u00c4", u"\u00c4"), (u"\u4444", u"\u4444")], 0)
  307. def test_getlower_locale(self):
  308. s = self.s
  309. import locale, sre_constants
  310. UPPER_AE = "\xc4"
  311. LOWER_AE = "\xe4"
  312. UPPER_PI = u"\u03a0"
  313. try:
  314. locale.setlocale(locale.LC_ALL, "de_DE")
  315. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, LOWER_AE),
  316. (u"\u00c4", u"\u00e4"), (UPPER_PI, UPPER_PI)],
  317. sre_constants.SRE_FLAG_LOCALE)
  318. except locale.Error:
  319. # skip test
  320. skip("unsupported locale de_DE")
  321. def test_getlower_unicode(self):
  322. s = self.s
  323. import sre_constants
  324. UPPER_AE = "\xc4"
  325. LOWER_AE = "\xe4"
  326. UPPER_PI = u"\u03a0"
  327. LOWER_PI = u"\u03c0"
  328. s.assert_lower_equal([("a", "a"), ("A", "a"), (UPPER_AE, LOWER_AE),
  329. (u"\u00c4", u"\u00e4"), (UPPER_PI, LOWER_PI),
  330. (u"\u4444", u"\u4444")], sre_constants.SRE_FLAG_UNICODE)
  331. class AppTestSimpleSearches:
  332. def test_search_simple_literal(self):
  333. import re
  334. assert re.search("bla", "bla")
  335. assert re.search("bla", "blab")
  336. assert not re.search("bla", "blu")
  337. def test_search_simple_ats(self):
  338. import re
  339. assert re.search("^bla", "bla")
  340. assert re.search("^bla", "blab")
  341. assert not re.search("^bla", "bbla")
  342. assert re.search("bla$", "abla")
  343. assert re.search("bla$", "bla\n")
  344. assert not re.search("bla$", "blaa")
  345. def test_search_simple_boundaries(self):
  346. import re
  347. UPPER_PI = u"\u03a0"
  348. assert re.search(r"bla\b", "bla")
  349. assert re.search(r"bla\b", "bla ja")
  350. assert re.search(r"bla\b", u"bla%s" % UPPER_PI)
  351. assert not re.search(r"bla\b", "blano")
  352. assert not re.search(r"bla\b", u"bla%s" % UPPER_PI, re.UNICODE)
  353. def test_search_simple_categories(self):
  354. import re
  355. LOWER_PI = u"\u03c0"
  356. INDIAN_DIGIT = u"\u0966"
  357. EM_SPACE = u"\u2001"
  358. LOWER_AE = "\xe4"
  359. assert re.search(r"bla\d\s\w", "bla3 b")
  360. assert re.search(r"b\d", u"b%s" % INDIAN_DIGIT, re.UNICODE)
  361. assert not re.search(r"b\D", u"b%s" % INDIAN_DIGIT, re.UNICODE)
  362. assert re.search(r"b\s", u"b%s" % EM_SPACE, re.UNICODE)
  363. assert not re.search(r"b\S", u"b%s" % EM_SPACE, re.UNICODE)
  364. assert re.search(r"b\w", u"b%s" % LOWER_PI, re.UNICODE)
  365. assert not re.search(r"b\W", u"b%s" % LOWER_PI, re.UNICODE)
  366. assert re.search(r"b\w", "b%s" % LOWER_AE, re.UNICODE)
  367. def test_search_simple_any(self):
  368. import re
  369. assert re.search(r"b..a", "jboaas")
  370. assert not re.search(r"b..a", "jbo\nas")
  371. assert re.search(r"b..a", "jbo\nas", re.DOTALL)
  372. def test_search_simple_in(self):
  373. import re
  374. UPPER_PI = u"\u03a0"
  375. LOWER_PI = u"\u03c0"
  376. EM_SPACE = u"\u2001"
  377. LINE_SEP = u"\u2028"
  378. assert re.search(r"b[\da-z]a", "bb1a")
  379. assert re.search(r"b[\da-z]a", "bbsa")
  380. assert not re.search(r"b[\da-z]a", "bbSa")
  381. assert re.search(r"b[^okd]a", "bsa")
  382. assert not re.search(r"b[^okd]a", "bda")
  383. assert re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  384. u"b%sa" % UPPER_PI) # bigcharset
  385. assert re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  386. u"b%sa" % EM_SPACE)
  387. assert not re.search(u"b[%s%s%s]a" % (LOWER_PI, UPPER_PI, EM_SPACE),
  388. u"b%sa" % LINE_SEP)
  389. def test_search_simple_literal_ignore(self):
  390. import re
  391. UPPER_PI = u"\u03a0"
  392. LOWER_PI = u"\u03c0"
  393. assert re.search(r"ba", "ba", re.IGNORECASE)
  394. assert re.search(r"ba", "BA", re.IGNORECASE)
  395. assert re.search(u"b%s" % UPPER_PI, u"B%s" % LOWER_PI,
  396. re.IGNORECASE | re.UNICODE)
  397. def test_search_simple_in_ignore(self):
  398. import re
  399. UPPER_PI = u"\u03a0"
  400. LOWER_PI = u"\u03c0"
  401. assert re.search(r"ba[A-C]", "bac", re.IGNORECASE)
  402. assert re.search(r"ba[a-c]", "baB", re.IGNORECASE)
  403. assert re.search(u"ba[%s]" % UPPER_PI, "ba%s" % LOWER_PI,
  404. re.IGNORECASE | re.UNICODE)
  405. assert re.search(r"ba[^A-C]", "bar", re.IGNORECASE)
  406. assert not re.search(r"ba[^A-C]", "baA", re.IGNORECASE)
  407. assert not re.search(r"ba[^A-C]", "baa", re.IGNORECASE)
  408. def test_search_simple_branch(self):
  409. import re
  410. assert re.search(r"a(bb|d[ef])b", "adeb")
  411. assert re.search(r"a(bb|d[ef])b", "abbb")
  412. def test_search_simple_repeat_one(self):
  413. import re
  414. assert re.search(r"aa+", "aa") # empty tail
  415. assert re.search(r"aa+ab", "aaaab") # backtracking
  416. assert re.search(r"aa*ab", "aab") # empty match
  417. assert re.search(r"a[bc]+", "abbccb")
  418. assert "abbcb" == re.search(r"a.+b", "abbcb\nb").group()
  419. assert "abbcb\nb" == re.search(r"a.+b", "abbcb\nb", re.DOTALL).group()
  420. assert re.search(r"ab+c", "aBbBbBc", re.IGNORECASE)
  421. assert not re.search(r"aa{2,3}", "aa") # string too short
  422. assert not re.search(r"aa{2,3}b", "aab") # too few repetitions
  423. assert not re.search(r"aa+b", "aaaac") # tail doesn't match
  424. def test_search_simple_min_repeat_one(self):
  425. import re
  426. assert re.search(r"aa+?", "aa") # empty tail
  427. assert re.search(r"aa+?ab", "aaaab") # forward tracking
  428. assert re.search(r"a[bc]+?", "abbccb")
  429. assert "abb" == re.search(r"a.+?b", "abbcb\nb").group()
  430. assert "a\nbb" == re.search(r"a.+b", "a\nbbc", re.DOTALL).group()
  431. assert re.search(r"ab+?c", "aBbBbBc", re.IGNORECASE)
  432. assert not re.search(r"aa+?", "a") # string too short
  433. assert not re.search(r"aa{2,3}?b", "aab") # too few repetitions
  434. assert not re.search(r"aa+?b", "aaaac") # tail doesn't match
  435. assert re.match(".*?cd", "abcabcde").end(0) == 7
  436. def test_search_simple_repeat_maximizing(self):
  437. import re
  438. assert not re.search(r"(ab){3,5}", "abab")
  439. assert not re.search(r"(ab){3,5}", "ababa")
  440. assert re.search(r"(ab){3,5}", "ababab")
  441. assert re.search(r"(ab){3,5}", "abababababab").end(0) == 10
  442. assert "ad" == re.search(r"(a.)*", "abacad").group(1)
  443. assert ("abcg", "cg") == (
  444. re.search(r"(ab(c.)*)+", "ababcecfabcg").groups())
  445. assert ("cg", "cg") == (
  446. re.search(r"(ab|(c.))+", "abcg").groups())
  447. assert ("ab", "cf") == (
  448. re.search(r"((c.)|ab)+", "cfab").groups())
  449. assert re.search(r".*", "")
  450. def test_search_simple_repeat_minimizing(self):
  451. import re
  452. assert not re.search(r"(ab){3,5}?", "abab")
  453. assert re.search(r"(ab){3,5}?", "ababab")
  454. assert re.search(r"b(a){3,5}?b", "baaaaab")
  455. assert not re.search(r"b(a){3,5}?b", "baaaaaab")
  456. assert re.search(r"a(b(.)+?)*", "abdbebb")
  457. def test_search_simple_groupref(self):
  458. import re
  459. UPPER_PI = u"\u03a0"
  460. LOWER_PI = u"\u03c0"
  461. assert re.match(r"((ab)+)c\1", "ababcabab")
  462. assert not re.match(r"((ab)+)c\1", "ababcab")
  463. assert not re.search(r"(a|(b))\2", "aa")
  464. assert re.match(r"((ab)+)c\1", "aBAbcAbaB", re.IGNORECASE)
  465. assert re.match(r"((a.)+)c\1", u"a%sca%s" % (UPPER_PI, LOWER_PI),
  466. re.IGNORECASE | re.UNICODE)
  467. def test_search_simple_groupref_exists(self):
  468. import re, sys
  469. if not sys.version_info[:2] == (2, 3):
  470. assert re.search(r"(<)?bla(?(1)>)", "<bla>")
  471. assert re.search(r"(<)?bla(?(1)>)", "bla")
  472. assert not re.match(r"(<)?bla(?(1)>)", "<bla")
  473. assert re.search(r"(<)?bla(?(1)>|u)", "blau")
  474. def test_search_simple_assert(self):
  475. import re
  476. assert re.search(r"b(?=\d\d).{3,}", "b23a")
  477. assert not re.search(r"b(?=\d\d).{3,}", "b2aa")
  478. assert re.search(r"b(?<=\d.)a", "2ba")
  479. assert not re.search(r"b(?<=\d.)a", "ba")
  480. def test_search_simple_assert_not(self):
  481. import re
  482. assert re.search(r"b(?<!\d.)a", "aba")
  483. assert re.search(r"b(?<!\d.)a", "ba")
  484. assert not re.search(r"b(?<!\d.)a", "11ba")
  485. def test_bug_725149(self):
  486. # mark_stack_base restoring before restoring marks
  487. # test copied from CPython test
  488. import re
  489. assert re.match('(a)(?:(?=(b)*)c)*', 'abb').groups() == ('a', None)
  490. assert re.match('(a)((?!(b)*))*', 'abb').groups() == ('a', None, None)
  491. class AppTestMarksStack:
  492. def test_mark_stack_branch(self):
  493. import re
  494. m = re.match("b(.)a|b.b", "bob")
  495. assert None == m.group(1)
  496. assert None == m.lastindex
  497. def test_mark_stack_repeat_one(self):
  498. import re
  499. m = re.match("\d+1((2)|(3))4", "2212413")
  500. assert ("2", "2", None) == m.group(1, 2, 3)
  501. assert 1 == m.lastindex
  502. def test_mark_stack_min_repeat_one(self):
  503. import re
  504. m = re.match("\d+?1((2)|(3))44", "221341244")
  505. assert ("2", "2", None) == m.group(1, 2, 3)
  506. assert 1 == m.lastindex
  507. def test_mark_stack_max_until(self):
  508. import re
  509. m = re.match("(\d)+1((2)|(3))4", "2212413")
  510. assert ("2", "2", None) == m.group(2, 3, 4)
  511. assert 2 == m.lastindex
  512. def test_mark_stack_min_until(self):
  513. import re
  514. m = re.match("(\d)+?1((2)|(3))44", "221341244")
  515. assert ("2", "2", None) == m.group(2, 3, 4)
  516. assert 2 == m.lastindex
  517. class AppTestOpcodes:
  518. spaceconfig = dict(usemodules=('_locale',))
  519. def setup_class(cls):
  520. if cls.runappdirect:
  521. py.test.skip("can only be run on py.py: _sre opcodes don't match")
  522. # This imports support_test_sre as the global "s"
  523. init_app_test(cls, cls.space)
  524. def test_length_optimization(self):
  525. s = self.s
  526. pattern = "bla"
  527. opcodes = [s.OPCODES["info"], 3, 3, len(pattern)] \
  528. + s.encode_literal(pattern) + [s.OPCODES["success"]]
  529. s.assert_no_match(opcodes, ["b", "bl", "ab"])
  530. def test_literal(self):
  531. s = self.s
  532. opcodes = s.encode_literal("bla") + [s.OPCODES["success"]]
  533. s.assert_no_match(opcodes, ["bl", "blu"])
  534. s.assert_match(opcodes, ["bla", "blab", "cbla", "bbla"])
  535. def test_not_literal(self):
  536. s = self.s
  537. opcodes = s.encode_literal("b") \
  538. + [s.OPCODES["not_literal"], ord("a"), s.OPCODES["success"]]
  539. s.assert_match(opcodes, ["bx", "ababy"])
  540. s.assert_no_match(opcodes, ["ba", "jabadu"])
  541. def test_unknown(self):
  542. s = self.s
  543. raises(RuntimeError, s.search, [55555], "b")
  544. def test_at_beginning(self):
  545. s = self.s
  546. for atname in ["at_beginning", "at_beginning_string"]:
  547. opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
  548. + s.encode_literal("bla") + [s.OPCODES["success"]]
  549. s.assert_match(opcodes, "bla")
  550. s.assert_no_match(opcodes, "abla")
  551. def test_at_beginning_line(self):
  552. s = self.s
  553. opcodes = [s.OPCODES["at"], s.ATCODES["at_beginning_line"]] \
  554. + s.encode_literal("bla") + [s.OPCODES["success"]]
  555. s.assert_match(opcodes, ["bla", "x\nbla"])
  556. s.assert_no_match(opcodes, ["abla", "abla\nubla"])
  557. def test_at_end(self):
  558. s = self.s
  559. opcodes = s.encode_literal("bla") \
  560. + [s.OPCODES["at"], s.ATCODES["at_end"], s.OPCODES["success"]]
  561. s.assert_match(opcodes, ["bla", "bla\n"])
  562. s.assert_no_match(opcodes, ["blau", "abla\nblau"])
  563. def test_at_end_line(self):
  564. s = self.s
  565. opcodes = s.encode_literal("bla") \
  566. + [s.OPCODES["at"], s.ATCODES["at_end_line"], s.OPCODES["success"]]
  567. s.assert_match(opcodes, ["bla\n", "bla\nx", "bla"])
  568. s.assert_no_match(opcodes, ["blau"])
  569. def test_at_end_string(self):
  570. s = self.s
  571. opcodes = s.encode_literal("bla") \
  572. + [s.OPCODES["at"], s.ATCODES["at_end_string"], s.OPCODES["success"]]
  573. s.assert_match(opcodes, "bla")
  574. s.assert_no_match(opcodes, ["blau", "bla\n"])
  575. def test_at_boundary(self):
  576. s = self.s
  577. for atname in "at_boundary", "at_loc_boundary", "at_uni_boundary":
  578. opcodes = s.encode_literal("bla") \
  579. + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
  580. s.assert_match(opcodes, ["bla", "bla ha", "bla,x"])
  581. s.assert_no_match(opcodes, ["blaja", ""])
  582. opcodes = [s.OPCODES["at"], s.ATCODES[atname]] \
  583. + s.encode_literal("bla") + [s.OPCODES["success"]]
  584. s.assert_match(opcodes, "bla")
  585. s.assert_no_match(opcodes, "")
  586. def test_at_non_boundary(self):
  587. s = self.s
  588. for atname in "at_non_boundary", "at_loc_non_boundary", "at_uni_non_boundary":
  589. opcodes = s.encode_literal("bla") \
  590. + [s.OPCODES["at"], s.ATCODES[atname], s.OPCODES["success"]]
  591. s.assert_match(opcodes, "blan")
  592. s.assert_no_match(opcodes, ["bla ja", "bla"])
  593. def test_at_loc_boundary(self):
  594. s = self.s
  595. import locale
  596. try:
  597. s.void_locale()
  598. opcodes1 = s.encode_literal("bla") \
  599. + [s.OPCODES["at"], s.ATCODES["at_loc_boundary"], s.OPCODES["success"]]
  600. opcodes2 = s.encode_literal("bla") \
  601. + [s.OPCODES["at"], s.ATCODES["at_loc_non_boundary"], s.OPCODES["success"]]
  602. s.assert_match(opcodes1, "bla\xFC")
  603. s.assert_no_match(opcodes2, "bla\xFC")
  604. oldlocale = locale.setlocale(locale.LC_ALL)
  605. locale.setlocale(locale.LC_ALL, "de_DE")
  606. s.assert_no_match(opcodes1, "bla\xFC")
  607. s.assert_match(opcodes2, "bla\xFC")
  608. locale.setlocale(locale.LC_ALL, oldlocale)
  609. except locale.Error:
  610. # skip test
  611. skip("locale error")
  612. def test_at_uni_boundary(self):
  613. s = self.s
  614. UPPER_PI = u"\u03a0"
  615. LOWER_PI = u"\u03c0"
  616. opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
  617. s.ATCODES["at_uni_boundary"], s.OPCODES["success"]]
  618. s.assert_match(opcodes, ["bla ha", u"bl%s ja" % UPPER_PI])
  619. s.assert_no_match(opcodes, [u"bla%s" % LOWER_PI])
  620. opcodes = s.encode_literal("bl") + [s.OPCODES["any"], s.OPCODES["at"],
  621. s.ATCODES["at_uni_non_boundary"], s.OPCODES["success"]]
  622. s.assert_match(opcodes, ["blaha", u"bl%sja" % UPPER_PI])
  623. def test_category_loc_word(self):
  624. s = self.s
  625. import locale
  626. try:
  627. s.void_locale()
  628. opcodes1 = s.encode_literal("b") \
  629. + [s.OPCODES["category"], s.CHCODES["category_loc_word"], s.OPCODES["success"]]
  630. opcodes2 = s.encode_literal("b") \
  631. + [s.OPCODES["category"], s.CHCODES["category_loc_not_word"], s.OPCODES["success"]]
  632. s.assert_no_match(opcodes1, "b\xFC")
  633. s.assert_no_match(opcodes1, u"b\u00FC")
  634. s.assert_match(opcodes2, "b\xFC")
  635. locale.setlocale(locale.LC_ALL, "de_DE")
  636. s.assert_match(opcodes1, "b\xFC")
  637. s.assert_no_match(opcodes1, u"b\u00FC")
  638. s.assert_no_match(opcodes2, "b\xFC")
  639. s.void_locale()
  640. except locale.Error:
  641. # skip test
  642. skip("locale error")
  643. def test_any(self):
  644. s = self.s
  645. opcodes = s.encode_literal("b") + [s.OPCODES["any"]] \
  646. + s.encode_literal("a") + [s.OPCODES["success"]]
  647. s.assert_match(opcodes, ["b a", "bla", "bboas"])
  648. s.assert_no_match(opcodes, ["b\na", "oba", "b"])
  649. def test_any_all(self):
  650. s = self.s
  651. opcodes = s.encode_literal("b") + [s.OPCODES["any_all"]] \
  652. + s.encode_literal("a") + [s.OPCODES["success"]]
  653. s.assert_match(opcodes, ["b a", "bla", "bboas", "b\na"])
  654. s.assert_no_match(opcodes, ["oba", "b"])
  655. def test_in_failure(self):
  656. s = self.s
  657. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 2, s.OPCODES["failure"]] \
  658. + s.encode_literal("a") + [s.OPCODES["success"]]
  659. s.assert_no_match(opcodes, ["ba", "bla"])
  660. def test_in_literal(self):
  661. s = self.s
  662. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7] \
  663. + s.encode_literal("la") + [s.OPCODES["failure"], s.OPCODES["failure"]] \
  664. + s.encode_literal("a") + [s.OPCODES["success"]]
  665. s.assert_match(opcodes, ["bla", "baa", "blbla"])
  666. s.assert_no_match(opcodes, ["ba", "bja", "blla"])
  667. def test_in_category(self):
  668. s = self.s
  669. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 6, s.OPCODES["category"],
  670. s.CHCODES["category_digit"], s.OPCODES["category"], s.CHCODES["category_space"],
  671. s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  672. s.assert_match(opcodes, ["b1a", "b a", "b4b\tas"])
  673. s.assert_no_match(opcodes, ["baa", "b5"])
  674. def test_in_charset_ucs2(self):
  675. import _sre
  676. if _sre.CODESIZE != 2:
  677. return
  678. s = self.s
  679. # charset bitmap for characters "l" and "h"
  680. bitmap = 6 * [0] + [4352] + 9 * [0]
  681. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 19, s.OPCODES["charset"]] \
  682. + bitmap + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  683. s.assert_match(opcodes, ["bla", "bha", "blbha"])
  684. s.assert_no_match(opcodes, ["baa", "bl"])
  685. def _test_in_bigcharset_ucs2(self):
  686. # disabled because this actually only works on big-endian machines
  687. if _sre.CODESIZE != 2:
  688. return
  689. s = self.s
  690. # constructing bigcharset for lowercase pi (\u03c0)
  691. UPPER_PI = u"\u03a0"
  692. LOWER_PI = u"\u03c0"
  693. bitmap = 6 * [0] + [4352] + 9 * [0]
  694. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 164, s.OPCODES["bigcharset"], 2] \
  695. + [0, 1] + 126 * [0] \
  696. + 16 * [0] \
  697. + 12 * [0] + [1] + 3 * [0] \
  698. + [s.OPCODES["failure"]] + s.encode_literal("a") + [s.OPCODES["success"]]
  699. s.assert_match(opcodes, [u"b%sa" % LOWER_PI])
  700. s.assert_no_match(opcodes, [u"b%sa" % UPPER_PI])
  701. # XXX bigcharset test for ucs4 missing here
  702. def test_in_range(self):
  703. s = self.s
  704. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 5, s.OPCODES["range"],
  705. ord("1"), ord("9"), s.OPCODES["failure"]] \
  706. + s.encode_literal("a") + [s.OPCODES["success"]]
  707. s.assert_match(opcodes, ["b1a", "b56b7aa"])
  708. s.assert_no_match(opcodes, ["baa", "b5"])
  709. def test_in_negate(self):
  710. s = self.s
  711. opcodes = s.encode_literal("b") + [s.OPCODES["in"], 7, s.OPCODES["negate"]] \
  712. + s.encode_literal("la") + [s.OPCODES["failure"]] \
  713. + s.encode_literal("a") + [s.OPCODES["success"]]
  714. s.assert_match(opcodes, ["b1a", "bja", "bubua"])
  715. s.assert_no_match(opcodes, ["bla", "baa", "blbla"])
  716. def test_literal_ignore(self):
  717. s = self.s
  718. opcodes = s.encode_literal("b") \
  719. + [s.OPCODES["literal_ignore"], ord("a"), s.OPCODES["success"]]
  720. s.assert_match(opcodes, ["ba", "bA"])
  721. s.assert_no_match(opcodes, ["bb", "bu"])
  722. def test_not_literal_ignore(self):
  723. s = self.s
  724. UPPER_PI = u"\u03a0"
  725. opcodes = s.encode_literal("b") \
  726. + [s.OPCODES["not_literal_ignore"], ord("a"), s.OPCODES["success"]]
  727. s.assert_match(opcodes, ["bb", "bu", u"b%s" % UPPER_PI])
  728. s.assert_no_match(opcodes, ["ba", "bA"])
  729. def test_in_ignore(self):
  730. s = self.s
  731. opcodes = s.encode_literal("b") + [s.OPCODES["in_ignore"], 8] \
  732. + s.encode_literal("abc") + [s.OPCODES["failure"]] \
  733. + s.encode_literal("a") + [s.OPCODES["success"]]
  734. s.assert_match(opcodes, ["baa", "bAa", "bbbBa"])
  735. s.assert_no_match(opcodes, ["ba", "bja", "blla"])
  736. def test_in_jump_info(self):
  737. s = self.s
  738. for opname in "jump", "info":
  739. opcodes = s.encode_literal("b") \
  740. + [s.OPCODES[opname], 3, s.OPCODES["failure"], s.OPCODES["failure"]] \
  741. + s.encode_literal("a") + [s.OPCODES["success"]]
  742. s.assert_match(opcodes, "ba")
  743. def _test_mark(self):
  744. s = self.s
  745. # XXX need to rewrite this implementation-independent
  746. opcodes = s.encode_literal("a") + [s.OPCODES["mark"], 0] \
  747. + s.encode_literal("b") + [s.OPCODES["mark"], 1, s.OPCODES["success"]]
  748. state = self.create_state("abc")
  749. _sre._sre_search(state, opcodes)
  750. assert 1 == state.lastindex
  751. assert 1 == state.lastmark
  752. # NB: the following are indexes from the start of the match
  753. assert [1, 2] == state.marks
  754. def test_branch(self):
  755. s = self.s
  756. opcodes = [s.OPCODES["branch"], 7] + s.encode_literal("ab") \
  757. + [s.OPCODES["jump"], 9, 7] + s.encode_literal("cd") \
  758. + [s.OPCODES["jump"], 2, s.OPCODES["failure"], s.OPCODES["success"]]
  759. s.assert_match(opcodes, ["ab", "cd"])
  760. s.assert_no_match(opcodes, ["aacas", "ac", "bla"])
  761. def test_repeat_one(self):
  762. s = self.s
  763. opcodes = [s.OPCODES["repeat_one"], 6, 1, 65535] + s.encode_literal("a") \
  764. + [s.OPCODES["success"]] + s.encode_literal("ab") + [s.OPCODES["success"]]
  765. s.assert_match(opcodes, ["aab", "aaaab"])
  766. s.assert_no_match(opcodes, ["ab", "a"])
  767. def test_min_repeat_one(self):
  768. s = self.s
  769. opcodes = [s.OPCODES["min_repeat_one"], 5, 1, 65535, s.OPCODES["any"]] \
  770. + [s.OPCODES["success"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  771. s.assert_match(opcodes, ["aab", "ardb", "bb"])
  772. s.assert_no_match(opcodes, ["b"])
  773. def test_repeat_maximizing(self):
  774. s = self.s
  775. opcodes = [s.OPCODES["repeat"], 5, 1, 65535] + s.encode_literal("a") \
  776. + [s.OPCODES["max_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  777. s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
  778. s.assert_no_match(opcodes, ["aaa", "", "ac"])
  779. def test_max_until_zero_width_match(self):
  780. # re.compile won't compile prospective zero-with matches (all of them?),
  781. # so we can only produce an example by directly constructing bytecodes.
  782. # CPython 2.3 fails with a recursion limit exceeded error here.
  783. import sys
  784. if not sys.version_info[:2] == (2, 3):
  785. s = self.s
  786. opcodes = [s.OPCODES["repeat"], 10, 1, 65535, s.OPCODES["repeat_one"],
  787. 6, 0, 65535] + s.encode_literal("a") + [s.OPCODES["success"],
  788. s.OPCODES["max_until"], s.OPCODES["success"]]
  789. s.assert_match(opcodes, ["ab", "bb"])
  790. assert "" == s.search(opcodes, "bb").group(0)
  791. def test_repeat_minimizing(self):
  792. s = self.s
  793. opcodes = [s.OPCODES["repeat"], 4, 1, 65535, s.OPCODES["any"],
  794. s.OPCODES["min_until"]] + s.encode_literal("b") + [s.OPCODES["success"]]
  795. s.assert_match(opcodes, ["ab", "aaaab", "baabb"])
  796. s.assert_no_match(opcodes, ["b"])
  797. assert "aab" == s.search(opcodes, "aabb").group(0)
  798. def test_groupref(self):
  799. s = self.s
  800. opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
  801. + s.encode_literal("a") + [s.OPCODES["groupref"], 0, s.OPCODES["success"]]
  802. s.assert_match(opcodes, ["bab", "aaa", "dad"])
  803. s.assert_no_match(opcodes, ["ba", "bad", "baad"])
  804. def test_groupref_ignore(self):
  805. s = self.s
  806. opcodes = [s.OPCODES["mark"], 0, s.OPCODES["any"], s.OPCODES["mark"], 1] \
  807. + s.encode_literal("a") + [s.OPCODES["groupref_ignore"], 0, s.OPCODES["success"]]
  808. s.assert_match(opcodes, ["bab", "baB", "Dad"])
  809. s.assert_no_match(opcodes, ["ba", "bad", "baad"])
  810. def test_assert(self):
  811. s = self.s
  812. opcodes = s.encode_literal("a") + [s.OPCODES["assert"], 4, 0] \
  813. + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
  814. assert "a" == s.search(opcodes, "ab").group(0)
  815. s.assert_no_match(opcodes, ["a", "aa"])
  816. def test_assert_not(self):
  817. s = self.s
  818. opcodes = s.encode_literal("a") + [s.OPCODES["assert_not"], 4, 0] \
  819. + s.encode_literal("b") + [s.OPCODES["success"], s.OPCODES["success"]]
  820. assert "a" == s.search(opcodes, "ac").group(0)
  821. s.assert_match(opcodes, ["a"])
  822. s.assert_no_match(opcodes, ["ab"])
  823. def test_bug(self):
  824. import re
  825. assert re.sub('=\w{2}', 'x', '=CA') == 'x'
  826. class AppTestOptimizations:
  827. """These tests try to trigger optimized edge cases."""
  828. def test_match_length_optimization(self):
  829. import re
  830. assert None == re.match("bla", "blub")
  831. def test_fast_search(self):
  832. import re
  833. assert None == re.search("bl", "abaub")
  834. assert None == re.search("bl", "b")
  835. assert ["bl", "bl"] == re.findall("bl", "blbl")
  836. assert ["a", "u"] == re.findall("bl(.)", "blablu")
  837. def test_branch_literal_shortcut(self):
  838. import re
  839. assert None == re.search("bl|a|c", "hello")
  840. def test_literal_search(self):
  841. import re
  842. assert re.search("b(\d)", "ababbbab1")
  843. assert None == re.search("b(\d)", "ababbbab")
  844. def test_repeat_one_literal_tail(self):
  845. import re
  846. assert re.search(".+ab", "wowowowawoabwowo")
  847. assert None == re.search(".+ab", "wowowaowowo")