PageRenderTime 58ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/rlib/rsre/test/test_match.py

https://bitbucket.org/pjenvey/pypy-mq
Python | 301 lines | 261 code | 34 blank | 6 comment | 6 complexity | bf5de1f3d2d292326d357ce176a403ae MD5 | raw file
Possible License(s): Apache-2.0, AGPL-3.0, BSD-3-Clause
  1. import re, random, py
  2. from rpython.rlib.rsre import rsre_core
  3. from rpython.rlib.rsre.rpy import get_code, VERSION
  def get_code_and_re(regexp):
      """Compile ``regexp`` twice: via ``get_code`` and via ``re.compile``.

      Returns a 2-tuple ``(get_code(regexp), re.compile(regexp))``.
      """
      rsre_code = get_code(regexp)
      stdlib_pattern = re.compile(regexp)
      return rsre_code, stdlib_pattern
  def test_get_code_repetition():
      """Two ``get_code`` calls on the same pattern must compare equal."""
      pattern = r"a+"
      first = get_code(pattern)
      second = get_code(pattern)
      assert first == second
  10. class TestMatch:
  def test_or(self):
      """Alternation: every branch of ``a|bc|def`` matches, other text does not."""
      code = get_code(r"a|bc|def")
      for branch in ("a", "bc", "def"):
          assert rsre_core.match(code, branch)
      assert not rsre_core.match(code, "ghij")
  def test_any(self):
      """With no ``(?s)`` flag, ``.`` accepts "X" but not a newline."""
      code = get_code(r"ab.cd")
      assert rsre_core.match(code, "abXcdef")
      # A newline in the wildcard slot, or a wrong literal later on, must fail.
      for rejected in ("ab\ncdef", "abXcDef"):
          assert not rsre_core.match(code, rejected)
  def test_any_repetition(self):
      """With no ``(?s)`` flag, ``.*`` spans zero or more non-newline characters."""
      code = get_code(r"ab.*cd")
      for accepted in ("abXXXXcdef", "abcdef"):
          assert rsre_core.match(code, accepted)
      for rejected in ("abX\nXcdef", "abXXXXcDef"):
          assert not rsre_core.match(code, rejected)
  def test_any_all(self):
      """Under ``(?s)``, ``.`` also accepts a newline."""
      code = get_code(r"(?s)ab.cd")
      for accepted in ("abXcdef", "ab\ncdef"):
          assert rsre_core.match(code, accepted)
      assert not rsre_core.match(code, "ab\ncDef")
  def test_any_all_repetition(self):
      """Under ``(?s)``, ``.*`` may run across a newline."""
      code = get_code(r"(?s)ab.*cd")
      for accepted in ("abXXXXcdef", "abcdef", "abX\nXcdef"):
          assert rsre_core.match(code, accepted)
      assert not rsre_core.match(code, "abX\nXcDef")
  def test_assert(self):
      """Positive lookahead ``(?=def)`` must hold; the group then ends at mark 4."""
      code = get_code(r"abc(?=def)(.)")
      hit = rsre_core.match(code, "abcdefghi")
      assert hit is not None
      assert hit.get_mark(1) == 4
      # The lookahead fails on "deF", so the whole match fails.
      assert not rsre_core.match(code, "abcdeFghi")
  def test_assert_not(self):
      """Negative lookahead ``(?!def)`` matches only when "def" does not follow."""
      code = get_code(r"abc(?!def)(.)")
      hit = rsre_core.match(code, "abcdeFghi")
      assert hit is not None
      assert hit.get_mark(1) == 4
      # "def" follows "abc" here, so the negative assertion rejects the input.
      assert not rsre_core.match(code, "abcdefghi")
  def test_lookbehind(self):
      """Lookbehind ``(?<=de)`` after a greedy ``[a-z]*`` group."""
      code = get_code(r"([a-z]*)(?<=de)")
      assert rsre_core.match(code, "ade")
      # The group has to stop right after "de", i.e. at offset 3.
      hit = rsre_core.match(code, "adefg")
      assert hit is not None
      assert hit.get_mark(1) == 3
      for rejected in ("abc", "X", "eX"):
          assert not rsre_core.match(code, rejected)
  def test_negative_lookbehind(self):
      """Negative lookbehind ``(?<!dd)``: check mark 1 for several inputs."""
      code = get_code(r"([a-z]*)(?<!dd)")

      def mark_one(text):
          # Match is required to succeed; report the value of mark 1.
          hit = rsre_core.match(code, text)
          assert hit is not None
          return hit.get_mark(1)

      expectations = [
          ("ade", 3),
          ("adefg", 5),
          ("abcdd", 4),
          ("abddd", 3),
          ("adddd", 2),
          ("ddddd", 1),
          ("abXde", 2),
      ]
      for text, wanted in expectations:
          assert mark_one(text) == wanted
  def test_at(self):
      """``abc$`` matches exactly "abc", not a longer or shorter string."""
      code = get_code(r"abc$")
      assert rsre_core.match(code, "abc")
      for rejected in ("abcd", "ab"):
          assert not rsre_core.match(code, rejected)
  def test_repeated_set(self):
      """A repeated character set ``[a0x]+`` followed by a literal "f"."""
      code = get_code(r"[a0x]+f")
      on_set_only = rsre_core.match(code, "a0af")
      with_foreign_char = rsre_core.match(code, "a0yaf")
      assert on_set_only
      assert not with_foreign_char
  def test_category(self):
      """A set mixing the whitespace category with the literal "x"."""
      code = get_code(r"[\sx]")
      for accepted in ("x", " "):
          assert rsre_core.match(code, accepted)
      assert not rsre_core.match(code, "n")
  def test_groupref(self):
      """Backreference to group 1: matches non-prime numbers of "x"."""
      code = get_code(r"(xx+)\1+$")  # match non-prime numbers of x
      # (number of x characters, whether the pattern is expected to match)
      cases = [
          (2, False),
          (3, False),
          (4, True),
          (5, False),
          (6, True),
          (7, False),
          (8, True),
          (9, True),
      ]
      for length, should_match in cases:
          outcome = rsre_core.match(code, "x" * length)
          if should_match:
              assert outcome
          else:
              assert not outcome
  def test_groupref_ignore(self):
      """Same backreference pattern under ``(?i)`` with mixed-case input."""
      code = get_code(r"(?i)(xx+)\1+$")  # match non-prime numbers of x
      cases = [
          ("xX", False),
          ("xxX", False),
          ("Xxxx", True),
          ("xxxXx", False),
          ("xXxxxx", True),
          ("xxxXxxx", False),
          ("xxxxxxXx", True),
          ("xxxXxxxxx", True),
      ]
      for text, should_match in cases:
          outcome = rsre_core.match(code, text)
          if should_match:
              assert outcome
          else:
              assert not outcome
  def test_groupref_exists(self):
      """Conditional group ``(?(2)...)``, without and with an else branch."""
      scenarios = [
          # "d" is required only when group 2 ("a") took part in the match.
          (r"((a)|(b))c(?(2)d)$",
           [("ac", False), ("acd", True), ("bc", True), ("bcd", False)]),
          # With an else branch: "d" after "a", "e" after "b".
          (r"((a)|(b))c(?(2)d|e)$",
           [("ac", False), ("acd", True), ("ace", False),
            ("bc", False), ("bcd", False), ("bce", True)]),
      ]
      for pattern, cases in scenarios:
          code = get_code(pattern)
          for text, should_match in cases:
              outcome = rsre_core.match(code, text)
              if should_match:
                  assert outcome
              else:
                  assert not outcome
  def test_in_ignore(self):
      """Character range ``[a-f]`` under ``(?i)``, single and repeated."""
      single = get_code(r"(?i)[a-f]")
      for accepted in ("b", "C"):
          assert rsre_core.match(single, accepted)
      assert not rsre_core.match(single, "g")

      repeated = get_code(r"(?i)[a-f]+$")
      assert rsre_core.match(repeated, "bCdEf")
      for rejected in ("g", "aaagaaa"):
          assert not rsre_core.match(repeated, rejected)
  def test_not_literal(self):
      """Negated set ``[^a]`` is case-sensitive: "A" passes, "a" does not."""
      single = get_code(r"[^a]")
      assert rsre_core.match(single, "A")
      assert not rsre_core.match(single, "a")

      repeated = get_code(r"[^a]+$")
      assert rsre_core.match(repeated, "Bx123")
      assert not rsre_core.match(repeated, "--a--")
  def test_not_literal_ignore(self):
      """Negated set ``[^a]`` under ``(?i)`` rejects both "a" and "A"."""
      single = get_code(r"(?i)[^a]")
      assert rsre_core.match(single, "G")
      for rejected in ("a", "A"):
          assert not rsre_core.match(single, rejected)

      repeated = get_code(r"(?i)[^a]+$")
      assert rsre_core.match(repeated, "Gx123")
      assert not rsre_core.match(repeated, "--A--")
  def test_repeated_single_character_pattern(self):
      """A repeated group holding a lookbehind plus one character matches "foox"."""
      code = get_code(r"foo(?:(?<=foo)x)+$")
      outcome = rsre_core.match(code, "foox")
      assert outcome
  def test_flatten_marks(self):
      """``flatten_marks()`` returns the expected flat list of offsets.

      The list is checked twice on the same match object, so a second call
      has to return the same value as the first one.
      """
      r = get_code(r"a(b)c((d)(e))+$")
      res = rsre_core.match(r, "abcdedede")
      # Fail with a plain assertion instead of an AttributeError on None
      # if the pattern unexpectedly does not match.
      assert res is not None
      expected = [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
      assert res.flatten_marks() == expected
      # Deliberate repeat -- presumably exercises a cached result inside
      # the match object; confirm against rsre_core.
      assert res.flatten_marks() == expected
  def test_bug1(self):
      """Nested repetitions (greedy and lazy mixes) must still find a match."""
      # REPEAT_ONE inside REPEAT
      repeat_one_cases = [
          (r"(?:.+)?B", "AB"),
          (r"(?:AA+?)+B", "AAAB"),
          (r"(?:AA+)+?B", "AAAB"),
          (r"(?:AA+?)+?B", "AAAB"),
      ]
      # REPEAT inside REPEAT
      repeat_cases = [
          (r"(?:(?:xy)+)?B", "xyB"),
          (r"(?:xy(?:xy)+?)+B", "xyxyxyB"),
          (r"(?:xy(?:xy)+)+?B", "xyxyxyB"),
          (r"(?:xy(?:xy)+?)+?B", "xyxyxyB"),
      ]
      for pattern, text in repeat_one_cases + repeat_cases:
          code = get_code(pattern)
          assert rsre_core.match(code, text) is not None
  def test_assert_group(self):
      """A group captured inside a positive lookahead keeps its span."""
      code = get_code(r"abc(?=(..)f)(.)")
      hit = rsre_core.match(code, "abcdefghi")
      assert hit is not None
      consumed_span = hit.span(2)
      assert consumed_span == (3, 4)
      lookahead_span = hit.span(1)
      assert lookahead_span == (3, 5)
  def test_assert_not_group(self):
      """A group inside a negative lookahead is reported as unset here."""
      code = get_code(r"abc(?!(de)f)(.)")
      hit = rsre_core.match(code, "abcdeFghi")
      assert hit is not None
      consumed_span = hit.span(2)
      assert consumed_span == (3, 4)
      # this I definitely classify as Horrendously Implementation Dependent.
      # CPython answers (3, 5).
      lookahead_span = hit.span(1)
      assert lookahead_span == (-1, -1)
  def test_match_start(self):
      """``^`` combined with ``start=``, without and with ``(?m)``."""
      plain = get_code(r"^ab")
      assert rsre_core.match(plain, "abc")
      # Without (?m), "^" does not match at offset 3, newline or not.
      for text in ("xxxabc", "xx\nabc"):
          assert not rsre_core.match(plain, text, start=3)

      multiline = get_code(r"(?m)^ab")
      assert rsre_core.match(multiline, "abc")
      assert not rsre_core.match(multiline, "xxxabc", start=3)
      # With (?m), "^" does match right after the newline.
      assert rsre_core.match(multiline, "xx\nabc", start=3)
  def test_match_end(self):
      """``end=`` limits how much of the string "ab" may be matched against."""
      code = get_code("ab")
      assert rsre_core.match(code, "abc")
      # An end at or beyond offset 2 leaves "ab" fully available.
      for limit in (333, 3, 2):
          assert rsre_core.match(code, "abc", end=limit)
      # A smaller (or negative) end cuts into the pattern.
      for limit in (1, 0, -1):
          assert not rsre_core.match(code, "abc", end=limit)
  def test_match_bug1(self):
      """``(x??)?$`` has to match the string "x"."""
      code = get_code(r'(x??)?$')
      outcome = rsre_core.match(code, "x")
      assert outcome
  def test_match_bug2(self):
      """``(x??)??$`` has to match the string "x"."""
      code = get_code(r'(x??)??$')
      outcome = rsre_core.match(code, "x")
      assert outcome
  def test_match_bug3(self):
      """``([ax]*?x*)?$`` has to match "aaxaa"; skipped when VERSION is 2.7.5."""
      if VERSION == "2.7.5":
          reason = ("pattern fails to compile with exactly 2.7.5 "
                    "(works on 2.7.3 and on 2.7.trunk though)")
          py.test.skip(reason)
      code = get_code(r'([ax]*?x*)?$')
      outcome = rsre_core.match(code, "aaxaa")
      assert outcome
  def test_bigcharset(self):
      """Randomized check of character sets built from non-ASCII code points.

      Each of the 100 rounds builds a set of 1 to 24 random characters in
      the range 0x100..0xCFFF, then checks that every member matches and
      that 200 random probes in 0x0..0xCFFF match exactly when they belong
      to the set.  The input is random, so every assertion carries the
      pattern and character needed to reproduce a failure.
      """
      for _round in range(100):
          chars = [unichr(random.randrange(0x100, 0xD000))
                   for _ in range(random.randrange(1, 25))]
          members = set(chars)
          pattern = u'[%s]' % (u''.join(chars),)
          r = get_code(pattern)
          for c in chars:
              assert rsre_core.match(r, c), (
                  "pattern=%r char=%r" % (pattern, c))
          # Separate loop name: the original reused the outer loop variable.
          for _probe in range(200):
              c = unichr(random.randrange(0x0, 0xD000))
              res = rsre_core.match(r, c)
              if c in members:
                  assert res is not None, (
                      "pattern=%r char=%r" % (pattern, c))
              else:
                  assert res is None, (
                      "pattern=%r char=%r" % (pattern, c))
  def test_simple_match_1(self):
      """``ab*bbbbbbbc`` matches a prefix of the input and ends at offset 11."""
      code = get_code(r"ab*bbbbbbbc")
      # Print the compiled code; on Python 2 this prints the same as "print code".
      print(code)
      outcome = rsre_core.match(code, "abbbbbbbbbcdef")
      assert outcome
      assert outcome.match_end == 11
  def test_empty_maxuntil(self):
      """A repeated lazy group that can match empty still captures "a"."""
      r = get_code("\\{\\{((?:.*?)+)\\}\\}")
      match = rsre_core.match(r, "{{a}}{{b}}")
      # Fail with a plain assertion instead of an AttributeError on None
      # if the pattern unexpectedly does not match.
      assert match is not None
      assert match.group(1) == "a"
  def test_fullmatch_1(self):
      """``fullmatch`` rejects trailing text that the pattern does not cover."""
      code = get_code(r"ab*c")
      with_trailing_text = rsre_core.fullmatch(code, "abbbcdef")
      assert not with_trailing_text
      exact_text = rsre_core.fullmatch(code, "abbbc")
      assert exact_text
  def test_fullmatch_2(self):
      """Under ``fullmatch`` a lazy ``b*?`` still has to consume all the "b"s."""
      r = get_code(r"a(b*?)")
      match = rsre_core.fullmatch(r, "abbb")
      # Fail with a plain assertion instead of an AttributeError on None
      # if the pattern unexpectedly does not match.
      assert match is not None
      assert match.group(1) == "bbb"
      assert not rsre_core.fullmatch(r, "abbbc")
  def test_fullmatch_3(self):
      """``fullmatch`` with a lazy repeated group ``(bp)*?`` captures "bpbpbp"."""
      r = get_code(r"a((bp)*?)c")
      match = rsre_core.fullmatch(r, "abpbpbpc")
      # Fail with a plain assertion instead of an AttributeError on None
      # if the pattern unexpectedly does not match.
      assert match is not None
      assert match.group(1) == "bpbpbp"
  def test_fullmatch_4(self):
      """``fullmatch`` with a greedy repeated group ``(bp)*`` captures "bpbpbp"."""
      r = get_code(r"a((bp)*)c")
      match = rsre_core.fullmatch(r, "abpbpbpc")
      # Fail with a plain assertion instead of an AttributeError on None
      # if the pattern unexpectedly does not match.
      assert match is not None
      assert match.group(1) == "bpbpbp"
  def test_fullmatch_assertion(self):
      """``fullmatch`` combined with a leading lookahead assertion."""
      # Positive lookahead for "a" holds on "ab", so the full match succeeds.
      r = get_code(r"(?=a).b")
      accepted = rsre_core.fullmatch(r, "ab")
      assert accepted
      # Negative lookahead for "a" fails on "ab", so nothing matches.
      r = get_code(r"(?!a)..")
      rejected = rsre_core.fullmatch(r, "ab")
      assert not rejected