PageRenderTime 109ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/rlib/rsre/rsre_re.py

https://bitbucket.org/yrttyr/pypy
Python | 322 lines | 320 code | 0 blank | 2 comment | 0 complexity | e9836d33e766a0e43f213b9e2e46c9bd MD5 | raw file
  1. """
  2. Testing code. This is not used in a PyPy translation.
  3. It exports the same interface as the Python 're' module.
  4. """
  5. import re, sys
  6. from pypy.rlib.rsre import rsre_core, rsre_char
  7. from pypy.rlib.rsre.test.test_match import get_code as _get_code
  8. from pypy.module.unicodedata import unicodedb
  9. from pypy.rlib.objectmodel import specialize
  10. rsre_char.set_unicode_db(unicodedb)
  11. I = IGNORECASE = re.I # ignore case
  12. L = LOCALE = re.L # assume current 8-bit locale
  13. U = UNICODE = re.U # assume unicode locale
  14. M = MULTILINE = re.M # make anchors look for newline
  15. S = DOTALL = re.S # make dot match newline
  16. X = VERBOSE = re.X # ignore whitespace and comments
  17. def match(pattern, string, flags=0):
  18. return compile(pattern, flags).match(string)
  19. def search(pattern, string, flags=0):
  20. return compile(pattern, flags).search(string)
  21. def findall(pattern, string, flags=0):
  22. return compile(pattern, flags).findall(string)
  23. def finditer(pattern, string, flags=0):
  24. return compile(pattern, flags).finditer(string)
  25. def sub(pattern, repl, string, count=0):
  26. return compile(pattern).sub(repl, string, count)
  27. def subn(pattern, repl, string, count=0):
  28. return compile(pattern).subn(repl, string, count)
  29. def split(pattern, string, maxsplit=0):
  30. return compile(pattern).split(string, maxsplit)
  31. @specialize.memo()
  32. def compile(pattern, flags=0):
  33. code, flags, args = _get_code(pattern, flags, allargs=True)
  34. return RSREPattern(pattern, code, flags, *args)
  35. escape = re.escape
  36. error = re.error
  37. class RSREPattern(object):
  38. def __init__(self, pattern, code, flags,
  39. num_groups, groupindex, indexgroup):
  40. self._code = code
  41. self.pattern = pattern
  42. self.flags = flags
  43. self.groups = num_groups
  44. self.groupindex = groupindex
  45. self._indexgroup = indexgroup
  46. def match(self, string, pos=0, endpos=sys.maxint):
  47. return self._make_match(rsre_core.match(self._code, string,
  48. pos, endpos,
  49. flags=self.flags))
  50. def search(self, string, pos=0, endpos=sys.maxint):
  51. return self._make_match(rsre_core.search(self._code, string,
  52. pos, endpos,
  53. flags=self.flags))
  54. def findall(self, string, pos=0, endpos=sys.maxint):
  55. matchlist = []
  56. for match in self.finditer(string, pos, endpos):
  57. if self.groups == 0 or self.groups == 1:
  58. item = match.group(self.groups)
  59. else:
  60. item = match.groups("")
  61. matchlist.append(item)
  62. return matchlist
  63. def finditer(self, string, pos=0, endpos=sys.maxint):
  64. return iter(self.scanner(string, pos, endpos).search, None)
  65. def subn(self, repl, string, count=0):
  66. filter = repl
  67. if not callable(repl) and "\\" in repl:
  68. # handle non-literal strings; hand it over to the template compiler
  69. filter = re._subx(self, repl)
  70. start = 0
  71. sublist = []
  72. force_unicode = (isinstance(string, unicode) or
  73. isinstance(repl, unicode))
  74. n = last_pos = 0
  75. while not count or n < count:
  76. match = rsre_core.search(self._code, string, start,
  77. flags=self.flags)
  78. if match is None:
  79. break
  80. if last_pos < match.match_start:
  81. sublist.append(string[last_pos:match.match_start])
  82. if not (last_pos == match.match_start
  83. == match.match_end and n > 0):
  84. # the above ignores empty matches on latest position
  85. if callable(filter):
  86. piece = filter(self._make_match(match))
  87. else:
  88. piece = filter
  89. sublist.append(piece)
  90. last_pos = match.match_end
  91. n += 1
  92. elif last_pos >= len(string):
  93. break # empty match at the end: finished
  94. #
  95. start = match.match_end
  96. if start == match.match_start:
  97. start += 1
  98. if last_pos < len(string):
  99. sublist.append(string[last_pos:])
  100. if n == 0:
  101. # not just an optimization -- see test_sub_unicode
  102. return string, n
  103. if force_unicode:
  104. item = u"".join(sublist)
  105. else:
  106. item = "".join(sublist)
  107. return item, n
  108. def sub(self, repl, string, count=0):
  109. item, n = self.subn(repl, string, count)
  110. return item
  111. def split(self, string, maxsplit=0):
  112. splitlist = []
  113. start = 0
  114. n = 0
  115. last = 0
  116. while not maxsplit or n < maxsplit:
  117. match = rsre_core.search(self._code, string, start,
  118. flags=self.flags)
  119. if match is None:
  120. break
  121. if match.match_start == match.match_end: # zero-width match
  122. if match.match_start == len(string): # at end of string
  123. break
  124. start = match.match_end + 1
  125. continue
  126. splitlist.append(string[last:match.match_start])
  127. # add groups (if any)
  128. if self.groups:
  129. match1 = self._make_match(match)
  130. splitlist.extend(match1.groups(None))
  131. n += 1
  132. last = start = match.match_end
  133. splitlist.append(string[last:])
  134. return splitlist
  135. def scanner(self, string, start=0, end=sys.maxint):
  136. return SREScanner(self, string, start, end)
  137. def _make_match(self, res):
  138. if res is None:
  139. return None
  140. return RSREMatch(self, res)
  141. class RSREMatch(object):
  142. def __init__(self, pattern, ctx):
  143. self.re = pattern
  144. self._ctx = ctx
  145. def span(self, groupnum=0):
  146. # if not isinstance(groupnum, (int, long)):
  147. # groupnum = self.re.groupindex[groupnum]
  148. return self._ctx.span(groupnum)
  149. def start(self, groupnum=0):
  150. return self.span(groupnum)[0]
  151. def end(self, groupnum=0):
  152. return self.span(groupnum)[1]
  153. def group(self, group=0):
  154. frm, to = self.span(group)
  155. if 0 <= frm <= to:
  156. return self._ctx._string[frm:to]
  157. else:
  158. return None
  159. # def group(self, *groups):
  160. # groups = groups or (0,)
  161. # result = []
  162. # for group in groups:
  163. # frm, to = self.span(group)
  164. # if 0 <= frm <= to:
  165. # result.append(self._ctx._string[frm:to])
  166. # else:
  167. # result.append(None)
  168. # if len(result) > 1:
  169. # return tuple(result)
  170. def groups(self, default=None):
  171. fmarks = self._ctx.flatten_marks()
  172. grps = []
  173. for i in range(1, self.re.groups+1):
  174. grp = self.group(i)
  175. if grp is None: grp = default
  176. grps.append(grp)
  177. return tuple(grps)
  178. def groupdict(self, default=None):
  179. d = {}
  180. for key, value in self.re.groupindex.iteritems():
  181. grp = self.group(value)
  182. if grp is None: grp = default
  183. d[key] = grp
  184. return d
  185. def expand(self, template):
  186. return re._expand(self.re, self, template)
  187. @property
  188. def regs(self):
  189. fmarks = self._ctx.flatten_marks()
  190. return tuple([(fmarks[i], fmarks[i+1])
  191. for i in range(0, len(fmarks), 2)])
  192. @property
  193. def lastindex(self):
  194. self._ctx.flatten_marks()
  195. if self._ctx.match_lastindex < 0:
  196. return None
  197. return self._ctx.match_lastindex // 2 + 1
  198. @property
  199. def lastgroup(self):
  200. lastindex = self.lastindex
  201. if lastindex < 0 or lastindex >= len(self.re._indexgroup):
  202. return None
  203. return self.re._indexgroup[lastindex]
  204. @property
  205. def string(self):
  206. return self._ctx._string
  207. @property
  208. def pos(self):
  209. return self._ctx.match_start
  210. @property
  211. def endpos(self):
  212. return self._ctx.end
  213. class SREScanner(object):
  214. def __init__(self, pattern, string, start, end):
  215. self.pattern = pattern
  216. self._string = string
  217. self._start = start
  218. self._end = end
  219. def _match_search(self, matcher):
  220. if self._start > len(self._string):
  221. return None
  222. match = matcher(self._string, self._start, self._end)
  223. if match is None:
  224. self._start += 1 # obscure corner case
  225. else:
  226. self._start = match.end()
  227. if match.start() == self._start:
  228. self._start += 1
  229. return match
  230. def match(self):
  231. return self._match_search(self.pattern.match)
  232. def search(self):
  233. return self._match_search(self.pattern.search)
  234. class Scanner:
  235. # This class is copied directly from re.py.
  236. def __init__(self, lexicon, flags=0):
  237. from sre_constants import BRANCH, SUBPATTERN
  238. import sre_parse
  239. self.lexicon = lexicon
  240. # combine phrases into a compound pattern
  241. p = []
  242. s = sre_parse.Pattern()
  243. s.flags = flags
  244. for phrase, action in lexicon:
  245. p.append(sre_parse.SubPattern(s, [
  246. (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
  247. ]))
  248. s.groups = len(p)+1
  249. p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
  250. self.scanner = compile(p)
  251. def scan(self, string):
  252. result = []
  253. append = result.append
  254. match = self.scanner.scanner(string).match
  255. i = 0
  256. while 1:
  257. m = match()
  258. if not m:
  259. break
  260. j = m.end()
  261. if i == j:
  262. break
  263. action = self.lexicon[m.lastindex-1][1]
  264. if callable(action):
  265. self.match = m
  266. action = action(self, m.group())
  267. if action is not None:
  268. append(action)
  269. i = j
  270. return result, string[i:]