PageRenderTime 25ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/rlib/rsre/rsre_re.py

https://bitbucket.org/pwaller/pypy
Python | 320 lines | 318 code | 0 blank | 2 comment | 0 complexity | 018779154488750a6ad33c9404e81e11 MD5 | raw file
  1. """
  2. Testing code. This is not used in a PyPy translation.
  3. It exports the same interface as the Python 're' module.
  4. """
  5. import re, sys
  6. from pypy.rlib.rsre import rsre_core, rsre_char
  7. from pypy.rlib.rsre.test.test_match import get_code as _get_code
  8. from pypy.module.unicodedata import unicodedb
  9. rsre_char.set_unicode_db(unicodedb)
# Flag constants, re-exported from the standard 're' module under both their
# one-letter and their long names so this module can be used in its place.
I = IGNORECASE = re.I # ignore case
L = LOCALE = re.L # assume current 8-bit locale
U = UNICODE = re.U # assume unicode locale
M = MULTILINE = re.M # make anchors look for newline
S = DOTALL = re.S # make dot match newline
X = VERBOSE = re.X # ignore whitespace and comments
  16. def match(pattern, string, flags=0):
  17. return compile(pattern, flags).match(string)
  18. def search(pattern, string, flags=0):
  19. return compile(pattern, flags).search(string)
  20. def findall(pattern, string, flags=0):
  21. return compile(pattern, flags).findall(string)
  22. def finditer(pattern, string, flags=0):
  23. return compile(pattern, flags).finditer(string)
  24. def sub(pattern, repl, string, count=0):
  25. return compile(pattern).sub(repl, string, count)
  26. def subn(pattern, repl, string, count=0):
  27. return compile(pattern).subn(repl, string, count)
  28. def split(pattern, string, maxsplit=0):
  29. return compile(pattern).split(string, maxsplit)
  30. def compile(pattern, flags=0):
  31. code, flags, args = _get_code(pattern, flags, allargs=True)
  32. return RSREPattern(pattern, code, flags, *args)
# The escape helper and the exception class are taken unchanged from the
# standard 're' module.
escape = re.escape
error = re.error
  35. class RSREPattern(object):
  36. def __init__(self, pattern, code, flags,
  37. num_groups, groupindex, indexgroup):
  38. self._code = code
  39. self.pattern = pattern
  40. self.flags = flags
  41. self.groups = num_groups
  42. self.groupindex = groupindex
  43. self._indexgroup = indexgroup
  44. def match(self, string, pos=0, endpos=sys.maxint):
  45. return self._make_match(rsre_core.match(self._code, string,
  46. pos, endpos,
  47. flags=self.flags))
  48. def search(self, string, pos=0, endpos=sys.maxint):
  49. return self._make_match(rsre_core.search(self._code, string,
  50. pos, endpos,
  51. flags=self.flags))
  52. def findall(self, string, pos=0, endpos=sys.maxint):
  53. matchlist = []
  54. for match in self.finditer(string, pos, endpos):
  55. if self.groups == 0 or self.groups == 1:
  56. item = match.group(self.groups)
  57. else:
  58. item = match.groups("")
  59. matchlist.append(item)
  60. return matchlist
  61. def finditer(self, string, pos=0, endpos=sys.maxint):
  62. return iter(self.scanner(string, pos, endpos).search, None)
  63. def subn(self, repl, string, count=0):
  64. filter = repl
  65. if not callable(repl) and "\\" in repl:
  66. # handle non-literal strings; hand it over to the template compiler
  67. filter = re._subx(self, repl)
  68. start = 0
  69. sublist = []
  70. force_unicode = (isinstance(string, unicode) or
  71. isinstance(repl, unicode))
  72. n = last_pos = 0
  73. while not count or n < count:
  74. match = rsre_core.search(self._code, string, start,
  75. flags=self.flags)
  76. if match is None:
  77. break
  78. if last_pos < match.match_start:
  79. sublist.append(string[last_pos:match.match_start])
  80. if not (last_pos == match.match_start
  81. == match.match_end and n > 0):
  82. # the above ignores empty matches on latest position
  83. if callable(filter):
  84. piece = filter(self._make_match(match))
  85. else:
  86. piece = filter
  87. sublist.append(piece)
  88. last_pos = match.match_end
  89. n += 1
  90. elif last_pos >= len(string):
  91. break # empty match at the end: finished
  92. #
  93. start = match.match_end
  94. if start == match.match_start:
  95. start += 1
  96. if last_pos < len(string):
  97. sublist.append(string[last_pos:])
  98. if n == 0:
  99. # not just an optimization -- see test_sub_unicode
  100. return string, n
  101. if force_unicode:
  102. item = u"".join(sublist)
  103. else:
  104. item = "".join(sublist)
  105. return item, n
  106. def sub(self, repl, string, count=0):
  107. item, n = self.subn(repl, string, count)
  108. return item
  109. def split(self, string, maxsplit=0):
  110. splitlist = []
  111. start = 0
  112. n = 0
  113. last = 0
  114. while not maxsplit or n < maxsplit:
  115. match = rsre_core.search(self._code, string, start,
  116. flags=self.flags)
  117. if match is None:
  118. break
  119. if match.match_start == match.match_end: # zero-width match
  120. if match.match_start == len(string): # at end of string
  121. break
  122. start = match.match_end + 1
  123. continue
  124. splitlist.append(string[last:match.match_start])
  125. # add groups (if any)
  126. if self.groups:
  127. match1 = self._make_match(match)
  128. splitlist.extend(match1.groups(None))
  129. n += 1
  130. last = start = match.match_end
  131. splitlist.append(string[last:])
  132. return splitlist
  133. def scanner(self, string, start=0, end=sys.maxint):
  134. return SREScanner(self, string, start, end)
  135. def _make_match(self, res):
  136. if res is None:
  137. return None
  138. return RSREMatch(self, res)
  139. class RSREMatch(object):
  140. def __init__(self, pattern, ctx):
  141. self.re = pattern
  142. self._ctx = ctx
  143. def span(self, groupnum=0):
  144. # if not isinstance(groupnum, (int, long)):
  145. # groupnum = self.re.groupindex[groupnum]
  146. return self._ctx.span(groupnum)
  147. def start(self, groupnum=0):
  148. return self.span(groupnum)[0]
  149. def end(self, groupnum=0):
  150. return self.span(groupnum)[1]
  151. def group(self, group=0):
  152. frm, to = self.span(group)
  153. if 0 <= frm <= to:
  154. return self._ctx._string[frm:to]
  155. else:
  156. return None
  157. # def group(self, *groups):
  158. # groups = groups or (0,)
  159. # result = []
  160. # for group in groups:
  161. # frm, to = self.span(group)
  162. # if 0 <= frm <= to:
  163. # result.append(self._ctx._string[frm:to])
  164. # else:
  165. # result.append(None)
  166. # if len(result) > 1:
  167. # return tuple(result)
  168. def groups(self, default=None):
  169. fmarks = self._ctx.flatten_marks()
  170. grps = []
  171. for i in range(1, self.re.groups+1):
  172. grp = self.group(i)
  173. if grp is None: grp = default
  174. grps.append(grp)
  175. return tuple(grps)
  176. def groupdict(self, default=None):
  177. d = {}
  178. for key, value in self.re.groupindex.iteritems():
  179. grp = self.group(value)
  180. if grp is None: grp = default
  181. d[key] = grp
  182. return d
  183. def expand(self, template):
  184. return re._expand(self.re, self, template)
  185. @property
  186. def regs(self):
  187. fmarks = self._ctx.flatten_marks()
  188. return tuple([(fmarks[i], fmarks[i+1])
  189. for i in range(0, len(fmarks), 2)])
  190. @property
  191. def lastindex(self):
  192. self._ctx.flatten_marks()
  193. if self._ctx.match_lastindex < 0:
  194. return None
  195. return self._ctx.match_lastindex // 2 + 1
  196. @property
  197. def lastgroup(self):
  198. lastindex = self.lastindex
  199. if lastindex < 0 or lastindex >= len(self.re._indexgroup):
  200. return None
  201. return self.re._indexgroup[lastindex]
  202. @property
  203. def string(self):
  204. return self._ctx._string
  205. @property
  206. def pos(self):
  207. return self._ctx.match_start
  208. @property
  209. def endpos(self):
  210. return self._ctx.end
  211. class SREScanner(object):
  212. def __init__(self, pattern, string, start, end):
  213. self.pattern = pattern
  214. self._string = string
  215. self._start = start
  216. self._end = end
  217. def _match_search(self, matcher):
  218. if self._start > len(self._string):
  219. return None
  220. match = matcher(self._string, self._start, self._end)
  221. if match is None:
  222. self._start += 1 # obscure corner case
  223. else:
  224. self._start = match.end()
  225. if match.start() == self._start:
  226. self._start += 1
  227. return match
  228. def match(self):
  229. return self._match_search(self.pattern.match)
  230. def search(self):
  231. return self._match_search(self.pattern.search)
  232. class Scanner:
  233. # This class is copied directly from re.py.
  234. def __init__(self, lexicon, flags=0):
  235. from sre_constants import BRANCH, SUBPATTERN
  236. import sre_parse
  237. self.lexicon = lexicon
  238. # combine phrases into a compound pattern
  239. p = []
  240. s = sre_parse.Pattern()
  241. s.flags = flags
  242. for phrase, action in lexicon:
  243. p.append(sre_parse.SubPattern(s, [
  244. (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
  245. ]))
  246. s.groups = len(p)+1
  247. p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
  248. self.scanner = compile(p)
  249. def scan(self, string):
  250. result = []
  251. append = result.append
  252. match = self.scanner.scanner(string).match
  253. i = 0
  254. while 1:
  255. m = match()
  256. if not m:
  257. break
  258. j = m.end()
  259. if i == j:
  260. break
  261. action = self.lexicon[m.lastindex-1][1]
  262. if callable(action):
  263. self.match = m
  264. action = action(self, m.group())
  265. if action is not None:
  266. append(action)
  267. i = j
  268. return result, string[i:]