PageRenderTime 147ms CodeModel.GetById 40ms RepoModel.GetById 5ms app.codeStats 0ms

/pypy/module/_sre/interp_sre.py

https://bitbucket.org/pypy/pypy/
Python | 654 lines | 613 code | 26 blank | 15 comment | 29 complexity | a16220492973c5c042472e8786cfa40c MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. import sys
  2. from pypy.interpreter.baseobjspace import W_Root
  3. from pypy.interpreter.typedef import GetSetProperty, TypeDef
  4. from pypy.interpreter.typedef import interp_attrproperty, interp_attrproperty_w
  5. from pypy.interpreter.typedef import make_weakref_descr
  6. from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
  7. from pypy.interpreter.error import OperationError, oefmt
  8. from rpython.rlib.rarithmetic import intmask
  9. from rpython.rlib import jit
  10. from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
  11. # ____________________________________________________________
  12. #
  13. # Constants and exposed functions
  14. from rpython.rlib.rsre import rsre_core
  15. from rpython.rlib.rsre.rsre_char import MAGIC, CODESIZE, MAXREPEAT, getlower, set_unicode_db
  16. @unwrap_spec(char_ord=int, flags=int)
  17. def w_getlower(space, char_ord, flags):
  18. return space.wrap(getlower(char_ord, flags))
  19. def w_getcodesize(space):
  20. return space.wrap(CODESIZE)
  21. # use the same version of unicodedb as the standard objspace
  22. import pypy.objspace.std.unicodeobject
  23. set_unicode_db(pypy.objspace.std.unicodeobject.unicodedb)
  24. # ____________________________________________________________
  25. #
  26. def slice_w(space, ctx, start, end, w_default):
  27. if 0 <= start <= end:
  28. if isinstance(ctx, rsre_core.BufMatchContext):
  29. return space.newbytes(ctx._buffer.getslice(start, end, 1,
  30. end-start))
  31. if isinstance(ctx, rsre_core.StrMatchContext):
  32. return space.newbytes(ctx._string[start:end])
  33. elif isinstance(ctx, rsre_core.UnicodeMatchContext):
  34. return space.newunicode(ctx._unicodestr[start:end])
  35. else:
  36. # unreachable
  37. raise SystemError
  38. return w_default
  39. @jit.look_inside_iff(lambda ctx, num_groups: jit.isconstant(num_groups))
  40. def do_flatten_marks(ctx, num_groups):
  41. # Returns a list of RPython-level integers.
  42. # Unlike the app-level groups() method, groups are numbered from 0
  43. # and the returned list does not start with the whole match range.
  44. if num_groups == 0:
  45. return None
  46. result = [-1] * (2 * num_groups)
  47. mark = ctx.match_marks
  48. while mark is not None:
  49. index = jit.promote(mark.gid)
  50. if result[index] == -1:
  51. result[index] = mark.position
  52. mark = mark.prev
  53. return result
  54. @jit.look_inside_iff(lambda space, ctx, fmarks, num_groups, w_default: jit.isconstant(num_groups))
  55. def allgroups_w(space, ctx, fmarks, num_groups, w_default):
  56. grps = [slice_w(space, ctx, fmarks[i * 2], fmarks[i * 2 + 1], w_default)
  57. for i in range(num_groups)]
  58. return space.newtuple(grps)
  59. def import_re(space):
  60. w_builtin = space.getbuiltinmodule('__builtin__')
  61. w_import = space.getattr(w_builtin, space.wrap("__import__"))
  62. return space.call_function(w_import, space.wrap("re"))
  63. def matchcontext(space, ctx):
  64. try:
  65. return rsre_core.match_context(ctx)
  66. except rsre_core.Error as e:
  67. raise OperationError(space.w_RuntimeError, space.wrap(e.msg))
  68. def searchcontext(space, ctx):
  69. try:
  70. return rsre_core.search_context(ctx)
  71. except rsre_core.Error as e:
  72. raise OperationError(space.w_RuntimeError, space.wrap(e.msg))
  73. # ____________________________________________________________
  74. #
  75. # SRE_Pattern class
  76. class W_SRE_Pattern(W_Root):
  77. _immutable_fields_ = ["code", "flags", "num_groups", "w_groupindex"]
  78. def cannot_copy_w(self):
  79. space = self.space
  80. raise oefmt(space.w_TypeError, "cannot copy this pattern object")
  81. def make_ctx(self, w_string, pos=0, endpos=sys.maxint):
  82. """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for
  83. searching in the given w_string object."""
  84. space = self.space
  85. if pos < 0:
  86. pos = 0
  87. if endpos < pos:
  88. endpos = pos
  89. if space.isinstance_w(w_string, space.w_unicode):
  90. unicodestr = space.unicode_w(w_string)
  91. if pos > len(unicodestr):
  92. pos = len(unicodestr)
  93. if endpos > len(unicodestr):
  94. endpos = len(unicodestr)
  95. return rsre_core.UnicodeMatchContext(self.code, unicodestr,
  96. pos, endpos, self.flags)
  97. elif space.isinstance_w(w_string, space.w_str):
  98. str = space.str_w(w_string)
  99. if pos > len(str):
  100. pos = len(str)
  101. if endpos > len(str):
  102. endpos = len(str)
  103. return rsre_core.StrMatchContext(self.code, str,
  104. pos, endpos, self.flags)
  105. else:
  106. buf = space.readbuf_w(w_string)
  107. size = buf.getlength()
  108. assert size >= 0
  109. if pos > size:
  110. pos = size
  111. if endpos > size:
  112. endpos = size
  113. return rsre_core.BufMatchContext(self.code, buf,
  114. pos, endpos, self.flags)
  115. def getmatch(self, ctx, found):
  116. if found:
  117. return W_SRE_Match(self, ctx)
  118. else:
  119. return self.space.w_None
  120. @unwrap_spec(pos=int, endpos=int)
  121. def match_w(self, w_string, pos=0, endpos=sys.maxint):
  122. ctx = self.make_ctx(w_string, pos, endpos)
  123. return self.getmatch(ctx, matchcontext(self.space, ctx))
  124. @unwrap_spec(pos=int, endpos=int)
  125. def search_w(self, w_string, pos=0, endpos=sys.maxint):
  126. ctx = self.make_ctx(w_string, pos, endpos)
  127. return self.getmatch(ctx, searchcontext(self.space, ctx))
  128. @unwrap_spec(pos=int, endpos=int)
  129. def findall_w(self, w_string, pos=0, endpos=sys.maxint):
  130. space = self.space
  131. matchlist_w = []
  132. ctx = self.make_ctx(w_string, pos, endpos)
  133. while ctx.match_start <= ctx.end:
  134. if not searchcontext(space, ctx):
  135. break
  136. num_groups = self.num_groups
  137. w_emptystr = space.wrap("")
  138. if num_groups == 0:
  139. w_item = slice_w(space, ctx, ctx.match_start, ctx.match_end,
  140. w_emptystr)
  141. else:
  142. fmarks = do_flatten_marks(ctx, num_groups)
  143. if num_groups == 1:
  144. w_item = slice_w(space, ctx, fmarks[0], fmarks[1],
  145. w_emptystr)
  146. else:
  147. w_item = allgroups_w(space, ctx, fmarks, num_groups,
  148. w_emptystr)
  149. matchlist_w.append(w_item)
  150. no_progress = (ctx.match_start == ctx.match_end)
  151. ctx.reset(ctx.match_end + no_progress)
  152. return space.newlist(matchlist_w)
  153. @unwrap_spec(pos=int, endpos=int)
  154. def finditer_w(self, w_string, pos=0, endpos=sys.maxint):
  155. # this also works as the implementation of the undocumented
  156. # scanner() method.
  157. ctx = self.make_ctx(w_string, pos, endpos)
  158. scanner = W_SRE_Scanner(self, ctx)
  159. return self.space.wrap(scanner)
  160. @unwrap_spec(maxsplit=int)
  161. def split_w(self, w_string, maxsplit=0):
  162. space = self.space
  163. splitlist = []
  164. n = 0
  165. last = 0
  166. ctx = self.make_ctx(w_string)
  167. while not maxsplit or n < maxsplit:
  168. if not searchcontext(space, ctx):
  169. break
  170. if ctx.match_start == ctx.match_end: # zero-width match
  171. if ctx.match_start == ctx.end: # or end of string
  172. break
  173. ctx.reset(ctx.match_end + 1)
  174. continue
  175. splitlist.append(slice_w(space, ctx, last, ctx.match_start,
  176. space.w_None))
  177. # add groups (if any)
  178. fmarks = do_flatten_marks(ctx, self.num_groups)
  179. for groupnum in range(self.num_groups):
  180. groupstart, groupend = fmarks[groupnum*2], fmarks[groupnum*2+1]
  181. splitlist.append(slice_w(space, ctx, groupstart, groupend,
  182. space.w_None))
  183. n += 1
  184. last = ctx.match_end
  185. ctx.reset(last)
  186. splitlist.append(slice_w(space, ctx, last, ctx.end, space.w_None))
  187. return space.newlist(splitlist)
  188. @unwrap_spec(count=int)
  189. def sub_w(self, w_repl, w_string, count=0):
  190. w_item, n = self.subx(w_repl, w_string, count)
  191. return w_item
  192. @unwrap_spec(count=int)
  193. def subn_w(self, w_repl, w_string, count=0):
  194. w_item, n = self.subx(w_repl, w_string, count)
  195. space = self.space
  196. return space.newtuple([w_item, space.wrap(n)])
  197. def subx(self, w_ptemplate, w_string, count):
  198. space = self.space
  199. # use a (much faster) string/unicode builder if w_ptemplate and
  200. # w_string are both string or both unicode objects, and if w_ptemplate
  201. # is a literal
  202. use_builder = False
  203. filter_as_unicode = filter_as_string = None
  204. if space.is_true(space.callable(w_ptemplate)):
  205. w_filter = w_ptemplate
  206. filter_is_callable = True
  207. else:
  208. if space.isinstance_w(w_ptemplate, space.w_unicode):
  209. filter_as_unicode = space.unicode_w(w_ptemplate)
  210. literal = u'\\' not in filter_as_unicode
  211. use_builder = (
  212. space.isinstance_w(w_string, space.w_unicode) and literal)
  213. else:
  214. try:
  215. filter_as_string = space.bytes_w(w_ptemplate)
  216. except OperationError as e:
  217. if e.async(space):
  218. raise
  219. literal = False
  220. else:
  221. literal = '\\' not in filter_as_string
  222. use_builder = (
  223. space.isinstance_w(w_string, space.w_str) and literal)
  224. if literal:
  225. w_filter = w_ptemplate
  226. filter_is_callable = False
  227. else:
  228. # not a literal; hand it over to the template compiler
  229. w_re = import_re(space)
  230. w_filter = space.call_method(w_re, '_subx',
  231. space.wrap(self), w_ptemplate)
  232. filter_is_callable = space.is_true(space.callable(w_filter))
  233. #
  234. # XXX this is a bit of a mess, but it improves performance a lot
  235. ctx = self.make_ctx(w_string)
  236. sublist_w = strbuilder = unicodebuilder = None
  237. if use_builder:
  238. if filter_as_unicode is not None:
  239. unicodebuilder = UnicodeBuilder(ctx.end)
  240. else:
  241. assert filter_as_string is not None
  242. strbuilder = StringBuilder(ctx.end)
  243. else:
  244. sublist_w = []
  245. n = last_pos = 0
  246. while not count or n < count:
  247. sub_jitdriver.jit_merge_point(
  248. self=self,
  249. use_builder=use_builder,
  250. filter_is_callable=filter_is_callable,
  251. filter_type=type(w_filter),
  252. ctx=ctx,
  253. w_filter=w_filter,
  254. strbuilder=strbuilder,
  255. unicodebuilder=unicodebuilder,
  256. filter_as_string=filter_as_string,
  257. filter_as_unicode=filter_as_unicode,
  258. count=count,
  259. w_string=w_string,
  260. n=n, last_pos=last_pos, sublist_w=sublist_w
  261. )
  262. space = self.space
  263. if not searchcontext(space, ctx):
  264. break
  265. if last_pos < ctx.match_start:
  266. _sub_append_slice(
  267. ctx, space, use_builder, sublist_w,
  268. strbuilder, unicodebuilder, last_pos, ctx.match_start)
  269. start = ctx.match_end
  270. if start == ctx.match_start:
  271. start += 1
  272. if not (last_pos == ctx.match_start
  273. == ctx.match_end and n > 0):
  274. # the above ignores empty matches on latest position
  275. if filter_is_callable:
  276. w_match = self.getmatch(ctx, True)
  277. w_piece = space.call_function(w_filter, w_match)
  278. if not space.is_w(w_piece, space.w_None):
  279. assert strbuilder is None and unicodebuilder is None
  280. assert not use_builder
  281. sublist_w.append(w_piece)
  282. else:
  283. if use_builder:
  284. if strbuilder is not None:
  285. assert filter_as_string is not None
  286. strbuilder.append(filter_as_string)
  287. else:
  288. assert unicodebuilder is not None
  289. assert filter_as_unicode is not None
  290. unicodebuilder.append(filter_as_unicode)
  291. else:
  292. sublist_w.append(w_filter)
  293. last_pos = ctx.match_end
  294. n += 1
  295. elif last_pos >= ctx.end:
  296. break # empty match at the end: finished
  297. ctx.reset(start)
  298. if last_pos < ctx.end:
  299. _sub_append_slice(ctx, space, use_builder, sublist_w,
  300. strbuilder, unicodebuilder, last_pos, ctx.end)
  301. if use_builder:
  302. if strbuilder is not None:
  303. return space.newbytes(strbuilder.build()), n
  304. else:
  305. assert unicodebuilder is not None
  306. return space.newunicode(unicodebuilder.build()), n
  307. else:
  308. if space.isinstance_w(w_string, space.w_unicode):
  309. w_emptystr = space.newunicode(u'')
  310. else:
  311. w_emptystr = space.newbytes('')
  312. w_item = space.call_method(w_emptystr, 'join',
  313. space.newlist(sublist_w))
  314. return w_item, n
  315. sub_jitdriver = jit.JitDriver(
  316. reds="""count n last_pos
  317. ctx w_filter
  318. strbuilder unicodebuilder
  319. filter_as_string
  320. filter_as_unicode
  321. w_string sublist_w
  322. self""".split(),
  323. greens=["filter_is_callable", "use_builder", "filter_type", "ctx.pattern"])
  324. def _sub_append_slice(ctx, space, use_builder, sublist_w,
  325. strbuilder, unicodebuilder, start, end):
  326. if use_builder:
  327. if isinstance(ctx, rsre_core.BufMatchContext):
  328. assert strbuilder is not None
  329. return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start))
  330. if isinstance(ctx, rsre_core.StrMatchContext):
  331. assert strbuilder is not None
  332. return strbuilder.append_slice(ctx._string, start, end)
  333. elif isinstance(ctx, rsre_core.UnicodeMatchContext):
  334. assert unicodebuilder is not None
  335. return unicodebuilder.append_slice(ctx._unicodestr, start, end)
  336. assert 0, "unreachable"
  337. else:
  338. sublist_w.append(slice_w(space, ctx, start, end, space.w_None))
  339. @unwrap_spec(flags=int, groups=int, w_groupindex=WrappedDefault(None),
  340. w_indexgroup=WrappedDefault(None))
  341. def SRE_Pattern__new__(space, w_subtype, w_pattern, flags, w_code,
  342. groups=0, w_groupindex=None, w_indexgroup=None):
  343. n = space.len_w(w_code)
  344. code = [intmask(space.uint_w(space.getitem(w_code, space.wrap(i))))
  345. for i in range(n)]
  346. #
  347. w_srepat = space.allocate_instance(W_SRE_Pattern, w_subtype)
  348. srepat = space.interp_w(W_SRE_Pattern, w_srepat)
  349. srepat.space = space
  350. srepat.w_pattern = w_pattern # the original uncompiled pattern
  351. srepat.flags = flags
  352. srepat.code = code
  353. srepat.num_groups = groups
  354. srepat.w_groupindex = w_groupindex
  355. srepat.w_indexgroup = w_indexgroup
  356. return w_srepat
  357. W_SRE_Pattern.typedef = TypeDef(
  358. 'SRE_Pattern',
  359. __new__ = interp2app(SRE_Pattern__new__),
  360. __copy__ = interp2app(W_SRE_Pattern.cannot_copy_w),
  361. __deepcopy__ = interp2app(W_SRE_Pattern.cannot_copy_w),
  362. __weakref__ = make_weakref_descr(W_SRE_Pattern),
  363. findall = interp2app(W_SRE_Pattern.findall_w),
  364. finditer = interp2app(W_SRE_Pattern.finditer_w),
  365. match = interp2app(W_SRE_Pattern.match_w),
  366. scanner = interp2app(W_SRE_Pattern.finditer_w), # reuse finditer()
  367. search = interp2app(W_SRE_Pattern.search_w),
  368. split = interp2app(W_SRE_Pattern.split_w),
  369. sub = interp2app(W_SRE_Pattern.sub_w),
  370. subn = interp2app(W_SRE_Pattern.subn_w),
  371. flags = interp_attrproperty('flags', W_SRE_Pattern),
  372. groupindex = interp_attrproperty_w('w_groupindex', W_SRE_Pattern),
  373. groups = interp_attrproperty('num_groups', W_SRE_Pattern),
  374. pattern = interp_attrproperty_w('w_pattern', W_SRE_Pattern),
  375. )
  376. W_SRE_Pattern.typedef.acceptable_as_base_class = False
  377. # ____________________________________________________________
  378. #
  379. # SRE_Match class
  380. class W_SRE_Match(W_Root):
  381. flatten_cache = None
  382. def __init__(self, srepat, ctx):
  383. self.space = srepat.space
  384. self.srepat = srepat
  385. self.ctx = ctx
  386. def cannot_copy_w(self):
  387. space = self.space
  388. raise oefmt(space.w_TypeError, "cannot copy this match object")
  389. @jit.look_inside_iff(lambda self, args_w: jit.isconstant(len(args_w)))
  390. def group_w(self, args_w):
  391. space = self.space
  392. ctx = self.ctx
  393. if len(args_w) <= 1:
  394. if len(args_w) == 0:
  395. start, end = ctx.match_start, ctx.match_end
  396. else:
  397. start, end = self.do_span(args_w[0])
  398. return slice_w(space, ctx, start, end, space.w_None)
  399. else:
  400. results = [None] * len(args_w)
  401. for i in range(len(args_w)):
  402. start, end = self.do_span(args_w[i])
  403. results[i] = slice_w(space, ctx, start, end, space.w_None)
  404. return space.newtuple(results)
  405. @unwrap_spec(w_default=WrappedDefault(None))
  406. def groups_w(self, w_default=None):
  407. fmarks = self.flatten_marks()
  408. num_groups = self.srepat.num_groups
  409. return allgroups_w(self.space, self.ctx, fmarks, num_groups, w_default)
  410. @unwrap_spec(w_default=WrappedDefault(None))
  411. def groupdict_w(self, w_default=None):
  412. space = self.space
  413. w_dict = space.newdict()
  414. w_groupindex = self.srepat.w_groupindex
  415. w_iterator = space.iter(w_groupindex)
  416. while True:
  417. try:
  418. w_key = space.next(w_iterator)
  419. except OperationError as e:
  420. if not e.match(space, space.w_StopIteration):
  421. raise
  422. break # done
  423. w_value = space.getitem(w_groupindex, w_key)
  424. start, end = self.do_span(w_value)
  425. w_grp = slice_w(space, self.ctx, start, end, w_default)
  426. space.setitem(w_dict, w_key, w_grp)
  427. return w_dict
  428. def expand_w(self, w_template):
  429. space = self.space
  430. w_re = import_re(space)
  431. return space.call_method(w_re, '_expand', space.wrap(self.srepat),
  432. space.wrap(self), w_template)
  433. @unwrap_spec(w_groupnum=WrappedDefault(0))
  434. def start_w(self, w_groupnum):
  435. return self.space.wrap(self.do_span(w_groupnum)[0])
  436. @unwrap_spec(w_groupnum=WrappedDefault(0))
  437. def end_w(self, w_groupnum):
  438. return self.space.wrap(self.do_span(w_groupnum)[1])
  439. @unwrap_spec(w_groupnum=WrappedDefault(0))
  440. def span_w(self, w_groupnum):
  441. start, end = self.do_span(w_groupnum)
  442. return self.space.newtuple([self.space.wrap(start),
  443. self.space.wrap(end)])
  444. def flatten_marks(self):
  445. if self.flatten_cache is None:
  446. num_groups = self.srepat.num_groups
  447. self.flatten_cache = do_flatten_marks(self.ctx, num_groups)
  448. return self.flatten_cache
  449. def do_span(self, w_arg):
  450. space = self.space
  451. try:
  452. groupnum = space.int_w(w_arg)
  453. except OperationError as e:
  454. if not e.match(space, space.w_TypeError) and \
  455. not e.match(space, space.w_OverflowError):
  456. raise
  457. try:
  458. w_groupnum = space.getitem(self.srepat.w_groupindex, w_arg)
  459. except OperationError as e:
  460. if not e.match(space, space.w_KeyError):
  461. raise
  462. raise oefmt(space.w_IndexError, "no such group")
  463. groupnum = space.int_w(w_groupnum)
  464. if groupnum == 0:
  465. return self.ctx.match_start, self.ctx.match_end
  466. elif 1 <= groupnum <= self.srepat.num_groups:
  467. fmarks = self.flatten_marks()
  468. idx = 2*(groupnum-1)
  469. assert idx >= 0
  470. return fmarks[idx], fmarks[idx+1]
  471. else:
  472. raise oefmt(space.w_IndexError, "group index out of range")
  473. def _last_index(self):
  474. mark = self.ctx.match_marks
  475. if mark is not None:
  476. return mark.gid // 2 + 1
  477. return -1
  478. def fget_lastgroup(self, space):
  479. lastindex = self._last_index()
  480. if lastindex < 0:
  481. return space.w_None
  482. w_result = space.finditem(self.srepat.w_indexgroup,
  483. space.wrap(lastindex))
  484. if w_result is None:
  485. return space.w_None
  486. return w_result
  487. def fget_lastindex(self, space):
  488. lastindex = self._last_index()
  489. if lastindex >= 0:
  490. return space.wrap(lastindex)
  491. return space.w_None
  492. def fget_pos(self, space):
  493. return space.wrap(self.ctx.original_pos)
  494. def fget_endpos(self, space):
  495. return space.wrap(self.ctx.end)
  496. def fget_regs(self, space):
  497. space = self.space
  498. fmarks = self.flatten_marks()
  499. num_groups = self.srepat.num_groups
  500. result_w = [None] * (num_groups + 1)
  501. ctx = self.ctx
  502. result_w[0] = space.newtuple([space.wrap(ctx.match_start),
  503. space.wrap(ctx.match_end)])
  504. for i in range(num_groups):
  505. result_w[i + 1] = space.newtuple([space.wrap(fmarks[i*2]),
  506. space.wrap(fmarks[i*2+1])])
  507. return space.newtuple(result_w)
  508. def fget_string(self, space):
  509. ctx = self.ctx
  510. if isinstance(ctx, rsre_core.BufMatchContext):
  511. return space.newbytes(ctx._buffer.as_str())
  512. elif isinstance(ctx, rsre_core.StrMatchContext):
  513. return space.newbytes(ctx._string)
  514. elif isinstance(ctx, rsre_core.UnicodeMatchContext):
  515. return space.newunicode(ctx._unicodestr)
  516. else:
  517. raise SystemError
  518. W_SRE_Match.typedef = TypeDef(
  519. 'SRE_Match',
  520. __copy__ = interp2app(W_SRE_Match.cannot_copy_w),
  521. __deepcopy__ = interp2app(W_SRE_Match.cannot_copy_w),
  522. group = interp2app(W_SRE_Match.group_w),
  523. groups = interp2app(W_SRE_Match.groups_w),
  524. groupdict = interp2app(W_SRE_Match.groupdict_w),
  525. start = interp2app(W_SRE_Match.start_w),
  526. end = interp2app(W_SRE_Match.end_w),
  527. span = interp2app(W_SRE_Match.span_w),
  528. expand = interp2app(W_SRE_Match.expand_w),
  529. #
  530. re = interp_attrproperty('srepat', W_SRE_Match),
  531. string = GetSetProperty(W_SRE_Match.fget_string),
  532. pos = GetSetProperty(W_SRE_Match.fget_pos),
  533. endpos = GetSetProperty(W_SRE_Match.fget_endpos),
  534. lastgroup = GetSetProperty(W_SRE_Match.fget_lastgroup),
  535. lastindex = GetSetProperty(W_SRE_Match.fget_lastindex),
  536. regs = GetSetProperty(W_SRE_Match.fget_regs),
  537. )
  538. W_SRE_Match.typedef.acceptable_as_base_class = False
  539. # ____________________________________________________________
  540. #
  541. # SRE_Scanner class
  542. # This is mostly an internal class in CPython.
  543. # Our version is also directly iterable, to make finditer() easier.
  544. class W_SRE_Scanner(W_Root):
  545. def __init__(self, pattern, ctx):
  546. self.space = pattern.space
  547. self.srepat = pattern
  548. self.ctx = ctx
  549. # 'self.ctx' is always a fresh context in which no searching
  550. # or matching succeeded so far.
  551. def iter_w(self):
  552. return self.space.wrap(self)
  553. def next_w(self):
  554. if self.ctx.match_start > self.ctx.end:
  555. raise OperationError(self.space.w_StopIteration, self.space.w_None)
  556. if not searchcontext(self.space, self.ctx):
  557. raise OperationError(self.space.w_StopIteration, self.space.w_None)
  558. return self.getmatch(True)
  559. def match_w(self):
  560. if self.ctx.match_start > self.ctx.end:
  561. return self.space.w_None
  562. return self.getmatch(matchcontext(self.space, self.ctx))
  563. def search_w(self):
  564. if self.ctx.match_start > self.ctx.end:
  565. return self.space.w_None
  566. return self.getmatch(searchcontext(self.space, self.ctx))
  567. def getmatch(self, found):
  568. if found:
  569. ctx = self.ctx
  570. nextstart = ctx.match_end
  571. nextstart += (ctx.match_start == nextstart)
  572. self.ctx = ctx.fresh_copy(nextstart)
  573. match = W_SRE_Match(self.srepat, ctx)
  574. return self.space.wrap(match)
  575. else:
  576. self.ctx.match_start += 1 # obscure corner case
  577. return None
  578. W_SRE_Scanner.typedef = TypeDef(
  579. 'SRE_Scanner',
  580. __iter__ = interp2app(W_SRE_Scanner.iter_w),
  581. next = interp2app(W_SRE_Scanner.next_w),
  582. match = interp2app(W_SRE_Scanner.match_w),
  583. search = interp2app(W_SRE_Scanner.search_w),
  584. pattern = interp_attrproperty('srepat', W_SRE_Scanner),
  585. )
  586. W_SRE_Scanner.typedef.acceptable_as_base_class = False