/Lib/json/scanner.py

http://unladen-swallow.googlecode.com/ · Python · 69 lines · 52 code · 9 blank · 8 comment · 12 complexity · 8aa693b27702a635e509cd4030b0e1dc MD5 · raw file

  1. """Iterator based sre token scanner
  2. """
  3. import re
  4. import sre_parse
  5. import sre_compile
  6. import sre_constants
  7. from re import VERBOSE, MULTILINE, DOTALL
  8. from sre_constants import BRANCH, SUBPATTERN
  9. __all__ = ['Scanner', 'pattern']
  10. FLAGS = (VERBOSE | MULTILINE | DOTALL)
  11. class Scanner(object):
  12. def __init__(self, lexicon, flags=FLAGS):
  13. self.actions = [None]
  14. # Combine phrases into a compound pattern
  15. s = sre_parse.Pattern()
  16. s.flags = flags
  17. p = []
  18. for idx, token in enumerate(lexicon):
  19. phrase = token.pattern
  20. try:
  21. subpattern = sre_parse.SubPattern(s,
  22. [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
  23. except sre_constants.error:
  24. raise
  25. p.append(subpattern)
  26. self.actions.append(token)
  27. s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
  28. p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
  29. self.scanner = sre_compile.compile(p)
  30. def iterscan(self, string, idx=0, context=None):
  31. """Yield match, end_idx for each match
  32. """
  33. match = self.scanner.scanner(string, idx).match
  34. actions = self.actions
  35. lastend = idx
  36. end = len(string)
  37. while True:
  38. m = match()
  39. if m is None:
  40. break
  41. matchbegin, matchend = m.span()
  42. if lastend == matchend:
  43. break
  44. action = actions[m.lastindex]
  45. if action is not None:
  46. rval, next_pos = action(m, context)
  47. if next_pos is not None and next_pos != matchend:
  48. # "fast forward" the scanner
  49. matchend = next_pos
  50. match = self.scanner.scanner(string, matchend).match
  51. yield rval, matchend
  52. lastend = matchend
  53. def pattern(pattern, flags=FLAGS):
  54. def decorator(fn):
  55. fn.pattern = pattern
  56. fn.regex = re.compile(pattern, flags)
  57. return fn
  58. return decorator