PageRenderTime 49ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/rlib/parsing/parsing.py

https://bitbucket.org/dac_io/pypy
Python | 349 lines | 336 code | 11 blank | 2 comment | 4 complexity | 258f1f7e29885e01bb85634222689808 MD5 | raw file
  1. import py
  2. from pypy.rlib.parsing.lexer import SourcePos
  3. from pypy.rlib.parsing.tree import Node, Symbol, Nonterminal
  class Rule(object):
      """A grammar rule: a nonterminal together with its alternative expansions.

      ``nonterminal`` is the name being defined and ``expansions`` is the
      sequence of alternatives for it.  Two rules compare equal when both
      their nonterminal and their expansions are equal.
      """

      def __init__(self, nonterminal, expansions):
          self.nonterminal = nonterminal
          self.expansions = expansions

      def getkey(self):
          """Return ``(nonterminal, tuple(expansions))``, the value ``==`` compares."""
          return (self.nonterminal, tuple(self.expansions))

      # NOTE(review): no __hash__ is defined although __eq__ is.  A hash built
      # on getkey() would fail whenever an expansion is a list -- confirm the
      # intended hashing behaviour before adding one.

      def __eq__(self, other):
          # Objects that are not rules have no getkey(); decline the comparison
          # instead of failing with AttributeError (e.g. for ``rule == None``).
          if not isinstance(other, Rule):
              return NotImplemented
          return self.getkey() == other.getkey()

      def __ne__(self, other):
          return not self == other

      def __str__(self):
          alternatives = " | ".join([repr(e) for e in self.expansions])
          return "%s: %s" % (self.nonterminal, alternatives)

      def __repr__(self):
          return "Rule(%r, %r)" % (self.nonterminal, self.expansions)
  class LazyInputStream(object):
      """Indexable view over an iterator that pulls items only when needed.

      Items already pulled are kept in ``self.data`` so that earlier indices
      can be read again without touching the iterator.
      """

      def __init__(self, iterator):
          self.data = []
          self.iterator = iter(iterator)

      def __getitem__(self, index):
          """Return item ``index``, consuming the iterator as far as necessary.

          Raises IndexError when the iterator runs out before ``index``.
          """
          assert index >= 0
          cache = self.data
          missing = index + 1 - len(cache)
          while missing > 0:
              try:
                  item = self.iterator.next()
              except StopIteration:
                  raise IndexError("index out of range")
              cache.append(item)
              missing -= 1
          return cache[index]
  class ParseError(Exception):
      """Raised when the input cannot be parsed.

      ``source_pos`` is read for its ``lineno`` and ``columnno`` attributes and
      ``errorinformation`` (may be None) for its ``failure_reasons`` list when
      a message is formatted.
      """

      def __init__(self, source_pos, errorinformation):
          self.source_pos = source_pos
          self.errorinformation = errorinformation
          self.args = (source_pos, errorinformation)

      def nice_error_message(self, filename="<unknown>", source=""):
          """Return a multi-line, human readable description of the error.

          ``filename`` is only used for display.  When ``source`` is given, the
          offending line (indexed by ``source_pos.lineno``) and a caret under
          ``source_pos.columnno`` are included.
          """
          result = [" File %s, line %s" % (filename, self.source_pos.lineno)]
          if source:
              result.append(source.split("\n")[self.source_pos.lineno])
              result.append(" " * self.source_pos.columnno + "^")
          else:
              result.append("<couldn't get source>")
          failure_reasons = []
          if self.errorinformation:
              failure_reasons = self.errorinformation.failure_reasons
          if len(failure_reasons) > 1:
              all_but_one = failure_reasons[:-1]
              last = failure_reasons[-1]
              expected = "%s or '%s'" % (
                  ", ".join(["'%s'" % e for e in all_but_one]), last)
              result.append("ParseError: expected %s" % (expected, ))
          elif len(failure_reasons) == 1:
              result.append("ParseError: expected %s" % (failure_reasons[0], ))
          else:
              # Either there is no error information at all or it carries no
              # reasons (an error recorded at the end of the input has none);
              # indexing failure_reasons[0] here used to raise IndexError.
              result.append("ParseError")
          return "\n".join(result)
  class ErrorInformation(object):
      """Records where a match failed and what was expected at that position.

      ``pos`` is the input index of the failure; ``failure_reasons`` is the
      list of expected names (a fresh empty list when not given).
      """

      def __init__(self, pos, failure_reasons=None):
          self.pos = pos
          if failure_reasons is None:
              self.failure_reasons = []
          else:
              self.failure_reasons = failure_reasons
  def combine_errors(self, other):
      """Merge two error records (either may be None) into the most useful one.

      This is a plain function; ``self`` is simply the first error.  The error
      at the larger position wins, an error without failure reasons loses
      against one that has some, and two errors at the same position are
      merged into a new ErrorInformation whose reasons are the union of both,
      in first-seen order.
      """
      if self is None:
          return other
      if other is None:
          return self
      if self.pos > other.pos or len(other.failure_reasons) == 0:
          return self
      if other.pos > self.pos or len(self.failure_reasons) == 0:
          return other
      # Same position and both have reasons: concatenate without duplicates.
      # A dict is used as the "seen" set.
      seen = {}
      merged = []
      for reasons in (self.failure_reasons, other.failure_reasons):
          for reason in reasons:
              if reason in seen:
                  continue
              seen[reason] = True
              merged.append(reason)
      return ErrorInformation(self.pos, merged)
  class LazyParseTable(object):
      """Memoizing match table for one input sequence and one parser.

      ``input`` must be indexable and raise IndexError past its end;
      ``parser`` must provide ``is_nonterminal(symbol)`` and
      ``get_rule(symbol)``.  Results are cached in ``self.matched`` under the
      key ``(position, symbol)``.
      """

      def __init__(self, input, parser):
          self.parser = parser
          self.input = input
          self.matched = {}
          self.errorinformation = {}

      def match_symbol(self, i, symbol):
          """Try to match ``symbol`` starting at input position ``i``.

          Returns a ``(node, next_position, error)`` triple.  On success
          ``node`` is a Nonterminal or Symbol; on failure ``node`` is None,
          ``next_position`` is 0 and ``error`` describes what was expected.
          """
          key = (i, symbol)
          if key in self.matched:
              return self.matched[key]
          error = None # for the annotator
          if self.parser.is_nonterminal(symbol):
              rule = self.parser.get_rule(symbol)
              # Ordered choice: the first expansion whose symbols all match wins.
              for expansion in rule.expansions:
                  curr = i
                  children = []
                  for subsymbol in expansion:
                      node, nextpos, error2 = self.match_symbol(curr, subsymbol)
                      if node is None:
                          error = combine_errors(error, error2)
                          break
                      children.append(node)
                      curr = nextpos
                  else:
                      assert len(expansion) == len(children)
                      result = (Nonterminal(symbol, children), curr, error)
                      self.matched[key] = result
                      return result
              result = (None, 0, error)
              self.matched[key] = result
              return result
          # Terminal.  Only the input lookup may signal "end of input"; an
          # IndexError raised anywhere else must not be mistaken for it.
          try:
              token = self.input[i]
          except IndexError:
              return None, 0, ErrorInformation(i)
          if self.terminal_equality(symbol, token):
              result = (Symbol(symbol, token.source, token), i + 1, error)
              self.matched[key] = result
              return result
          return None, 0, ErrorInformation(i, [self._expected_name(symbol)])

      def _expected_name(self, symbol):
          """Return the name to report when terminal ``symbol`` was expected."""
          # XXX hack unnice: handles the sort of token names that
          # ebnfparse produces
          if symbol.startswith("__"):
              parts = symbol.split("_")
              # parts[2] is empty for names such as "__"; guard before indexing.
              if parts[2] and parts[2][0] in "0123456789":
                  return parts[-1]
          return symbol

      def terminal_equality(self, symbol, input):
          """Return whether token ``input`` matches terminal ``symbol`` (by name)."""
          return symbol == input.name
  class PackratParser(object):
      """Parser driven by a list of rules and a start symbol.

      ``rules`` are objects with ``nonterminal`` and ``expansions``
      attributes.  ``parsetablefactory(input, parser)`` must return an object
      with a ``match_symbol(position, symbol)`` method.
      """

      def __init__(self, rules, startsymbol, parsetablefactory=LazyParseTable,
                   check_for_left_recursion=True):
          self.rules = rules
          self.nonterminal_to_rule = {}
          for rule in rules:
              self.nonterminal_to_rule[rule.nonterminal] = rule
          self.startsymbol = startsymbol
          if check_for_left_recursion:
              # Raised explicitly (same exception type as the former assert)
              # so the check is not skipped when running with -O.
              if self.has_left_recursion():
                  raise AssertionError("grammar contains left recursion")
          self.parsetablefactory = parsetablefactory

      def is_nonterminal(self, symbol):
          """Return whether ``symbol`` is defined by one of the rules."""
          return symbol in self.nonterminal_to_rule

      def get_rule(self, symbol):
          """Return the rule defining nonterminal ``symbol`` (KeyError if none)."""
          return self.nonterminal_to_rule[symbol]

      def parse(self, tokeniterator, lazy=False):
          """Parse the tokens and return the resulting tree.

          With ``lazy`` true the tokens are pulled on demand through a
          LazyInputStream, otherwise they are collected into a list first.
          Raises ParseError when the start symbol does not match.
          """
          if lazy:
              tokens = LazyInputStream(tokeniterator)
          else:
              tokens = list(tokeniterator)
          table = self.parsetablefactory(tokens, self)
          result = table.match_symbol(0, self.startsymbol)
          if result[0] is None:
              error = result[2]
              raise ParseError(self._error_source_pos(tokens, error), error)
          return result[0]

      def _error_source_pos(self, tokens, error):
          """Return the source position to report for ``error``.

          A failure at the end of the input has a position one past the last
          token; indexing it directly used to raise IndexError instead of the
          ParseError.  In that case the nearest earlier token is used, or None
          when there are no tokens at all.
          """
          pos = error.pos
          while pos >= 0:
              try:
                  return tokens[pos].source_pos
              except IndexError:
                  pos -= 1
          return None

      def has_left_recursion(self):
          """NOT_RPYTHON"""
          # Map every nonterminal to the set of nonterminals that can occur as
          # the first symbol of one of its expansions ...
          follows = {}
          for rule in self.rules:
              follow = py.builtin.set()
              follows[rule.nonterminal] = follow
              for expansion in rule.expansions:
                  if expansion and self.is_nonterminal(expansion[0]):
                      follow.add(expansion[0])
          # ... then close those sets transitively until nothing changes.
          changed = True
          while changed:
              changed = False
              for nonterminal, follow in follows.items():
                  for nt in follow:
                      subfollow = follows[nt]
                      update = subfollow - follow
                      if update:
                          changed = True
                          follow.update(update)
                          # the set was modified: stop iterating over it
                          break
          # A nonterminal reachable from itself this way is left-recursive.
          for nonterminal, follow in follows.items():
              if nonterminal in follow:
                  print("nonterminal %s is in its own follow %s" % (nonterminal, follow))
                  return True
          return False

      def __repr__(self):
          from pprint import pformat
          return "%s%s" % (self.__class__.__name__,
                           pformat((self.rules, self.startsymbol)), )
  class ParserCompiler(object):
      """Generates the source of a parser class specialised for one grammar.

      For every symbol reachable from the parser's start symbol a numbered
      ``match_terminalN`` / ``match_nonterminalN`` method is generated; the
      pieces of source are collected in ``self.allcode``.  ``parser`` must
      provide ``startsymbol``, ``is_nonterminal()``, ``nonterminal_to_rule``
      and, for ``compile()``, ``parsetablefactory``.
      """

      def __init__(self, parser):
          self.parser = parser
          self.allcode = []
          self.symbol_to_number = {}
          self.made = {}

      def compile(self):
          """Generate, execute and return the ``CompileableParser`` class.

          The class derives from the class of the wrapped parser and is
          executed in a copy of this module's globals.  Its
          ``terminal_equality`` is taken from a parse table created by the
          parser's table factory.
          """
          from pypy.tool.sourcetools import func_with_new_name
          self.allcode.append("class CompileableParser(baseclass):")
          self.make_matcher(self.parser.startsymbol)
          self.make_fixed()
          miniglobals = globals().copy()
          miniglobals["baseclass"] = self.parser.__class__
          #print "\n".join(self.allcode)
          source = py.code.Source("\n".join(self.allcode))
          # call form of exec: equivalent to ``exec code in miniglobals``
          exec(source.compile(), miniglobals)
          kls = miniglobals["CompileableParser"]
          # XXX
          parsetable = self.parser.parsetablefactory([], self.parser)
          kls.terminal_equality = func_with_new_name(
              parsetable.terminal_equality.im_func,
              "terminal_equality_compileable")
          return kls

      def get_number(self, symbol):
          """Return the number assigned to ``symbol``, assigning the next free one if new."""
          if symbol in self.symbol_to_number:
              return self.symbol_to_number[symbol]
          result = len(self.symbol_to_number)
          self.symbol_to_number[symbol] = result
          return result

      def make_matcher(self, symbol):
          """Generate the matcher for ``symbol`` unless that was already done."""
          if symbol not in self.made:
              # marked before generating so that recursive rules terminate
              self.made[symbol] = True
              if self.parser.is_nonterminal(symbol):
                  self.make_nonterminal_matcher(symbol)
              else:
                  self.make_terminal_matcher(symbol)

      def make_terminal_matcher(self, symbol):
          """Append the source of ``match_terminalN`` for terminal ``symbol``.

          The generated method returns ``(Symbol, i + 1)`` on a match (and
          caches it) or ``(None, i)`` otherwise.
          """
          number = self.get_number(symbol)
          # NOTE(review): the generated code passes input.name to Symbol while
          # LazyParseTable.match_symbol passes input.source -- confirm which
          # one is intended.
          self.allcode.append("""
      def match_terminal%(number)s(self, i):
          # matcher for terminal %(number)s %(symbol)r
          if i in self.matched_terminals%(number)s:
              return self.matched_terminals%(number)s[i]
          try:
              input = self.input[i]
              if self.terminal_equality(%(symbol)r, input):
                  symbol = Symbol(%(symbol)r, input.name, input)
                  result = (symbol, i + 1)
                  self.matched_terminals%(number)s[i] = result
                  return result
          except IndexError:
              pass
          return None, i""" % {"number": number, "symbol": symbol})

      def make_nonterminal_matcher(self, symbol):
          """Append the source of ``match_nonterminalN`` for nonterminal ``symbol``.

          The generated method tries the expansions in order inside a
          ``while 1`` loop, using ``expansionindex`` to select the alternative
          to try next.  It returns ``(Nonterminal, next_position)`` for the
          first alternative that matches, or ``(None, last_failed_position)``
          when none does; both outcomes are cached.
          """
          number = self.get_number(symbol)
          rule = self.parser.nonterminal_to_rule[symbol]
          names = {"number": number, "symbol": symbol}
          code = []
          code.append("""
      def match_nonterminal%(number)s(self, i):
          # matcher for nonterminal %(number)s %(symbol)s
          if i in self.matched_nonterminals%(number)s:
              return self.matched_nonterminals%(number)s[i]
          last_failed_position = 0
          subsymbol = None
          expansionindex = 0
          while 1:""" % names)
          # stays 0 for a rule without expansions, so that the final
          # "everything failed" branch below is still well defined
          nextindex = 0
          for expansionindex, expansion in enumerate(rule.expansions):
              nextindex = expansionindex + 1
              code.append("""\
              if expansionindex == %s:""" % (expansionindex, ))
              if not expansion:
                  # An empty expansion always matches without consuming input.
                  # The symbol has to be spelled into the generated source:
                  # no variable named "symbol" exists when that code runs.
                  code.append("""\
                  result = (Nonterminal(%(symbol)r, []), i)
                  self.matched_nonterminals%(number)s[i] = result
                  return result""" % names)
                  continue
              code.append("""\
                  curr = i
                  children = []""")
              for subsymbol in expansion:
                  self.make_matcher(subsymbol)
                  if self.parser.is_nonterminal(subsymbol):
                      match = "match_nonterminal%s" % self.get_number(subsymbol)
                  else:
                      match = "match_terminal%s" % self.get_number(subsymbol)
                  # Matched nodes must be collected, as LazyParseTable does;
                  # otherwise every generated Nonterminal ends up childless.
                  code.append("""\
                  node, next = self.%(match)s(curr)
                  if node is None:
                      last_failed_position = next
                      expansionindex = %(nextindex)s
                      continue
                  children.append(node)
                  curr = next""" % {"match": match, "nextindex": nextindex})
              code.append("""\
                  result = (Nonterminal(%(symbol)r, children), curr)
                  self.matched_nonterminals%(number)s[i] = result
                  return result""" % names)
          code.append("""\
              if expansionindex == %(nextindex)s:
                  result = None, last_failed_position
                  self.matched_nonterminals%(number)s[i] = result
                  return result""" % {"nextindex": nextindex, "number": number})
          self.allcode.extend(code)

      def make_fixed(self):
          """Append the source of the generated ``__init__`` and ``parse`` methods.

          ``__init__`` creates one cache dict per numbered symbol.  ``parse``
          runs the start symbol's matcher at position 0 and raises
          ``ParseError(None, token_at_failure_position)`` when it fails.
          """
          # __init__
          code = ["""
      def __init__(self):
          self.rules = [] # dummy
          self.nonterminal_to_rule = {} # dummy
          self.startsymbol = "" # dummy
          self.parsetablefactory = None # dummy"""]
          for symbol, number in self.symbol_to_number.items():
              if self.parser.is_nonterminal(symbol):
                  name = "matched_nonterminals%s" % number
              else:
                  name = "matched_terminals%s" % number
              code.append("""\
          self.%(name)s = {}""" % {"name": name})
          # parse
          startsymbol = self.get_number(self.parser.startsymbol)
          code.append("""
      def parse(self, tokenlist, lazy=True):
          self.input = tokenlist
          result = self.match_nonterminal%(startsymbol)s(0)
          if result[0] is None:
              raise ParseError(None, self.input[result[1]])
          return result[0]""" % {"startsymbol": startsymbol})
          self.allcode.extend(code)