PageRenderTime 27ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/ast/spark.py

http://github.com/nriley/jython
Python | 840 lines | 582 code | 108 blank | 150 comment | 154 complexity | 835371d98efe120227b7fe70c8172f40 MD5 | raw file
  1. # Copyright (c) 1998-2002 John Aycock
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining
  4. # a copy of this software and associated documentation files (the
  5. # "Software"), to deal in the Software without restriction, including
  6. # without limitation the rights to use, copy, modify, merge, publish,
  7. # distribute, sublicense, and/or sell copies of the Software, and to
  8. # permit persons to whom the Software is furnished to do so, subject to
  9. # the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be
  12. # included in all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Version identifier for the SPARK toolkit.
__version__ = 'SPARK-0.7 (pre-alpha-5)'

import re
import sys
import string
  25. def _namelist(instance):
  26. namelist, namedict, classlist = [], {}, [instance.__class__]
  27. for c in classlist:
  28. for b in c.__bases__:
  29. classlist.append(b)
  30. for name in c.__dict__.keys():
  31. if not namedict.has_key(name):
  32. namelist.append(name)
  33. namedict[name] = 1
  34. return namelist
  35. class GenericScanner:
  36. def __init__(self, flags=0):
  37. pattern = self.reflect()
  38. self.re = re.compile(pattern, re.VERBOSE|flags)
  39. self.index2func = {}
  40. for name, number in self.re.groupindex.items():
  41. self.index2func[number-1] = getattr(self, 't_' + name)
  42. def makeRE(self, name):
  43. doc = getattr(self, name).__doc__
  44. rv = '(?P<%s>%s)' % (name[2:], doc)
  45. return rv
  46. def reflect(self):
  47. rv = []
  48. for name in _namelist(self):
  49. if name[:2] == 't_' and name != 't_default':
  50. rv.append(self.makeRE(name))
  51. rv.append(self.makeRE('t_default'))
  52. return string.join(rv, '|')
  53. def error(self, s, pos):
  54. print "Lexical error at position %s" % pos
  55. raise SystemExit
  56. def tokenize(self, s):
  57. pos = 0
  58. n = len(s)
  59. while pos < n:
  60. m = self.re.match(s, pos)
  61. if m is None:
  62. self.error(s, pos)
  63. groups = m.groups()
  64. for i in range(len(groups)):
  65. if groups[i] and self.index2func.has_key(i):
  66. self.index2func[i](groups[i])
  67. pos = m.end()
  68. def t_default(self, s):
  69. r'( . | \n )+'
  70. print "Specification error: unmatched input"
  71. raise SystemExit
  72. #
  73. # Extracted from GenericParser and made global so that [un]picking works.
  74. #
  75. class _State:
  76. def __init__(self, stateno, items):
  77. self.T, self.complete, self.items = [], [], items
  78. self.stateno = stateno
class GenericParser:
    #
    # An Earley parser, as per J. Earley, "An Efficient Context-Free
    # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
    # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
    # Carnegie-Mellon University, August 1968. New formulation of
    # the parser according to J. Aycock, "Practical Earley Parsing
    # and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
    # 2001, and J. Aycock and R. N. Horspool, "Practical Earley
    # Parsing", unpublished paper, 2001.
    #

    def __init__(self, start):
        """Collect p_* rules from the subclass and augment the grammar
        with a synthetic start rule deriving `start`."""
        self.rules = {}        # lhs -> list of (lhs, rhs-tuple) rules
        self.rule2func = {}    # rule -> semantic action callable
        self.rule2name = {}    # rule -> defining method's name minus 'p_'
        self.collectRules()
        self.augment(start)
        self.ruleschanged = 1  # forces a state-machine (re)build

    _NULLABLE = '\e_'          # name prefix for nullable-symbol copies in G_e
    _START = 'START'           # synthetic start symbol
    _BOF = '|-'                # beginning-of-file marker symbol

    #
    # When pickling, take the time to generate the full state machine;
    # some information is then extraneous, too. Unfortunately we
    # can't save the rule2func map.
    #
    def __getstate__(self):
        if self.ruleschanged:
            #
            # XXX - duplicated from parse()
            #
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        #
        # XXX - should find a better way to do this..
        #
        # Repeatedly resolve pending (None) edges until a full pass
        # makes no changes, so every reachable state exists before
        # pickling.
        changes = 1
        while changes:
            changes = 0
            for k, v in self.edges.items():
                if v is None:
                    state, sym = k
                    if self.states.has_key(state):
                        self.goto(state, sym)
                        changes = 1
        rv = self.__dict__.copy()
        for s in self.states.values():
            del s.items
        del rv['rule2func']    # per the note above: can't be saved
        del rv['nullable']
        del rv['cores']
        return rv

    def __setstate__(self, D):
        # Rebuild the rule tables from this (sub)class's p_* methods,
        # then overlay the pickled machine; parsing will use the
        # fully-built-machine fast path.
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        start = D['rules'][self._START][0][1][1] # Blech.
        self.augment(start)
        D['rule2func'] = self.rule2func
        D['makeSet'] = self.makeSet_fast
        self.__dict__ = D

    #
    # A hook for GenericASTBuilder and GenericASTMatcher. Mess
    # thee not with this; nor shall thee toucheth the _preprocess
    # argument to addRule.
    #
    def preprocess(self, rule, func): return rule, func

    def addRule(self, doc, func, _preprocess=1):
        """Add the rule(s) in `doc`, each of the form
        'lhs ::= sym sym ...', with semantic action `func`.  One
        docstring may carry several rules."""
        fn = func
        rules = string.split(doc)

        # Locate each rule's start: the token preceding every '::='.
        index = []
        for i in range(len(rules)):
            if rules[i] == '::=':
                index.append(i-1)
        index.append(len(rules))

        for i in range(len(index)-1):
            lhs = rules[index[i]]
            rhs = rules[index[i]+2:index[i+1]]
            rule = (lhs, tuple(rhs))

            if _preprocess:
                rule, fn = self.preprocess(rule, func)

            if self.rules.has_key(lhs):
                self.rules[lhs].append(rule)
            else:
                self.rules[lhs] = [ rule ]
            self.rule2func[rule] = fn
            self.rule2name[rule] = func.__name__[2:]
        self.ruleschanged = 1

    def collectRules(self):
        # Reflect over the instance: every p_* method contributes the
        # grammar rules in its docstring.
        for name in _namelist(self):
            if name[:2] == 'p_':
                func = getattr(self, name)
                doc = func.__doc__
                self.addRule(doc, func)

    def augment(self, start):
        # Synthetic rule  START ::= |- <start>; its action passes the
        # user's tree (args[1]) through.
        rule = '%s ::= %s %s' % (self._START, self._BOF, start)
        self.addRule(rule, lambda args: args[1], 0)

    def computeNull(self):
        """Compute self.nullable: lhs -> 1 iff lhs can derive epsilon."""
        self.nullable = {}
        tbd = []

        for rulelist in self.rules.values():
            lhs = rulelist[0][0]
            self.nullable[lhs] = 0
            for rule in rulelist:
                rhs = rule[1]
                if len(rhs) == 0:
                    self.nullable[lhs] = 1
                    continue
                #
                # We only need to consider rules which
                # consist entirely of nonterminal symbols.
                # This should be a savings on typical
                # grammars.
                #
                for sym in rhs:
                    if not self.rules.has_key(sym):
                        break
                else:
                    tbd.append(rule)
        # Iterate the candidate rules to a fixed point.
        changes = 1
        while changes:
            changes = 0
            for lhs, rhs in tbd:
                if self.nullable[lhs]:
                    continue
                for sym in rhs:
                    if not self.nullable[sym]:
                        break
                else:
                    self.nullable[lhs] = 1
                    changes = 1

    def makeState0(self):
        # State 0 holds a dotted item for each start rule of G_e.
        s0 = _State(0, [])
        for rule in self.newrules[self._START]:
            s0.items.append((rule, 0))
        return s0

    def finalState(self, tokens):
        #
        # Yuck.
        #
        # Special case: empty input with a nullable grammar.
        if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
            return 1
        start = self.rules[self._START][0][1][1]
        return self.goto(1, start)

    def makeNewRules(self):
        """Build G_e (self.newrules): for each rule, add variants whose
        leading nullable symbols are renamed with the _NULLABLE prefix;
        self.new2old maps each variant back to its original rule."""
        worklist = []
        for rulelist in self.rules.values():
            for rule in rulelist:
                worklist.append((rule, 0, 1, rule))

        # NOTE: worklist grows while being iterated; new variants are
        # appended and processed in turn.
        for rule, i, candidate, oldrule in worklist:
            lhs, rhs = rule
            n = len(rhs)
            while i < n:
                sym = rhs[i]
                if not self.rules.has_key(sym) or \
                   not self.nullable[sym]:
                    candidate = 0
                    i = i + 1
                    continue

                newrhs = list(rhs)
                newrhs[i] = self._NULLABLE+sym
                newrule = (lhs, tuple(newrhs))
                worklist.append((newrule, i+1,
                                 candidate, oldrule))
                candidate = 0
                i = i + 1
            else:
                # while/else: always runs (no break above) once the
                # whole rhs has been scanned.
                if candidate:
                    lhs = self._NULLABLE+lhs
                    rule = (lhs, rhs)
                if self.newrules.has_key(lhs):
                    self.newrules[lhs].append(rule)
                else:
                    self.newrules[lhs] = [ rule ]
                self.new2old[rule] = oldrule

    def typestring(self, token):
        # Hook: return a type string for `token`, or None.  Returning a
        # real string lets parse() use the faster gotoT dispatch.
        return None

    def error(self, token):
        # Hook: called on syntax errors; default prints and bails out.
        print "Syntax error at or near `%s' token" % token
        raise SystemExit

    def parse(self, tokens):
        """Parse `tokens` and return the value produced by the start
        rule's semantic action; self.error() is called on failure."""
        sets = [ [(1,0), (2,0)] ]
        self.links = {}

        if self.ruleschanged:
            # (Re)build G_e and the initial machine states.
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)

        for i in xrange(len(tokens)):
            sets.append([])

            if sets[i] == []:
                break                   # no live items: report below
            self.makeSet(tokens[i], sets, i)
        else:
            # for/else: input exhausted without error; one final
            # completion pass with no token.
            sets.append([])
            self.makeSet(None, sets, len(tokens))

        #_dump(tokens, sets, self.states)

        finalitem = (self.finalState(tokens), 0)
        if finalitem not in sets[-2]:
            if len(tokens) > 0:
                self.error(tokens[i-1])
            else:
                self.error(None)

        return self.buildTree(self._START, finalitem,
                              tokens, len(sets)-2)

    def isnullable(self, sym):
        #
        # For symbols in G_e only. If we weren't supporting 1.5,
        # could just use sym.startswith().
        #
        return self._NULLABLE == sym[0:len(self._NULLABLE)]

    def skip(self, (lhs, rhs), pos=0):
        # Advance `pos` past any nullable symbols in rhs.
        n = len(rhs)
        while pos < n:
            if not self.isnullable(rhs[pos]):
                break
            pos = pos + 1
        return pos

    def makeState(self, state, sym):
        """Create (or find) the state reached from `state` on `sym`,
        along with its \epsilon-nonkernel companion state."""
        assert sym is not None
        #
        # Compute \epsilon-kernel state's core and see if
        # it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        core = kitems
        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            return self.cores[tcore]
        #
        # Nope, doesn't exist. Compute it and the associated
        # \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}

        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    X.complete.append(rule)
                    continue

                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if not rules.has_key(nextSym):
                    # Terminal: record a pending (None) edge.
                    if not edges.has_key(key):
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    # Nonterminal: predict its rules into NK, once per
                    # symbol.
                    edges[key] = None
                    if not predicted.has_key(nextSym):
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            # Problem: we know K needs generating, but we
            # don't yet know about NK. Can't commit anything
            # regarding NK to self.edges until we're sure. Should
            # we delay committing on both K and NK to avoid this
            # hacky code? This creates other problems..
            #
            if X is K:
                edges = {}

        if NK.items == []:
            return k

        #
        # Check for \epsilon-nonkernel's core. Unfortunately we
        # need to know the entire set of predicted nonterminals
        # to do this without accidentally duplicating states.
        #
        core = predicted.keys()
        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            self.edges[(k, None)] = self.cores[tcore]
            return k

        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k

    def goto(self, state, sym):
        # Follow the edge from `state` on `sym`, generating the target
        # state on demand; returns None when there is no transition.
        key = (state, sym)
        if not self.edges.has_key(key):
            #
            # No transitions from state on sym.
            #
            return None

        rv = self.edges[key]
        if rv is None:
            #
            # Target state isn't generated yet. Remedy this.
            #
            rv = self.makeState(state, sym)
            self.edges[key] = rv
        return rv

    def gotoT(self, state, t):
        # Transition on a token type string.
        return [self.goto(state, t)]

    def gotoST(self, state, st):
        # Transition by comparing the token itself against each of the
        # state's terminals (used when typestring() returns None).
        rv = []
        for t in self.states[state].T:
            if st == t:
                rv.append(self.goto(state, t))
        return rv

    def add(self, set, item, i=None, predecessor=None, causal=None):
        # Append `item` to the Earley set if new; when a predecessor is
        # given, also record the derivation link for buildTree().
        if predecessor is None:
            if item not in set:
                set.append(item)
        else:
            key = (item, i)
            if item not in set:
                self.links[key] = []
                set.append(item)
            self.links[key].append((predecessor, causal))

    def makeSet(self, token, sets, i):
        """Scan/complete step: extend sets[i+1] from sets[i] on `token`
        (token is None on the final, post-input pass)."""
        cur, next = sets[i], sets[i+1]

        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token

        for item in cur:
            ptr = (item, i)
            state, parent = item
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))

            if parent == i:
                continue           # item starts here: nothing to complete

            # Completion: feed this state's finished rules back into the
            # parent set's items.
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))

    def makeSet_fast(self, token, sets, i):
        #
        # Call *only* when the entire state machine has been built!
        # It relies on self.edges being filled in completely, and
        # then duplicates and inlines code to boost speed at the
        # cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None
        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #         i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^

    def predecessor(self, key, causal):
        # Step backwards one link along the recorded derivation.
        for p, c in self.links[key]:
            if c == causal:
                return p
        assert 0

    def causal(self, key):
        # Pick the derivation that produced `key`; defer to ambiguity()
        # when more than one cause was recorded.
        links = self.links[key]
        if len(links) == 1:
            return links[0][1]
        choices = []
        rule2cause = {}
        for p, c in links:
            rule = c[2]
            choices.append(rule)
            rule2cause[rule] = c
        return rule2cause[self.ambiguity(choices)]

    def deriveEpsilon(self, nt):
        # Build the attribute for a nullable symbol by running the
        # semantic actions of its epsilon derivation, right to left.
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)

    def buildTree(self, nt, item, tokens, k):
        """Reconstruct the derivation of `nt` ending at Earley item
        `item` in set `k`, applying semantic actions bottom-up."""
        state, parent = item

        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        # Walk the rhs right to left, consuming tokens and recursing
        # into sub-derivations via the recorded links.
        for i in range(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if not self.newrules.has_key(sym):
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                attr[i] = self.deriveEpsilon(sym)
            else:
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)

    def ambiguity(self, rules):
        #
        # XXX - problem here and in collectRules() if the same rule
        # appears in >1 method. Also undefined results if rules
        # causing the ambiguity appear in the same method.
        #
        # Sort candidate rules by rhs length and let resolve() pick by
        # rule name.
        sortlist = []
        name2index = {}
        for i in range(len(rules)):
            lhs, rhs = rule = rules[i]
            name = self.rule2name[self.new2old[rule]]
            sortlist.append((len(rhs), name))
            name2index[name] = i
        sortlist.sort()
        list = map(lambda (a,b): b, sortlist)
        return rules[name2index[self.resolve(list)]]

    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the shortest RHS.
        # Since we walk the tree from the top down, this
        # should effectively resolve in favor of a "shift".
        #
        return list[0]
  598. #
  599. # GenericASTBuilder automagically constructs a concrete/abstract syntax tree
  600. # for a given input. The extra argument is a class (not an instance!)
  601. # which supports the "__setslice__" and "__len__" methods.
  602. #
  603. # XXX - silently overrides any user code in methods.
  604. #
  605. class GenericASTBuilder(GenericParser):
  606. def __init__(self, AST, start):
  607. GenericParser.__init__(self, start)
  608. self.AST = AST
  609. def preprocess(self, rule, func):
  610. rebind = lambda lhs, self=self: \
  611. lambda args, lhs=lhs, self=self: \
  612. self.buildASTNode(args, lhs)
  613. lhs, rhs = rule
  614. return rule, rebind(lhs)
  615. def buildASTNode(self, args, lhs):
  616. children = []
  617. for arg in args:
  618. if isinstance(arg, self.AST):
  619. children.append(arg)
  620. else:
  621. children.append(self.terminal(arg))
  622. return self.nonterminal(lhs, children)
  623. def terminal(self, token): return token
  624. def nonterminal(self, type, args):
  625. rv = self.AST(type)
  626. rv[:len(args)] = args
  627. return rv
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found. The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found). To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal. Node type is determined via the typestring() method.
#
class GenericASTTraversalPruningException:
    # Control-flow marker raised by prune() to abandon a subtree.
    # NOTE(review): a classic (non-Exception-derived) class -- fine on
    # the old Pythons this file targets, but not raisable on Python 3.
    pass
  639. class GenericASTTraversal:
  640. def __init__(self, ast):
  641. self.ast = ast
  642. def typestring(self, node):
  643. return node.type
  644. def prune(self):
  645. raise GenericASTTraversalPruningException
  646. def preorder(self, node=None):
  647. if node is None:
  648. node = self.ast
  649. try:
  650. name = 'n_' + self.typestring(node)
  651. if hasattr(self, name):
  652. func = getattr(self, name)
  653. func(node)
  654. else:
  655. self.default(node)
  656. except GenericASTTraversalPruningException:
  657. return
  658. for kid in node:
  659. self.preorder(kid)
  660. name = name + '_exit'
  661. if hasattr(self, name):
  662. func = getattr(self, name)
  663. func(node)
  664. def postorder(self, node=None):
  665. if node is None:
  666. node = self.ast
  667. for kid in node:
  668. self.postorder(kid)
  669. name = 'n_' + self.typestring(node)
  670. if hasattr(self, name):
  671. func = getattr(self, name)
  672. func(node)
  673. else:
  674. self.default(node)
  675. def default(self, node):
  676. pass
#
# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__"
# implemented.
#
# XXX - makes assumptions about how GenericParser walks the parse tree.
#
class GenericASTMatcher(GenericParser):
    def __init__(self, start, ast):
        """`start` is the pattern grammar's start symbol; `ast` the
        default tree for match()."""
        GenericParser.__init__(self, start)
        self.ast = ast

    def preprocess(self, rule, func):
        # Reverse each RHS (matching proceeds over the linearized tree)
        # and wrap the user's action to run via foundMatch.  Default
        # arguments bind func/self eagerly (pre-nested-scopes idiom).
        rebind = lambda func, self=self: \
                lambda args, func=func, self=self: \
                    self.foundMatch(args, func)
        lhs, rhs = rule
        rhslist = list(rhs)
        rhslist.reverse()
        return (lhs, tuple(rhslist)), rebind(func)

    def foundMatch(self, args, func):
        # Run the user's action on the matched node, propagating it as
        # this rule's value.
        func(args[-1])
        return args[-1]

    def match_r(self, node):
        # Linearize the AST into self.input, wrapping child lists in
        # '(' ')'.  Built back-to-front via insert(0, ...).
        self.input.insert(0, node)
        children = 0

        for child in node:
            if children == 0:
                self.input.insert(0, '(')
            children = children + 1
            self.match_r(child)

        if children > 0:
            self.input.insert(0, ')')

    def match(self, ast=None):
        """Match patterns against `ast` (default: the tree given to
        __init__); user actions fire as matches are found."""
        if ast is None:
            ast = self.ast
        self.input = []

        self.match_r(ast)
        self.parse(self.input)

    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the longest RHS.
        #
        return list[-1]
def _dump(tokens, sets, states):
    # Debugging aid: print every Earley item set together with the
    # dotted rules of the state each item refers to, interleaved with
    # the token stream.  (Trailing commas keep the dotted rule on one
    # output line.)
    for i in range(len(sets)):
        print 'set', i
        for item in sets[i]:
            print '\t', item
            for (lhs, rhs), pos in states[item[0]].items:
                print '\t\t', lhs, '::=',
                print string.join(rhs[:pos]),
                print '.',
                print string.join(rhs[pos:])
        if i < len(tokens):
            print
            print 'token', str(tokens[i])
            print