PageRenderTime 56ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/Python/system/spark.py

https://bitbucket.org/cwalther/moulscript-dlanor
Python | 566 lines | 516 code | 22 blank | 28 comment | 20 complexity | 1e9c61923ba6e86ffd92012b9250df2e MD5 | raw file
Possible License(s): AGPL-1.0, GPL-3.0
# Copyright (c) 1998-2000 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. __version__ = 'SPARK-0.6.1'
  22. import re
  23. import sys
  24. import string
  25. def _namelist(instance):
  26. namelist, namedict, classlist = [], {}, [instance.__class__]
  27. for c in classlist:
  28. for b in c.__bases__:
  29. classlist.append(b)
  30. for name in dir(c):
  31. if not namedict.has_key(name):
  32. namelist.append(name)
  33. namedict[name] = 1
  34. return namelist
  35. class GenericScanner:
  36. def __init__(self):
  37. pattern = self.reflect()
  38. self.re = re.compile(pattern, re.VERBOSE)
  39. self.index2func = {}
  40. for name, number in self.re.groupindex.items():
  41. self.index2func[number-1] = getattr(self, 't_' + name)
  42. def makeRE(self, name):
  43. doc = getattr(self, name).__doc__
  44. rv = '(?P<%s>%s)' % (name[2:], doc)
  45. return rv
  46. def reflect(self):
  47. rv = []
  48. for name in _namelist(self):
  49. if name[:2] == 't_' and name != 't_default':
  50. rv.append(self.makeRE(name))
  51. rv.append(self.makeRE('t_default'))
  52. return string.join(rv, '|')
  53. def error(self, s, pos):
  54. print "Lexical error at position %s" % pos
  55. raise SystemExit
  56. def tokenize(self, s):
  57. pos = 0
  58. n = len(s)
  59. while pos < n:
  60. m = self.re.match(s, pos)
  61. if m is None:
  62. self.error(s, pos)
  63. groups = m.groups()
  64. for i in range(len(groups)):
  65. if groups[i] and self.index2func.has_key(i):
  66. self.index2func[i](groups[i])
  67. pos = m.end()
  68. def t_default(self, s):
  69. r'( . | \n )+'
  70. pass
  71. class GenericParser:
  72. def __init__(self, start):
  73. self.rules = {}
  74. self.rule2func = {}
  75. self.rule2name = {}
  76. self.collectRules()
  77. self.startRule = self.augment(start)
  78. self.ruleschanged = 1
  79. _START = 'START'
  80. _EOF = 'EOF'
  81. #
  82. # A hook for GenericASTBuilder and GenericASTMatcher.
  83. #
  84. def preprocess(self, rule, func): return rule, func
  85. def addRule(self, doc, func):
  86. rules = string.split(doc)
  87. index = []
  88. for i in range(len(rules)):
  89. if rules[i] == '::=':
  90. index.append(i-1)
  91. index.append(len(rules))
  92. for i in range(len(index)-1):
  93. lhs = rules[index[i]]
  94. rhs = rules[index[i]+2:index[i+1]]
  95. rule = (lhs, tuple(rhs))
  96. rule, fn = self.preprocess(rule, func)
  97. if self.rules.has_key(lhs):
  98. self.rules[lhs].append(rule)
  99. else:
  100. self.rules[lhs] = [ rule ]
  101. self.rule2func[rule] = fn
  102. self.rule2name[rule] = func.__name__[2:]
  103. self.ruleschanged = 1
  104. def collectRules(self):
  105. for name in _namelist(self):
  106. if name[:2] == 'p_':
  107. func = getattr(self, name)
  108. doc = func.__doc__
  109. self.addRule(doc, func)
  110. def augment(self, start):
  111. #
  112. # Tempting though it is, this isn't made into a call
  113. # to self.addRule() because the start rule shouldn't
  114. # be subject to preprocessing.
  115. #
  116. startRule = (self._START, ( start, self._EOF ))
  117. self.rule2func[startRule] = lambda args: args[0]
  118. self.rules[self._START] = [ startRule ]
  119. self.rule2name[startRule] = ''
  120. return startRule
  121. def makeFIRST(self):
  122. union = {}
  123. self.first = {}
  124. for rulelist in self.rules.values():
  125. for lhs, rhs in rulelist:
  126. if not self.first.has_key(lhs):
  127. self.first[lhs] = {}
  128. if len(rhs) == 0:
  129. self.first[lhs][None] = 1
  130. continue
  131. sym = rhs[0]
  132. if not self.rules.has_key(sym):
  133. self.first[lhs][sym] = 1
  134. else:
  135. union[(sym, lhs)] = 1
  136. changes = 1
  137. while changes:
  138. changes = 0
  139. for src, dest in union.keys():
  140. destlen = len(self.first[dest])
  141. self.first[dest].update(self.first[src])
  142. if len(self.first[dest]) != destlen:
  143. changes = 1
  144. #
  145. # An Earley parser, as per J. Earley, "An Efficient Context-Free
  146. # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
  147. # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
  148. # Carnegie-Mellon University, August 1968, p. 27.
  149. #
  150. def typestring(self, token):
  151. return None
  152. def error(self, token):
  153. print "Syntax error at or near `%s' token" % token
  154. raise SystemExit
  155. def parse(self, tokens):
  156. tree = {}
  157. tokens.append(self._EOF)
  158. states = { 0: [ (self.startRule, 0, 0) ] }
  159. if self.ruleschanged:
  160. self.makeFIRST()
  161. for i in xrange(len(tokens)):
  162. states[i+1] = []
  163. if states[i] == []:
  164. break
  165. self.buildState(tokens[i], states, i, tree)
  166. #_dump(tokens, states)
  167. if i < len(tokens)-1 or states[i+1] != [(self.startRule, 2, 0)]:
  168. del tokens[-1]
  169. self.error(tokens[i-1])
  170. rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
  171. del tokens[-1]
  172. return rv
  173. def buildState(self, token, states, i, tree):
  174. needsCompletion = {}
  175. state = states[i]
  176. predicted = {}
  177. for item in state:
  178. rule, pos, parent = item
  179. lhs, rhs = rule
  180. #
  181. # A -> a . (completer)
  182. #
  183. if pos == len(rhs):
  184. if len(rhs) == 0:
  185. needsCompletion[lhs] = (item, i)
  186. for pitem in states[parent]:
  187. if pitem is item:
  188. break
  189. prule, ppos, pparent = pitem
  190. plhs, prhs = prule
  191. if prhs[ppos:ppos+1] == (lhs,):
  192. new = (prule,
  193. ppos+1,
  194. pparent)
  195. if new not in state:
  196. state.append(new)
  197. tree[(new, i)] = [(item, i)]
  198. else:
  199. tree[(new, i)].append((item, i))
  200. continue
  201. nextSym = rhs[pos]
  202. #
  203. # A -> a . B (predictor)
  204. #
  205. if self.rules.has_key(nextSym):
  206. #
  207. # Work on completer step some more; for rules
  208. # with empty RHS, the "parent state" is the
  209. # current state we're adding Earley items to,
  210. # so the Earley items the completer step needs
  211. # may not all be present when it runs.
  212. #
  213. if needsCompletion.has_key(nextSym):
  214. new = (rule, pos+1, parent)
  215. olditem_i = needsCompletion[nextSym]
  216. if new not in state:
  217. state.append(new)
  218. tree[(new, i)] = [olditem_i]
  219. else:
  220. tree[(new, i)].append(olditem_i)
  221. #
  222. # Has this been predicted already?
  223. #
  224. if predicted.has_key(nextSym):
  225. continue
  226. predicted[nextSym] = 1
  227. ttype = token is not self._EOF and \
  228. self.typestring(token) or \
  229. None
  230. if ttype is not None:
  231. #
  232. # Even smarter predictor, when the
  233. # token's type is known. The code is
  234. # grungy, but runs pretty fast. Three
  235. # cases are looked for: rules with
  236. # empty RHS; first symbol on RHS is a
  237. # terminal; first symbol on RHS is a
  238. # nonterminal (and isn't nullable).
  239. #
  240. for prule in self.rules[nextSym]:
  241. new = (prule, 0, i)
  242. prhs = prule[1]
  243. if len(prhs) == 0:
  244. state.append(new)
  245. continue
  246. prhs0 = prhs[0]
  247. if not self.rules.has_key(prhs0):
  248. if prhs0 != ttype:
  249. continue
  250. else:
  251. state.append(new)
  252. continue
  253. first = self.first[prhs0]
  254. if not first.has_key(None) and \
  255. not first.has_key(ttype):
  256. continue
  257. state.append(new)
  258. continue
  259. for prule in self.rules[nextSym]:
  260. #
  261. # Smarter predictor, as per Grune &
  262. # Jacobs' _Parsing Techniques_. Not
  263. # as good as FIRST sets though.
  264. #
  265. prhs = prule[1]
  266. if len(prhs) > 0 and \
  267. not self.rules.has_key(prhs[0]) and \
  268. token != prhs[0]:
  269. continue
  270. state.append((prule, 0, i))
  271. #
  272. # A -> a . c (scanner)
  273. #
  274. elif token == nextSym:
  275. #assert new not in states[i+1]
  276. states[i+1].append((rule, pos+1, parent))
  277. def buildTree(self, tokens, tree, root):
  278. stack = []
  279. self.buildTree_r(stack, tokens, -1, tree, root)
  280. return stack[0]
  281. def buildTree_r(self, stack, tokens, tokpos, tree, root):
  282. (rule, pos, parent), state = root
  283. while pos > 0:
  284. want = ((rule, pos, parent), state)
  285. if not tree.has_key(want):
  286. #
  287. # Since pos > 0, it didn't come from closure,
  288. # and if it isn't in tree[], then there must
  289. # be a terminal symbol to the left of the dot.
  290. # (It must be from a "scanner" step.)
  291. #
  292. pos = pos - 1
  293. state = state - 1
  294. stack.insert(0, tokens[tokpos])
  295. tokpos = tokpos - 1
  296. else:
  297. #
  298. # There's a NT to the left of the dot.
  299. # Follow the tree pointer recursively (>1
  300. # tree pointers from it indicates ambiguity).
  301. # Since the item must have come about from a
  302. # "completer" step, the state where the item
  303. # came from must be the parent state of the
  304. # item the tree pointer points to.
  305. #
  306. children = tree[want]
  307. if len(children) > 1:
  308. child = self.ambiguity(children)
  309. else:
  310. child = children[0]
  311. tokpos = self.buildTree_r(stack,
  312. tokens, tokpos,
  313. tree, child)
  314. pos = pos - 1
  315. (crule, cpos, cparent), cstate = child
  316. state = cparent
  317. lhs, rhs = rule
  318. result = self.rule2func[rule](stack[:len(rhs)])
  319. stack[:len(rhs)] = [result]
  320. return tokpos
  321. def ambiguity(self, children):
  322. #
  323. # XXX - problem here and in collectRules() if the same
  324. # rule appears in >1 method. But in that case the
  325. # user probably gets what they deserve :-) Also
  326. # undefined results if rules causing the ambiguity
  327. # appear in the same method.
  328. #
  329. sortlist = []
  330. name2index = {}
  331. for i in range(len(children)):
  332. ((rule, pos, parent), index) = children[i]
  333. lhs, rhs = rule
  334. name = self.rule2name[rule]
  335. sortlist.append((len(rhs), name))
  336. name2index[name] = i
  337. sortlist.sort()
  338. list = map(lambda (a,b): b, sortlist)
  339. return children[name2index[self.resolve(list)]]
  340. def resolve(self, list):
  341. #
  342. # Resolve ambiguity in favor of the shortest RHS.
  343. # Since we walk the tree from the top down, this
  344. # should effectively resolve in favor of a "shift".
  345. #
  346. return list[0]
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input.  The extra argument is a class (not an instance!)
# which supports the "__setslice__" and "__len__" methods.
#
# XXX - silently overrides any user code in methods.
#
  354. class GenericASTBuilder(GenericParser):
  355. def __init__(self, AST, start):
  356. GenericParser.__init__(self, start)
  357. self.AST = AST
  358. def preprocess(self, rule, func):
  359. rebind = lambda lhs, self=self: \
  360. lambda args, lhs=lhs, self=self: \
  361. self.buildASTNode(args, lhs)
  362. lhs, rhs = rule
  363. return rule, rebind(lhs)
  364. def buildASTNode(self, args, lhs):
  365. children = []
  366. for arg in args:
  367. if isinstance(arg, self.AST):
  368. children.append(arg)
  369. else:
  370. children.append(self.terminal(arg))
  371. return self.nonterminal(lhs, children)
  372. def terminal(self, token): return token
  373. def nonterminal(self, type, args):
  374. rv = self.AST(type)
  375. rv[:len(args)] = args
  376. return rv
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns.  For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found.  The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found).  To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal.  Node type is determined via the typestring() method.
#
  386. class GenericASTTraversalPruningException:
  387. pass
  388. class GenericASTTraversal:
  389. def __init__(self, ast):
  390. self.ast = ast
  391. def typestring(self, node):
  392. return node.type
  393. def prune(self):
  394. raise GenericASTTraversalPruningException
  395. def preorder(self, node=None):
  396. if node is None:
  397. node = self.ast
  398. try:
  399. name = 'n_' + self.typestring(node)
  400. if hasattr(self, name):
  401. func = getattr(self, name)
  402. func(node)
  403. else:
  404. self.default(node)
  405. except GenericASTTraversalPruningException:
  406. return
  407. for kid in node:
  408. self.preorder(kid)
  409. name = name + '_exit'
  410. if hasattr(self, name):
  411. func = getattr(self, name)
  412. func(node)
  413. def postorder(self, node=None):
  414. if node is None:
  415. node = self.ast
  416. for kid in node:
  417. self.postorder(kid)
  418. name = 'n_' + self.typestring(node)
  419. if hasattr(self, name):
  420. func = getattr(self, name)
  421. func(node)
  422. else:
  423. self.default(node)
  424. def default(self, node):
  425. pass
#
# GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__"
# implemented.
#
# XXX - makes assumptions about how GenericParser walks the parse tree.
#
  432. class GenericASTMatcher(GenericParser):
  433. def __init__(self, start, ast):
  434. GenericParser.__init__(self, start)
  435. self.ast = ast
  436. def preprocess(self, rule, func):
  437. rebind = lambda func, self=self: \
  438. lambda args, func=func, self=self: \
  439. self.foundMatch(args, func)
  440. lhs, rhs = rule
  441. rhslist = list(rhs)
  442. rhslist.reverse()
  443. return (lhs, tuple(rhslist)), rebind(func)
  444. def foundMatch(self, args, func):
  445. func(args[-1])
  446. return args[-1]
  447. def match_r(self, node):
  448. self.input.insert(0, node)
  449. children = 0
  450. for child in node:
  451. if children == 0:
  452. self.input.insert(0, '(')
  453. children = children + 1
  454. self.match_r(child)
  455. if children > 0:
  456. self.input.insert(0, ')')
  457. def match(self, ast=None):
  458. if ast is None:
  459. ast = self.ast
  460. self.input = []
  461. self.match_r(ast)
  462. self.parse(self.input)
  463. def resolve(self, list):
  464. #
  465. # Resolve ambiguity in favor of the longest RHS.
  466. #
  467. return list[-1]
  468. def _dump(tokens, states):
  469. for i in range(len(states)):
  470. print 'state', i
  471. for (lhs, rhs), pos, parent in states[i]:
  472. print '\t', lhs, '::=',
  473. print string.join(rhs[:pos]),
  474. print '.',
  475. print string.join(rhs[pos:]),
  476. print ',', parent
  477. if i < len(tokens):
  478. print
  479. print 'token', str(tokens[i])
  480. print