PageRenderTime 69ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/Dependencies/boo/lib/antlr-2.7.5/lib/python/antlr/antlr.py

https://github.com/w4x/boolangstudio
Python | 2800 lines | 2690 code | 60 blank | 50 comment | 44 complexity | 64c94f5ddc5ce79ddd6389eafc8bda70 MD5 | raw file
Possible License(s): GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. ## This file is part of PyANTLR. See LICENSE.txt for license
  2. ## details..........Copyright (C) Wolfgang Haefelinger, 2004.
  3. ## get sys module
  4. import sys
  # First whitespace-separated field of sys.version, e.g. "2.3.4".
  version = sys.version.split()[0]
  # Supply the boolean names on interpreters the checks below treat as
  # lacking them.
  # NOTE(review): these are plain string comparisons, not numeric version
  # comparisons -- confirm this is good enough for the targeted releases.
  if version < '2.2.1':
      False = 0
  if version < '2.3':
      True = not False
  10. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  11. ### global symbols ###
  12. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  13. ### ANTLR Standard Tokens
  # Token type codes. The same names and values are repeated as class
  # attributes on Token further down in this file.
  SKIP = -1
  INVALID_TYPE = 0
  EOF_TYPE = 1
  EOF = 1                    # same value as EOF_TYPE
  NULL_TREE_LOOKAHEAD = 3
  MIN_USER_TYPE = 4
  ### ANTLR's EOF Symbol
  # The empty string, i.e. what read(1) hands back once input is exhausted.
  EOF_CHAR = ''
  22. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  23. ### general functions ###
  24. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  def error(fmt,*args):
      """Print a formatted error message on standard output.

      fmt  -- %-style format string; nothing is printed when it is empty
              or None.
      args -- values interpolated into fmt.
      """
      if fmt:
          # A single pre-built string keeps the historical layout
          # ("error:" followed by two blanks) and is valid syntax on both
          # Python 2 and Python 3.
          print("error:  " + fmt % args)
  def ifelse(cond,_then,_else):
      """Return _then when cond is true, otherwise _else.

      Both alternatives are ordinary arguments, so both are evaluated by
      the caller before this function runs.
      """
      if cond:
          return _then
      return _else
  34. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  35. ### ANTLR Exceptions ###
  36. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class ANTLRException(Exception):
      """Common base class of the exceptions defined in this module."""

      def __init__(self, *args):
          # Hand every argument, unchanged, to the built-in Exception.
          Exception.__init__(self, *args)
  class RecognitionException(ANTLRException):
      """Exception that optionally carries a source position.

      All arguments go to ANTLRException. In addition the 2nd, 3rd and 4th
      positional arguments, when present, are stored as fileName, line and
      column; missing ones default to None, -1 and -1.
      """

      def __init__(self, *args):
          ANTLRException.__init__(self, *args)
          # Pad whatever location values were supplied with the defaults.
          supplied = args[1:4]
          defaults = (None, -1, -1)
          location = supplied + defaults[len(supplied):]
          self.fileName, self.line, self.column = location

      def __str__(self):
          """Return the position prefix only, e.g. "f.g:3:7: " or "line 3: ".

          With neither file name nor line the result is a single blank.
          """
          pieces = []
          named = bool(self.fileName)
          if named:
              pieces.append(self.fileName + ":")
          if self.line != -1:
              if not named:
                  pieces.append("line ")
              pieces.append(str(self.line))
              # A column is only shown together with a line.
              if self.column != -1:
                  pieces.append(":" + str(self.column))
              pieces.append(":")
          pieces.append(" ")
          return "".join(pieces)

      __repr__ = __str__
  class NoViableAltException(RecognitionException):
      """Raised with the offending Token or AST node as first argument.

      Raises TypeError when the first argument is neither a Token nor an
      AST. All arguments are also passed on to RecognitionException.
      """

      def __init__(self, *args):
          RecognitionException.__init__(self, *args)
          self.token = None
          self.node = None
          culprit = args[0]
          if isinstance(culprit, AST):
              self.node = culprit
          elif isinstance(culprit, Token):
              self.token = culprit
          else:
              raise TypeError("NoViableAltException requires Token or AST argument")

      def __str__(self):
          """Describe the unexpected token, subtree end, or tree node."""
          tok = self.token
          if tok:
              where = (tok.getLine(), tok.getColumn(), tok.getText())
              return "unexpected symbol at line %s (column %s): \"%s\"" % where
          if self.node == ASTNULL:
              return "unexpected end of subtree"
          assert self.node
          ### hackish, we assume that an AST contains method getText
          return "unexpected node: %s" % (self.node.getText())

      __repr__ = __str__
  class NoViableAltForCharException(RecognitionException):
      """Raised when no lexer alternative matches the current character.

      Accepted argument forms:
        (foundChar, scanner)                 -- position read from scanner
                                                via getFilename/getLine/
                                                getColumn
        (foundChar, fileName, line, column)  -- explicit position
      Any other argument count gives an exception without character and
      with an empty file name and line/column of -1.
      """

      def __init__(self, *args):
          self.foundChar = None
          if len(args) == 2:
              self.foundChar = args[0]
              scanner = args[1]
              RecognitionException.__init__(self, "NoViableAlt",
                                            scanner.getFilename(),
                                            scanner.getLine(),
                                            scanner.getColumn())
          elif len(args) == 4:
              self.foundChar = args[0]
              fileName = args[1]
              line = args[2]
              column = args[3]
              RecognitionException.__init__(self, "NoViableAlt",
                                            fileName, line, column)
          else:
              RecognitionException.__init__(self, "NoViableAlt",
                                            '', -1, -1)

      def __str__(self):
          """Return "unexpected char: " followed by the character.

          Characters between ' ' and '~' are shown quoted, others as an
          upper-case hex code, and a missing/empty character as "<None>".
          """
          mesg = "unexpected char: "
          found = self.foundChar
          if not found:
              # Checked first so that None is never used in an ordering
              # comparison (a TypeError on Python 3).
              mesg += "<None>"
          elif ' ' <= found <= '~':
              mesg += "'" + found + "'"
          else:
              mesg += "0x" + hex(ord(found)).upper()[2:]
          return mesg

      __repr__ = __str__
  class SemanticException(RecognitionException):
      """RecognitionException subclass that adds no behaviour of its own."""

      def __init__(self, *args):
          # Same argument convention as RecognitionException.
          RecognitionException.__init__(self, *args)
  class MismatchedCharException(RecognitionException):
      """Lexer error: the character found is not what was expected.

      Accepted argument forms (chosen by argument count and by the type
      of the second argument):
        (foundChar, lower, upper, matchNot, scanner)  -- RANGE / NOT_RANGE
        (foundChar, expectedChar, matchNot, scanner)  -- CHAR / NOT_CHAR
        (foundChar, bitSet, matchNot, scanner)        -- SET / NOT_SET
      Anything else yields mismatchType NONE without position data. The
      position is read from the scanner through getFilename(), getLine()
      and getColumn().
      """

      NONE = 0
      CHAR = 1
      NOT_CHAR = 2
      RANGE = 3
      NOT_RANGE = 4
      SET = 5
      NOT_SET = 6

      def __init__(self, *args):
          self.args = args
          if len(args) == 5:
              # Expected range / not range
              if args[3]:
                  self.mismatchType = MismatchedCharException.NOT_RANGE
              else:
                  self.mismatchType = MismatchedCharException.RANGE
              self.foundChar = args[0]
              self.expecting = args[1]
              self.upper = args[2]
              self.scanner = args[4]
              RecognitionException.__init__(self, "Mismatched char range",
                                            self.scanner.getFilename(),
                                            self.scanner.getLine(),
                                            self.scanner.getColumn())
          elif len(args) == 4 and isinstance(args[1], str):
              # Expected char / not char
              if args[2]:
                  self.mismatchType = MismatchedCharException.NOT_CHAR
              else:
                  self.mismatchType = MismatchedCharException.CHAR
              self.foundChar = args[0]
              self.expecting = args[1]
              self.scanner = args[3]
              RecognitionException.__init__(self, "Mismatched char",
                                            self.scanner.getFilename(),
                                            self.scanner.getLine(),
                                            self.scanner.getColumn())
          elif len(args) == 4 and isinstance(args[1], BitSet):
              # Expected BitSet / not BitSet
              if args[2]:
                  self.mismatchType = MismatchedCharException.NOT_SET
              else:
                  self.mismatchType = MismatchedCharException.SET
              self.foundChar = args[0]
              self.set = args[1]
              self.scanner = args[3]
              RecognitionException.__init__(self, "Mismatched char set",
                                            self.scanner.getFilename(),
                                            self.scanner.getLine(),
                                            self.scanner.getColumn())
          else:
              self.mismatchType = MismatchedCharException.NONE
              RecognitionException.__init__(self, "Mismatched char")

      ## Append a char to the msg buffer. If special,
      #  then show escaped version
      #
      def appendCharName(self, sb, c):
          """Append a quoted, printable rendering of c to the list sb."""
          if not c or c == 65535:
              # 65535 = (char) -1 = EOF
              sb.append("'<EOF>'")
          elif c == '\n':
              sb.append("'\\n'")
          elif c == '\r':
              sb.append("'\\r'")
          elif c == '\t':
              sb.append("'\\t'")
          else:
              sb.append('\'' + c + '\'')

      ##
      # Returns an error message with line number/column information
      #
      def __str__(self):
          """Return the position prefix followed by the mismatch details."""
          sb = ['']
          sb.append(RecognitionException.__str__(self))

          if self.mismatchType == MismatchedCharException.CHAR:
              sb.append("expecting ")
              self.appendCharName(sb, self.expecting)
              sb.append(", found ")
              self.appendCharName(sb, self.foundChar)
          elif self.mismatchType == MismatchedCharException.NOT_CHAR:
              sb.append("expecting anything but '")
              self.appendCharName(sb, self.expecting)
              sb.append("'; got it anyway")
          elif self.mismatchType in [MismatchedCharException.RANGE, MismatchedCharException.NOT_RANGE]:
              sb.append("expecting char ")
              if self.mismatchType == MismatchedCharException.NOT_RANGE:
                  sb.append("NOT ")
              sb.append("in range: ")
              # appendCharName is a method: it must be reached through
              # self (a bare call raised NameError here).
              self.appendCharName(sb, self.expecting)
              sb.append("..")
              self.appendCharName(sb, self.upper)
              sb.append(", found ")
              self.appendCharName(sb, self.foundChar)
          elif self.mismatchType in [MismatchedCharException.SET, MismatchedCharException.NOT_SET]:
              sb.append("expecting ")
              if self.mismatchType == MismatchedCharException.NOT_SET:
                  sb.append("NOT ")
              sb.append("one of (")
              # NOTE(review): relies on the set supporting len() and
              # integer indexing -- confirm against BitSet.
              for i in range(len(self.set)):
                  self.appendCharName(sb, self.set[i])
              sb.append("), found ")
              self.appendCharName(sb, self.foundChar)

          return str().join(sb).strip()

      __repr__ = __str__
  class MismatchedTokenException(RecognitionException):
      """Parser error: the token (or tree node) found is not the expected one.

      Accepted argument forms, as read by __init__:
        6 arguments: args[0] token names, args[1] Token or AST,
                     args[2] lower bound, args[3] upper bound,
                     args[4] negation flag, args[5] file name
                     -> RANGE / NOT_RANGE
        4 arguments, args[2] an int:    (names, item, expecting, matchNot)
                     -> TOKEN / NOT_TOKEN
        4 arguments, args[2] a BitSet:  (names, item, set, matchNot)
                     -> SET / NOT_SET
      Anything else yields mismatchType NONE.
      """

      NONE = 0
      TOKEN = 1
      NOT_TOKEN = 2
      RANGE = 3
      NOT_RANGE = 4
      SET = 5
      NOT_SET = 6

      def __init__(self, *args):
          self.args = args
          self.tokenNames = []
          self.token = None
          self.tokenText = ''
          self.node = None
          # Set up front: the Token branch below reads self.fileName, and
          # only the 6-argument form assigns it beforehand. Without this
          # the 4-argument forms failed with AttributeError.
          self.fileName = None
          if len(args) == 6:
              # Expected range / not range
              # The flag sits between the upper bound (args[3]) and the
              # file name (args[5]); testing args[3] tested the bound.
              if args[4]:
                  self.mismatchType = MismatchedTokenException.NOT_RANGE
              else:
                  self.mismatchType = MismatchedTokenException.RANGE
              self.tokenNames = args[0]
              self.expecting = args[2]
              self.upper = args[3]
              self.fileName = args[5]
          elif len(args) == 4 and isinstance(args[2], int):
              # Expected token / not token
              if args[3]:
                  self.mismatchType = MismatchedTokenException.NOT_TOKEN
              else:
                  self.mismatchType = MismatchedTokenException.TOKEN
              self.tokenNames = args[0]
              self.expecting = args[2]
          elif len(args) == 4 and isinstance(args[2], BitSet):
              # Expected BitSet / not BitSet
              if args[3]:
                  self.mismatchType = MismatchedTokenException.NOT_SET
              else:
                  self.mismatchType = MismatchedTokenException.SET
              self.tokenNames = args[0]
              self.set = args[2]
          else:
              self.mismatchType = MismatchedTokenException.NONE
              RecognitionException.__init__(self, "Mismatched Token: expecting any AST node", "<AST>", -1, -1)

          # Record what was actually found and (re)initialise the base
          # class with the matching position information.
          if len(args) >= 2:
              if isinstance(args[1],Token):
                  self.token = args[1]
                  self.tokenText = self.token.getText()
                  RecognitionException.__init__(self, "Mismatched Token",
                                                self.fileName,
                                                self.token.getLine(),
                                                self.token.getColumn())
              elif isinstance(args[1],AST):
                  self.node = args[1]
                  self.tokenText = str(self.node)
                  RecognitionException.__init__(self, "Mismatched Token",
                                                "<AST>",
                                                self.node.getLine(),
                                                self.node.getColumn())
              else:
                  self.tokenText = "<empty tree>"
                  RecognitionException.__init__(self, "Mismatched Token",
                                                "<AST>", -1, -1)

      def appendTokenName(self, sb, tokenType):
          """Append the display name of tokenType to the list sb."""
          if tokenType == INVALID_TYPE:
              sb.append("<Set of tokens>")
          elif tokenType < 0 or tokenType >= len(self.tokenNames):
              # No name available: show the raw number.
              sb.append("<" + str(tokenType) + ">")
          else:
              sb.append(self.tokenNames[tokenType])

      ##
      # Returns an error message with line number/column information
      #
      def __str__(self):
          """Return the position prefix followed by the mismatch details."""
          sb = ['']
          sb.append(RecognitionException.__str__(self))

          if self.mismatchType == MismatchedTokenException.TOKEN:
              sb.append("expecting ")
              self.appendTokenName(sb, self.expecting)
              sb.append(", found " + self.tokenText)
          elif self.mismatchType == MismatchedTokenException.NOT_TOKEN:
              sb.append("expecting anything but '")
              self.appendTokenName(sb, self.expecting)
              sb.append("'; got it anyway")
          elif self.mismatchType in [MismatchedTokenException.RANGE, MismatchedTokenException.NOT_RANGE]:
              sb.append("expecting token ")
              if self.mismatchType == MismatchedTokenException.NOT_RANGE:
                  sb.append("NOT ")
              sb.append("in range: ")
              # appendTokenName is a method: it must be reached through
              # self (a bare call raised NameError here).
              self.appendTokenName(sb, self.expecting)
              sb.append("..")
              self.appendTokenName(sb, self.upper)
              sb.append(", found " + self.tokenText)
          elif self.mismatchType in [MismatchedTokenException.SET, MismatchedTokenException.NOT_SET]:
              sb.append("expecting ")
              if self.mismatchType == MismatchedTokenException.NOT_SET:
                  sb.append("NOT ")
              sb.append("one of (")
              # NOTE(review): relies on the set supporting len() and
              # integer indexing -- confirm against BitSet.
              for i in range(len(self.set)):
                  self.appendTokenName(sb, self.set[i])
              sb.append("), found " + self.tokenText)

          return str().join(sb).strip()

      __repr__ = __str__
  class TokenStreamException(ANTLRException):
      """Base class of the token-stream exceptions in this module."""

      def __init__(self, *args):
          # Arguments are passed straight through to ANTLRException.
          ANTLRException.__init__(self, *args)
  331. # Wraps an Exception in a TokenStreamException
  class TokenStreamIOException(TokenStreamException):
      """Wraps an Exception in a TokenStreamException.

      When the first argument is an Exception, its str() becomes the
      message and the exception is kept in self.io. Otherwise the
      arguments are forwarded unchanged and self.io refers to this
      exception itself.
      """

      def __init__(self, *args):
          if not (args and isinstance(args[0], Exception)):
              TokenStreamException.__init__(self, *args)
              self.io = self
              return
          cause = args[0]
          TokenStreamException.__init__(self, str(cause))
          self.io = cause
  341. # Wraps a RecognitionException in a TokenStreamException
  class TokenStreamRecognitionException(TokenStreamException):
      """Wraps a RecognitionException in a TokenStreamException.

      The wrapped exception is kept in self.recog and supplies the string
      form. Raises TypeError when the first argument is missing or is not
      a RecognitionException.
      """

      def __init__(self, *args):
          if not (args and isinstance(args[0], RecognitionException)):
              raise TypeError("TokenStreamRecognitionException requires RecognitionException argument")
          wrapped = args[0]
          TokenStreamException.__init__(self, str(wrapped))
          self.recog = wrapped

      def __str__(self):
          """Return the string form of the wrapped exception."""
          return str(self.recog)

      __repr__ = __str__
  class TokenStreamRetryException(TokenStreamException):
      """Raised by TokenStreamSelector.retry(); nextToken() there catches it
      and asks the current stream again."""

      def __init__(self, *args):
          # Arguments are passed straight through to the base class.
          TokenStreamException.__init__(self, *args)
  class CharStreamException(ANTLRException):
      """Base class of the character-stream exceptions in this module."""

      def __init__(self, *args):
          # Arguments are passed straight through to ANTLRException.
          ANTLRException.__init__(self, *args)
  359. # Wraps an Exception in a CharStreamException
  class CharStreamIOException(CharStreamException):
      """Wraps an Exception in a CharStreamException.

      When the first argument is an Exception, its str() becomes the
      message and the exception is kept in self.io. Otherwise the
      arguments are forwarded unchanged and self.io refers to this
      exception itself.
      """

      def __init__(self, *args):
          if not (args and isinstance(args[0], Exception)):
              CharStreamException.__init__(self, *args)
              self.io = self
              return
          cause = args[0]
          CharStreamException.__init__(self, str(cause))
          self.io = cause
  class TryAgain(Exception):
      """Plain Exception subclass without any members of its own."""
  371. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  372. ### Token ###
  373. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class Token(object):
      """Minimal token: stores a type, reports fixed text and position.

      Keyword arguments:
        type -- token type, INVALID_TYPE when omitted
        text -- stored in self.text, "<no text>" when omitted
      The position/text setters here change nothing and return self, so
      calls can be chained; getText() always answers "<no text>".
      """

      SKIP = -1
      INVALID_TYPE = 0
      EOF_TYPE = 1
      EOF = 1
      NULL_TREE_LOOKAHEAD = 3
      MIN_USER_TYPE = 4

      def __init__(self,**argv):
          if 'type' in argv:
              self.type = argv['type']
          else:
              self.type = INVALID_TYPE
          if 'text' in argv:
              self.text = argv['text']
          else:
              self.text = "<no text>"

      def isEOF(self):
          """Tell whether the token type equals EOF_TYPE."""
          return (self.type == EOF_TYPE)

      def getColumn(self):
          return 0

      def getLine(self):
          return 0

      def getFilename(self):
          return None

      def setFilename(self,name):
          return self

      def getText(self):
          return "<no text>"

      def setText(self,text):
          """Validate that text is a str (it is not stored); return self."""
          if not isinstance(text,str):
              raise TypeError("Token.setText requires string argument")
          return self

      def setColumn(self,column):
          return self

      def setLine(self,line):
          return self

      def getType(self):
          return self.type

      def setType(self,type):
          """Store an integer token type and return self (TypeError otherwise)."""
          if not isinstance(type,int):
              raise TypeError("Token.setType requires integer argument")
          self.type = type
          return self

      def toString(self):
          """Return '["<text>",<TYPE>]', naming the four predefined types."""
          ## not optimal
          tval = self.type
          # Checked in the same order as before: 3, 1, 0, -1.
          for code, label in ((3, 'NULL_TREE_LOOKAHEAD'),
                              (1, 'EOF_TYPE'),
                              (0, 'INVALID_TYPE'),
                              (-1, 'SKIP')):
              if self.type == code:
                  tval = label
                  break
          return '["%s",<%s>]' % (self.getText(),tval)

      __str__ = toString
      __repr__ = toString
  436. ### static attribute ..
  # Shared placeholder token, attached to the class after its definition.
  Token.badToken = Token(type=INVALID_TYPE, text="<no text>")

  # Tiny smoke test, only when the module is run as a script.
  if __name__ == "__main__":
      print("testing ..")
      T = Token.badToken
      print(T)
  442. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  443. ### CommonToken ###
  444. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class CommonToken(Token):
      """Token that really stores its text, line and column.

      Keyword arguments: type and text (handled by Token) plus line and
      col, both 0 when omitted. Setters return self for chaining.
      """

      def __init__(self,**argv):
          Token.__init__(self,**argv)
          self.line = argv.get('line', 0)
          self.col = argv.get('col', 0)

      def getLine(self):
          return self.line

      def getText(self):
          return self.text

      def getColumn(self):
          return self.col

      def setLine(self,line):
          self.line = line
          return self

      def setText(self,text):
          self.text = text
          return self

      def setColumn(self,col):
          self.col = col
          return self

      def toString(self):
          """Return '["text",<TYPE>,line=L,col=C]', naming predefined types."""
          ## not optimal
          tval = self.type
          # Checked in the same order as before: 3, 1, 0, -1.
          for code, label in ((3, 'NULL_TREE_LOOKAHEAD'),
                              (1, 'EOF_TYPE'),
                              (0, 'INVALID_TYPE'),
                              (-1, 'SKIP')):
              if self.type == code:
                  tval = label
                  break
          return '["%s",<%s>,line=%s,col=%s]' % (
              self.text, tval, self.line, self.col)

      __str__ = toString
      __repr__ = toString
  # Smoke test for CommonToken, only when the module is run as a script.
  if __name__ == '__main__' :
      # default construction
      T = CommonToken()
      print(T)
      # everything given as keywords
      T = CommonToken(col=15,line=1,text="some text", type=5)
      print(T)
      # same values through the chained setters
      T = CommonToken()
      T.setLine(1).setColumn(15).setText("some text").setType(5)
      print(T)
      for shown in (T.getLine(), T.getColumn(), T.getText(), T.getType()):
          print(shown)
  508. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  509. ### CommonHiddenStreamToken ###
  510. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class CommonHiddenStreamToken(CommonToken):
      """CommonToken that additionally links to neighbouring hidden tokens.

      hiddenBefore / hiddenAfter start as None and are maintained through
      the setters below.
      """

      def __init__(self,*args,**kwargs):
          # CommonToken takes its values as keywords (type, text, line,
          # col); forwarding them lets this class be constructed the same
          # way. Forwarding *args alone made every keyword a TypeError.
          CommonToken.__init__(self,*args,**kwargs)
          self.hiddenBefore = None
          self.hiddenAfter = None

      def getHiddenAfter(self):
          return self.hiddenAfter

      def getHiddenBefore(self):
          return self.hiddenBefore

      def setHiddenAfter(self,t):
          self.hiddenAfter = t

      def setHiddenBefore(self, t):
          self.hiddenBefore = t
  524. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  525. ### Queue ###
  526. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  527. ## Shall be a circular buffer on tokens ..
  class Queue(object):
      """First-in/first-out container kept in a plain Python list."""

      def __init__(self):
          self.buffer = list()

      def append(self,item):
          """Add item at the tail."""
          self.buffer.append(item)

      def elementAt(self,index):
          """Return the item at position index (0 is the head)."""
          return self.buffer[index]

      def reset(self):
          """Start over with a fresh, empty list."""
          self.buffer = list()

      def removeFirst(self):
          """Drop the head item; IndexError when the queue is empty."""
          del self.buffer[0]

      def length(self):
          return len(self.buffer)

      def __str__(self):
          return str(self.buffer)
  543. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  544. ### InputBuffer ###
  545. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class InputBuffer(object):
      """Look-ahead buffer with mark/rewind support over a Queue.

      Subclasses supply fill(k), which must make sure at least k items
      beyond the marker offset are queued. Consumption is deferred:
      consume() only counts, syncConsume() applies the count.
      """

      def __init__(self):
          self.nMarkers = 0        # number of outstanding mark() calls
          self.markerOffset = 0    # queue index of the current position
          self.numToConsume = 0    # consume() calls not yet applied
          self.queue = Queue()

      def __str__(self):
          return "(%s,%s,%s,%s)" % (
              self.nMarkers,
              self.markerOffset,
              self.numToConsume,
              self.queue)

      def __repr__(self):
          return str(self)

      def commit(self):
          """Give up one mark without rewinding."""
          self.nMarkers -= 1

      def consume(self) :
          """Note that one more item is to be consumed (applied lazily)."""
          self.numToConsume += 1

      ## probably better to return a list of items
      ## because of unicode. Or return a unicode
      ## string ..
      def getLAChars(self) :
          """Return the queued items from the marker offset on, joined."""
          q = self.queue
          # The former while-loop never advanced its index and so never
          # terminated once the range was non-empty.
          return ''.join([q.elementAt(i)
                          for i in range(self.markerOffset, q.length())])

      ## probably better to return a list of items
      ## because of unicode chars
      def getMarkedChars(self) :
          """Return the queued items before the marker offset, joined."""
          q = self.queue
          # Same never-advancing loop as in getLAChars, fixed the same way.
          return ''.join([q.elementAt(i)
                          for i in range(0, self.markerOffset)])

      def isMarked(self) :
          return self.nMarkers != 0

      def fill(self,k):
          ### abstract method
          raise NotImplementedError()

      def LA(self,k) :
          """Return the k-th look-ahead item (k starts at 1)."""
          self.fill(k)
          return self.queue.elementAt(self.markerOffset + k - 1)

      def mark(self) :
          """Open a mark and return the position to hand to rewind()."""
          self.syncConsume()
          self.nMarkers += 1
          return self.markerOffset

      def rewind(self,mark) :
          """Go back to a position returned by mark() and close that mark."""
          self.syncConsume()
          self.markerOffset = mark
          self.nMarkers -= 1

      def reset(self) :
          self.nMarkers = 0
          self.markerOffset = 0
          self.numToConsume = 0
          self.queue.reset()

      def syncConsume(self) :
          """Apply all pending consume() calls."""
          while self.numToConsume > 0:
              if self.nMarkers > 0:
                  # guess mode -- leave leading characters and bump offset.
                  self.markerOffset += 1
              else:
                  # normal mode -- remove first character
                  self.queue.removeFirst()
              self.numToConsume -= 1
  613. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  614. ### CharBuffer ###
  615. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class CharBuffer(InputBuffer):
      """InputBuffer that pulls single characters from a reader object."""

      def __init__(self,reader):
          ##assert isinstance(reader,file)
          super(CharBuffer,self).__init__()
          ## a reader is supposed to be anything that has
          ## a method 'read(int)'.
          self.input = reader

      def __str__(self):
          base = super(CharBuffer,self).__str__()
          # Show this buffer's own reader (previously the builtin `input`
          # was formatted) and close the brace.
          return "CharBuffer{%s,%s}" % (base,str(self.input))

      def fill(self,amount):
          """Queue characters until `amount` are available past the marker.

          Any Exception raised on the way, including a failed assertion,
          is re-raised wrapped in a CharStreamIOException.
          """
          try:
              self.syncConsume()
              while self.queue.length() < (amount + self.markerOffset) :
                  ## retrieve just one char - what happens at end
                  ## of input?
                  c = self.input.read(1)
                  ### python's behaviour is to return the empty string on
                  ### EOF, ie. no exception whatsoever is thrown. So make
                  ### sure we have either seen a single character or an
                  ### empty string (EOF).
                  assert len(c) == 0 or len(c) == 1
                  ### And it shall be of type string (ASCII or UNICODE).
                  assert isinstance(c,str) or isinstance(c,unicode)
                  ### Just append EOF char to buffer. Note that buffer may
                  ### contain then just more than one EOF char ..
                  self.queue.append(c)
          except Exception:
              # sys.exc_info() instead of "except Exception, e": the same
              # exception object, spelled so that it parses everywhere.
              raise CharStreamIOException(sys.exc_info()[1])
          ##except: # (mk) Cannot happen ...
          ##error ("unexpected exception caught ..")
          ##assert 0
  654. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  655. ### LexerSharedInputState ###
  656. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class LexerSharedInputState(object):
      """Position bookkeeping that sits on top of an InputBuffer.

      Holds the buffer (input), the current and token-start line/column
      (all starting at 1), the guessing counter and the file name.
      """

      def __init__(self,ibuf):
          assert isinstance(ibuf,InputBuffer)
          self.input = ibuf
          self.column = self.line = 1
          self.tokenStartColumn = self.tokenStartLine = 1
          self.guessing = 0
          self.filename = None

      def reset(self):
          """Restore the initial field values, then reset the buffer."""
          self.column = self.line = 1
          self.tokenStartColumn = self.tokenStartLine = 1
          self.guessing = 0
          self.filename = None
          self.input.reset()

      def LA(self,k):
          """Delegate look-ahead to the underlying buffer."""
          return self.input.LA(k)
  677. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  678. ### TokenStream ###
  679. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class TokenStream(object):
      """Base class for token sources; iterating uses TokenStreamIterator."""

      def nextToken(self):
          """Placeholder: yields nothing here, subclasses provide tokens."""
          return None

      def __iter__(self):
          return TokenStreamIterator(self)
  685. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  686. ### TokenStreamIterator ###
  687. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class TokenStreamIterator(object):
      """Iterator over the tokens of a TokenStream.

      Iteration ends when the stream returns a false value or a token
      whose isEOF() is true. Raises TypeError when constructed with
      something that is not a TokenStream.
      """

      def __init__(self,inst):
          if isinstance(inst,TokenStream):
              self.inst = inst
              return
          raise TypeError("TokenStreamIterator requires TokenStream object")

      def __iter__(self):
          # An iterator answers itself to iter(), so it can be used
          # directly in a for loop.
          return self

      def next(self):
          """Return the next token or raise StopIteration at the end."""
          assert self.inst
          item = self.inst.nextToken()
          if not item or item.isEOF():
              raise StopIteration()
          return item

      # Name under which Python 3 looks up the same method.
      __next__ = next
  700. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  701. ### TokenStreamSelector ###
  702. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class TokenStreamSelector(TokenStream):
      """TokenStream that forwards to one of several registered streams.

      Streams are registered under a key with addInputStream(), chosen
      with select() and can be stacked with push()/pop().
      """

      def __init__(self):
          self._input = None   # stream currently delivering tokens
          self._stmap = {}     # key -> registered stream
          self._stack = []     # streams saved by push()

      def addInputStream(self,stream,key):
          self._stmap[key] = stream

      def getCurrentStream(self):
          return self._input

      def getStream(self,sname):
          """Return the stream registered as sname; ValueError if unknown."""
          try:
              return self._stmap[sname]
          except (KeyError, TypeError):
              # %-formatting copes with keys that are not strings, so the
              # intended ValueError is raised for them as well.
              raise ValueError("TokenStream %s not found" % (sname,))

      def nextToken(self):
          """Return the next token of the current stream.

          A TokenStreamRetryException from the stream causes another
          attempt, repeated without limit.
          """
          while 1:
              try:
                  return self._input.nextToken()
              except TokenStreamRetryException:
                  ### just retry "forever"
                  pass

      def pop(self):
          """Re-select and return the stream saved by the last push()."""
          stream = self._stack.pop()
          self.select(stream)
          return stream

      def push(self,arg):
          """Save the current stream, then select arg."""
          self._stack.append(self._input)
          self.select(arg)

      def retry(self):
          raise TokenStreamRetryException()

      def select(self,arg):
          """Make arg (a TokenStream or a registered str key) current."""
          if isinstance(arg,TokenStream):
              self._input = arg
              return
          if isinstance(arg,str):
              self._input = self.getStream(arg)
              return
          raise TypeError("TokenStreamSelector.select requires " +
                          "TokenStream or string argument")
  743. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  744. ### TokenStreamBasicFilter ###
  745. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class TokenStreamBasicFilter(TokenStream):
      """TokenStream wrapper that drops tokens whose type is in a mask."""

      def __init__(self,input):
          self.input = input
          self.discardMask = BitSet()

      def discard(self,arg):
          """Add one token type (int) to the mask, or replace the mask (BitSet).

          Raises TypeError for any other argument.
          """
          if isinstance(arg,int):
              self.discardMask.add(arg)
              return
          if isinstance(arg,BitSet):
              # Was assigned to the misspelled attribute "discardMark",
              # which nextToken() never consults.
              self.discardMask = arg
              return
          raise TypeError("TokenStreamBasicFilter.discard requires " +
                          "integer or BitSet argument")

      def nextToken(self):
          """Return the next token whose type is not in the discard mask."""
          tok = self.input.nextToken()
          while tok and self.discardMask.member(tok.getType()):
              tok = self.input.nextToken()
          return tok
  764. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  765. ### TokenStreamHiddenTokenFilter ###
  766. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class TokenStreamHiddenTokenFilter(TokenStreamBasicFilter):
      """Filter that hides tokens but keeps them linked to visible ones.

      Tokens whose type is in hideMask are not returned; they are chained
      to each other and to the returned ("monitored") tokens through
      setHiddenBefore()/setHiddenAfter(). Tokens in discardMask are
      dropped without being linked.
      """

      def __init__(self,input):
          TokenStreamBasicFilter.__init__(self,input)
          self.hideMask = BitSet()
          self.nextMonitoredToken = None   # one-token look-ahead
          self.lastHiddenToken = None      # tail of the current hidden run
          self.firstHidden = None          # first hidden token of the input

      def consume(self):
          self.nextMonitoredToken = self.input.nextToken()

      def consumeFirst(self):
          """Load the look-ahead and absorb hidden/discarded leading tokens."""
          self.consume()

          p = None
          while self.hideMask.member(self.LA(1).getType()) or \
                self.discardMask.member(self.LA(1).getType()):
              if self.hideMask.member(self.LA(1).getType()):
                  if not p:
                      p = self.LA(1)
                  else:
                      # link the hidden tokens in both directions
                      p.setHiddenAfter(self.LA(1))
                      self.LA(1).setHiddenBefore(p)
                      p = self.LA(1)
                  self.lastHiddenToken = p
                  if not self.firstHidden:
                      self.firstHidden = p
              self.consume()

      def getDiscardMask(self):
          return self.discardMask

      def getHiddenAfter(self,t):
          return t.getHiddenAfter()

      def getHiddenBefore(self,t):
          return t.getHiddenBefore()

      def getHideMask(self):
          return self.hideMask

      def getInitialHiddenToken(self):
          return self.firstHidden

      def hide(self,m):
          """Add one token type (int) to the hide mask, or replace it (BitSet).

          Raises TypeError for any other argument.
          """
          if isinstance(m,int):
              self.hideMask.add(m)
              return
          # Was "isinstance(m.BitMask)": one argument and a non-existent
          # attribute, so this branch could never succeed.
          if isinstance(m,BitSet):
              self.hideMask = m
              return
          raise TypeError("TokenStreamHiddenTokenFilter.hide requires " +
                          "integer or BitSet argument")

      def LA(self,i):
          # Only one token of look-ahead is kept; i is not used.
          return self.nextMonitoredToken

      def nextToken(self):
          """Return the next visible token with its hidden neighbours linked."""
          if not self.LA(1):
              self.consumeFirst()

          monitored = self.LA(1)

          # hidden tokens collected during the previous call precede it
          monitored.setHiddenBefore(self.lastHiddenToken)
          self.lastHiddenToken = None

          self.consume()
          p = monitored

          while self.hideMask.member(self.LA(1).getType()) or \
                self.discardMask.member(self.LA(1).getType()):
              if self.hideMask.member(self.LA(1).getType()):
                  p.setHiddenAfter(self.LA(1))
                  # a hidden token is not linked back to the monitored one
                  if p != monitored:
                      self.LA(1).setHiddenBefore(p)
                  p = self.lastHiddenToken = self.LA(1)
              self.consume()
          return monitored
  828. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  829. ### StringBuffer ###
  830. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  class StringBuffer:
      """Growable text buffer kept as a list of string pieces."""

      def __init__(self,string=None):
          if string:
              self.text = list(string)
          else:
              self.text = []

      def setLength(self,sz):
          """Shorten the buffer to sz items; a false sz empties it.

          A size at or beyond the current length leaves the buffer as is.
          """
          if not sz :
              self.text = []
              return
          assert sz>0
          if sz >= self.length():
              return
          ### keep only the first sz items
          self.text = self.text[0:sz]

      def length(self):
          return len(self.text)

      def append(self,c):
          self.text.append(c)

      ### return buffer as string. Arg 'a' is used as index
      ## into the buffer and 2nd argument shall be the length.
      ## If 2nd args is absent, we return chars till end of
      ## buffer starting with 'a'.
      def getString(self,a=None,length=None):
          """Return `length` items starting at index a, joined into a string.

          a defaults to 0; an omitted length means "up to the end". An
          index at or past the end gives "".
          """
          if not a :
              a = 0
          assert a>=0
          if a>= len(self.text) :
              return ""

          if length is None:
              ## no second argument
              chunk = self.text[a:]
          else:
              # An explicit length of 0 is honoured (it used to be taken
              # for "absent" and returned the rest of the buffer).
              assert (a+length) <= len(self.text)
              chunk = self.text[a:a+length]
          return "".join(chunk)

      toString = getString ## alias

      def __str__(self):
          return str(self.text)
  873. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  874. ### Reader ###
  875. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  876. ## When reading Japanese chars, it happens that a stream returns a
  877. ## 'char' of length 2. This looks like a bug in the appropriate
  878. ## codecs - but I'm rather unsure about this. Anyway, if this is
  879. ## the case, I'm going to split this string into a list of chars
  880. ## and put them on hold, ie. on a buffer. Next time when called
  881. ## we read from buffer until buffer is empty.
  882. ## wh: nov, 25th -> problem does not appear in Python 2.4.0.c1.
  class Reader(object):
      """Adapter guaranteeing that read(1) yields at most one character.

      If the wrapped stream answers read(1) with a longer string, the
      surplus characters are parked and handed out by the next calls.
      """

      def __init__(self,stream):
          self.cin = stream
          self.buf = []      # parked characters, next one at the end

      def read(self,num):
          """Return one character, or the stream's false value at its end."""
          assert num==1

          # serve parked characters first
          if self.buf:
              return self.buf.pop()

          ## Read a char - this may return a string.
          ## Is this a bug in codecs/Python?
          chunk = self.cin.read(1)
          if not chunk or len(chunk)==1:
              return chunk

          # Park the characters back to front so pop() returns them in
          # their original order, then deliver the first one.
          pending = list(chunk)
          pending.reverse()
          self.buf.extend(pending)
          return self.buf.pop()
  902. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  903. ### CharScanner ###
  904. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  905. class CharScanner(TokenStream):
  906. ## class members
  907. NO_CHAR = 0
  908. EOF_CHAR = '' ### EOF shall be the empty string.
  909. def __init__(self, *argv, **kwargs):
  910. super(CharScanner, self).__init__()
  911. self.saveConsumedInput = True
  912. self.tokenClass = None
  913. self.caseSensitive = True
  914. self.caseSensitiveLiterals = True
  915. self.literals = None
  916. self.tabsize = 8
  917. self._returnToken = None
  918. self.commitToPath = False
  919. self.traceDepth = 0
  920. self.text = StringBuffer()
  921. self.hashString = hash(self)
  922. self.setTokenObjectClass(CommonToken)
  923. self.setInput(*argv)
  924. def __iter__(self):
  925. return CharScannerIterator(self)
  926. def setInput(self,*argv):
  927. ## case 1:
  928. ## if there's no arg we default to read from
  929. ## standard input
  930. if not argv:
  931. import sys
  932. self.setInput(sys.stdin)
  933. return
  934. ## get 1st argument
  935. arg1 = argv[0]
  936. ## case 2:
  937. ## if arg1 is a string, we assume it's a file name
  938. ## and open a stream using 2nd argument as open
  939. ## mode. If there's no 2nd argument we fall back to
  940. ## mode '+rb'.
  941. if isinstance(arg1,str):
  942. f = open(arg1,"rb")
  943. self.setInput(f)
  944. self.setFilename(arg1)
  945. return
  946. ## case 3:
  947. ## if arg1 is a file we wrap it by a char buffer (
  948. ## some additional checks?? No, can't do this in
  949. ## general).
  950. if isinstance(arg1,file):
  951. self.setInput(CharBuffer(arg1))
  952. return
  953. ## case 4:
  954. ## if arg1 is of type SharedLexerInputState we use
  955. ## argument as is.
  956. if isinstance(arg1,LexerSharedInputState):
  957. self.inputState = arg1
  958. return
  959. ## case 5:
  960. ## check whether argument type is of type input
  961. ## buffer. If so create a SharedLexerInputState and
  962. ## go ahead.
  963. if isinstance(arg1,InputBuffer):
  964. self.setInput(LexerSharedInputState(arg1))
  965. return
  966. ## case 6:
  967. ## check whether argument type has a method read(int)
  968. ## If so create CharBuffer ...
  969. try:
  970. if arg1.read:
  971. rd = Reader(arg1)
  972. cb = CharBuffer(rd)
  973. ss = LexerSharedInputState(cb)
  974. self.inputState = ss
  975. return
  976. except:
  977. pass
  978. ## case 7:
  979. ## raise wrong argument exception
  980. raise TypeError(argv)
  981. def setTabSize(self,size) :
  982. self.tabsize = size
  983. def getTabSize(self) :
  984. return self.tabsize
  985. def setCaseSensitive(self,t) :
  986. self.caseSensitive = t
  987. def setCommitToPath(self,commit) :
  988. self.commitToPath = commit
  989. def setFilename(self,f) :
  990. self.inputState.filename = f
  991. def setLine(self,line) :
  992. self.inputState.line = line
  993. def setText(self,s) :
  994. self.resetText()
  995. self.text.append(s)
  996. def getCaseSensitive(self) :
  997. return self.caseSensitive
  998. def getCaseSensitiveLiterals(self) :
  999. return self.caseSensitiveLiterals
  1000. def getColumn(self) :
  1001. return self.inputState.column
  1002. def setColumn(self,c) :
  1003. self.inputState.column = c
  1004. def getCommitToPath(self) :
  1005. return self.commitToPath
  1006. def getFilename(self) :
  1007. return self.inputState.filename
  1008. def getInputBuffer(self) :
  1009. return self.inputState.input
  1010. def getInputState(self) :
  1011. return self.inputState
  1012. def setInputState(self,state) :
  1013. assert isinstance(state,LexerSharedInputState)
  1014. self.inputState = state
  1015. def getLine(self) :
  1016. return self.inputState.line
  1017. def getText(self) :
  1018. return str(self.text)
  1019. def getTokenObject(self) :
  1020. return self._returnToken
  1021. def LA(self,i) :
  1022. c = self.inputState.input.LA(i)
  1023. if not self.caseSensitive:
  1024. ### E0006
  1025. c = c.__class__.lower(c)
  1026. return c
  1027. def makeToken(self,type) :
  1028. try:
  1029. ## dynamically load a class
  1030. assert self.tokenClass
  1031. tok = self.tokenClass()
  1032. tok.setType(type)
  1033. tok.setColumn(self.inputState.tokenStartColumn)
  1034. tok.setLine(self.inputState.tokenStartLine)
  1035. return tok
  1036. except:
  1037. self.panic("unable to create new token")
  1038. return Token.badToken
  1039. def mark(self) :
  1040. return self.inputState.input.mark()
  1041. def _match_bitset(self,b) :
  1042. if b.member(self.LA(1)):
  1043. self.consume()
  1044. else:
  1045. raise MismatchedCharException(self.LA(1), b, False, self)
  1046. def _match_string(self,s) :
  1047. for c in s:
  1048. if self.LA(1) == c:
  1049. self.consume()
  1050. else:
  1051. raise MismatchedCharException(self.LA(1), c, False, self)
  1052. def match(self,item):
  1053. if isinstance(item,str) or isinstance(item,unicode):
  1054. return self._match_string(item)
  1055. else:
  1056. return self._match_bitset(item)
  1057. def matchNot(self,c) :
  1058. if self.LA(1) != c:
  1059. self.consume()
  1060. else:
  1061. raise MismatchedCharException(self.LA(1), c, True, self)
  1062. def matchRange(self,c1,c2) :
  1063. if self.LA(1) < c1 or self.LA(1) > c2 :
  1064. raise MismatchedCharException(self.LA(1), c1, c2, False, self)
  1065. else:
  1066. self.consume()
  1067. def newline(self) :
  1068. self.inputState.line += 1
  1069. self.inputState.column = 1
  1070. def tab(self) :
  1071. c = self.getColumn()
  1072. nc = ( ((c-1)/self.tabsize) + 1) * self.tabsize + 1
  1073. self.setColumn(nc)
  1074. def panic(self,s='') :
  1075. print "CharScanner: panic: " + s
  1076. sys.exit(1)
  1077. def reportError(self,ex) :
  1078. print ex
  1079. def reportError(self,s) :
  1080. if not self.getFilename():
  1081. print "error: " + str(s)
  1082. else:
  1083. print self.getFilename() + ": error: " + str(s)
  1084. def reportWarning(self,s) :
  1085. if not self.getFilename():
  1086. print "warning: " + str(s)
  1087. else:
  1088. print self.getFilename() + ": warning: " + str(s)
  1089. def resetText(self) :
  1090. self.text.setLength(0)
  1091. self.inputState.tokenStartColumn = self.inputState.column
  1092. self.inputState.tokenStartLine = self.inputState.line
  1093. def rewind(self,pos) :
  1094. self.inputState.input.rewind(pos)
  1095. def setTokenObjectClass(self,cl):
  1096. self.tokenClass = cl
  1097. def testForLiteral(self,token):
  1098. if not token:
  1099. return
  1100. assert isinstance(token,Token)
  1101. _type = token.getType()
  1102. ## special tokens can't be literals
  1103. if _type in [SKIP,INVALID_TYPE,EOF_TYPE,NULL_TREE_LOOKAHEAD] :
  1104. return
  1105. _text = token.getText()
  1106. if not _text:
  1107. return
  1108. assert isinstance(_text,str) or isinstance(_text,unicode)
  1109. _type = self.testLiteralsTable(_text,_type)
  1110. token.setType(_type)
  1111. return _type
  1112. def testLiteralsTable(self,*args):
  1113. if isinstance(args[0],str) or isinstance(args[0],unicode):
  1114. s = args[0]
  1115. i = args[1]
  1116. else:
  1117. s = self.text.getString()
  1118. i = args[0]
  1119. ## check whether integer has been given
  1120. if not isinstance(i,int):
  1121. assert isinstance(i,int)
  1122. ## check whether we have a dict
  1123. assert isinstance(self.literals,dict)
  1124. try:
  1125. ## E0010
  1126. if not self.caseSensitiveLiterals:
  1127. s = s.__class__.lower(s)
  1128. i = self.literals[s]
  1129. except:
  1130. pass
  1131. return i
  1132. def toLower(self,c):
  1133. return c.__class__.lower()
  1134. def traceIndent(self):
  1135. print ' ' * self.traceDepth
  1136. def traceIn(self,rname):
  1137. self.traceDepth += 1
  1138. self.traceIndent()
  1139. print "> lexer %s c== %s" % (rname,self.LA(1))
  1140. def traceOut(self,rname):
  1141. self.traceIndent()
  1142. print "< lexer %s c== %s" % (rname,self.LA(1))
  1143. self.traceDepth -= 1
  1144. def uponEOF(self):
  1145. pass
  1146. def append(self,c):
  1147. if self.saveConsumedInput :
  1148. self.text.append(c)
  1149. def commit(self):
  1150. self.inputState.input.commit()
  1151. def consume(self):
  1152. if not self.inputState.guessing:
  1153. c = self.LA(1)
  1154. if self.caseSensitive:
  1155. self.append(c)
  1156. else:
  1157. # use input.LA(), not LA(), to get original case
  1158. # CharScanner.LA() would toLower it.
  1159. c = self.inputState.input.LA(1)
  1160. self.append(c)
  1161. if c and c in "\t":
  1162. self.tab()
  1163. else:
  1164. self.inputState.column += 1
  1165. self.inputState.input.consume()
  1166. ## Consume chars until one matches the given char
  1167. def consumeUntil_char(self,c):
  1168. while self.LA(1) != EOF_CHAR and self.LA(1) != c:
  1169. self.consume()
  1170. ## Consume chars until one matches the given set
  1171. def consumeUntil_bitset(self,bitset):
  1172. while self.LA(1) != EOF_CHAR and not self.set.member(self.LA(1)):
  1173. self.consume()
  1174. ### If symbol seen is EOF then generate and set token, otherwise
  1175. ### throw exception.
  1176. def default(self,la1):
  1177. if not la1 :
  1178. self.uponEOF()
  1179. self._returnToken = self.makeToken(EOF_TYPE)
  1180. else:
  1181. self.raise_NoViableAlt(la1)
  1182. def filterdefault(self,la1,*args):
  1183. if not la1:
  1184. self.uponEOF()
  1185. self._returnToken = self.makeToken(EOF_TYPE)
  1186. return
  1187. if not args:
  1188. self.consume()
  1189. raise TryAgain()
  1190. else:
  1191. ### apply filter object
  1192. self.commit();
  1193. try:
  1194. func=args[0]
  1195. args=args[1:]
  1196. apply(func,args)
  1197. except RecognitionException, e:
  1198. ## catastrophic failure
  1199. self.reportError(e);
  1200. self.consume();
  1201. raise TryAgain()
  1202. def raise_NoViableAlt(self,la1=None):
  1203. if not la1: la1 = self.LA(1)
  1204. fname = self.getFilename()
  1205. line = self.getLine()
  1206. col = self.getColumn()
  1207. raise NoViableAltForCharException(la1,fname,line,col)
  1208. def set_return_token(self,_create,_token,_ttype,_offset):
  1209. if _create and not _token and (not _ttype == SKIP):
  1210. string = self.text.getString(_offset)
  1211. _token = self.makeToken(_ttype)
  1212. _token.setText(string)
  1213. self._returnToken = _token
  1214. return _token
  1215. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1216. ### CharScannerIterator ###
  1217. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1218. class CharScannerIterator:
  1219. def __init__(self,inst):
  1220. if isinstance(inst,CharScanner):
  1221. self.inst = inst
  1222. return
  1223. raise TypeError("CharScannerIterator requires CharScanner object")
  1224. def next(self):
  1225. assert self.inst
  1226. item = self.inst.nextToken()
  1227. if not item or item.isEOF():
  1228. raise StopIteration()
  1229. return item
  1230. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1231. ### BitSet ###
  1232. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1233. ### I'm assuming here that a long is 64bits. It appears however, that
  1234. ### a long is of any size. That means we can use a single long as the
  1235. ### bitset (!), ie. Python would do almost all the work (TBD).
  1236. class BitSet(object):
  1237. BITS = 64
  1238. NIBBLE = 4
  1239. LOG_BITS = 6
  1240. MOD_MASK = BITS -1
  1241. def __init__(self,data=None):
  1242. if not data:
  1243. BitSet.__init__(self,[long(0)])
  1244. return
  1245. if isinstance(data,int):
  1246. BitSet.__init__(self,[long(data)])
  1247. return
  1248. if isinstance(data,long):
  1249. BitSet.__init__(self,[data])
  1250. return
  1251. if not isinstance(data,list):
  1252. raise TypeError("BitSet requires integer, long, or " +
  1253. "list argument")
  1254. for x in data:
  1255. if not isinstance(x,long):
  1256. raise TypeError(self,"List argument item is " +
  1257. "not a long: %s" % (x))
  1258. self.data = data
  1259. def __str__(self):
  1260. bits = len(self.data) * BitSet.BITS
  1261. s = ""
  1262. for i in xrange(0,bits):
  1263. if self.at(i):
  1264. s += "1"
  1265. else:
  1266. s += "o"
  1267. if not ((i+1) % 10):
  1268. s += '|%s|' % (i+1)
  1269. return s
  1270. def __repr__(self):
  1271. return str(self)
  1272. def member(self,item):
  1273. if not item:
  1274. return False
  1275. if isinstance(item,int):
  1276. return self.at(item)
  1277. if not (isinstance(item,str) or isinstance(item,unicode)):
  1278. raise TypeError(self,"char or unichar expected: %s" % (item))
  1279. ## char is a (unicode) string with at most lenght 1, ie.
  1280. ## a char.
  1281. if len(item) != 1:
  1282. raise TypeError(self,"char expected: %s" % (item))
  1283. ### handle ASCII/UNICODE char
  1284. num = ord(item)
  1285. ### check whether position num is in bitset
  1286. return self.at(num)
  1287. def wordNumber(self,bit):
  1288. return bit >> BitSet.LOG_BITS
  1289. def bitMask(self,bit):
  1290. pos = bit & BitSet.MOD_MASK ## bit mod BITS
  1291. return (1L << pos)
  1292. def set(self,bit,on=True):
  1293. # grow bitset as required (use with care!)
  1294. i = self.wordNumber(bit)
  1295. mask = self.bitMask(bit)
  1296. if i>=len(self.data):
  1297. d = i - len(self.data) + 1
  1298. for x in xrange(0,d):
  1299. self.data.append(0L)
  1300. assert len(self.data) == i+1
  1301. if on:
  1302. self.data[i] |= mask
  1303. else:
  1304. self.data[i] &= (~mask)
  1305. ### make add an alias for set
  1306. add = set
  1307. def off(self,bit,off=True):
  1308. self.set(bit,not off)
  1309. def at(self,bit):
  1310. i = self.wordNumber(bit)
  1311. v = self.data[i]
  1312. m = self.bitMask(bit)
  1313. return v & m
  1314. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1315. ### some further funcs ###
  1316. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1317. def illegalarg_ex(func):
  1318. raise ValueError(
  1319. "%s is only valid if parser is built for debugging" %
  1320. (func.func_name))
  1321. def runtime_ex(func):
  1322. raise RuntimeException(
  1323. "%s is only valid if parser is built for debugging" %
  1324. (func.func_name))
  1325. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1326. ### TokenBuffer ###
  1327. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1328. class TokenBuffer(object):
  1329. def __init__(self,stream):
  1330. self.input = stream
  1331. self.nMarkers = 0
  1332. self.markerOffset = 0
  1333. self.numToConsume = 0
  1334. self.queue = Queue()
  1335. def reset(self) :
  1336. self.nMarkers = 0
  1337. self.markerOffset = 0
  1338. self.numToConsume = 0
  1339. self.queue.reset()
  1340. def consume(self) :
  1341. self.numToConsume += 1
  1342. def fill(self, amount):
  1343. self.syncConsume()
  1344. while self.queue.length() < (amount + self.markerOffset):
  1345. self.queue.append(self.input.nextToken())
  1346. def getInput(self):
  1347. return self.input
  1348. def LA(self,k) :
  1349. self.fill(k)
  1350. return self.queue.elementAt(self.markerOffset + k - 1).type
  1351. def LT(self,k) :
  1352. self.fill(k)
  1353. return self.queue.elementAt(self.markerOffset + k - 1)
  1354. def mark(self) :
  1355. self.syncConsume()
  1356. self.nMarkers += 1
  1357. return self.markerOffset
  1358. def rewind(self,mark) :
  1359. self.syncConsume()
  1360. self.markerOffset = mark
  1361. self.nMarkers -= 1
  1362. def syncConsume(self) :
  1363. while self.numToConsume > 0:
  1364. if self.nMarkers > 0:
  1365. # guess mode -- leave leading characters and bump offset.
  1366. self.markerOffset += 1
  1367. else:
  1368. # normal mode -- remove first character
  1369. self.queue.removeFirst()
  1370. self.numToConsume -= 1
  1371. def __str__(self):
  1372. return "(%s,%s,%s,%s,%s)" % (
  1373. self.input,
  1374. self.nMarkers,
  1375. self.markerOffset,
  1376. self.numToConsume,
  1377. self.queue)
  1378. def __repr__(self):
  1379. return str(self)
  1380. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1381. ### ParserSharedInputState ###
  1382. ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
  1383. class ParserSharedInputState(object):

Large files are truncated, but you can click here to view the full file