/Dependencies/boo/lib/antlr-2.7.5/lib/python/antlr/antlr.py
Python | 2800 lines | 2690 code | 60 blank | 50 comment | 44 complexity | 64c94f5ddc5ce79ddd6389eafc8bda70 MD5 | raw file
Possible License(s): GPL-2.0
Large files files are truncated, but you can click here to view the full file
- ## This file is part of PyANTLR. See LICENSE.txt for license
- ## details..........Copyright (C) Wolfgang Haefelinger, 2004.
- ## get sys module
- import sys
- version = sys.version.split()[0]
- if version < '2.2.1':
- False = 0
- if version < '2.3':
- True = not False
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### global symbols ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### ANTLR Standard Tokens
- SKIP = -1
- INVALID_TYPE = 0
- EOF_TYPE = 1
- EOF = 1
- NULL_TREE_LOOKAHEAD = 3
- MIN_USER_TYPE = 4
- ### ANTLR's EOF Symbol
- EOF_CHAR = ''
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### general functions ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- def error(fmt,*args):
- if fmt:
- print "error: ", fmt % tuple(args)
- def ifelse(cond,_then,_else):
- if cond :
- r = _then
- else:
- r = _else
- return r
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### ANTLR Exceptions ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class ANTLRException(Exception):
- def __init__(self, *args):
- Exception.__init__(self, *args)
- class RecognitionException(ANTLRException):
- def __init__(self, *args):
- ANTLRException.__init__(self, *args)
- self.fileName = None
- self.line = -1
- self.column = -1
- if len(args) >= 2:
- self.fileName = args[1]
- if len(args) >= 3:
- self.line = args[2]
- if len(args) >= 4:
- self.column = args[3]
- def __str__(self):
- buf = ['']
- if self.fileName:
- buf.append(self.fileName + ":")
- if self.line != -1:
- if not self.fileName:
- buf.append("line ")
- buf.append(str(self.line))
- if self.column != -1:
- buf.append(":" + str(self.column))
- buf.append(":")
- buf.append(" ")
- return str('').join(buf)
- __repr__ = __str__
- class NoViableAltException(RecognitionException):
- def __init__(self, *args):
- RecognitionException.__init__(self, *args)
- self.token = None
- self.node = None
- if isinstance(args[0],AST):
- self.node = args[0]
- elif isinstance(args[0],Token):
- self.token = args[0]
- else:
- raise TypeError("NoViableAltException requires Token or AST argument")
- def __str__(self):
- if self.token:
- line = self.token.getLine()
- col = self.token.getColumn()
- text = self.token.getText()
- return "unexpected symbol at line %s (column %s): \"%s\"" % (line,col,text)
- if self.node == ASTNULL:
- return "unexpected end of subtree"
- assert self.node
- ### hackish, we assume that an AST contains method getText
- return "unexpected node: %s" % (self.node.getText())
- __repr__ = __str__
- class NoViableAltForCharException(RecognitionException):
- def __init__(self, *args):
- self.foundChar = None
- if len(args) == 2:
- self.foundChar = args[0]
- scanner = args[1]
- RecognitionException.__init__(self, "NoViableAlt",
- scanner.getFilename(),
- scanner.getLine(),
- scanner.getColumn())
- elif len(args) == 4:
- self.foundChar = args[0]
- fileName = args[1]
- line = args[2]
- column = args[3]
- RecognitionException.__init__(self, "NoViableAlt",
- fileName, line, column)
- else:
- RecognitionException.__init__(self, "NoViableAlt",
- '', -1, -1)
- def __str__(self):
- mesg = "unexpected char: "
- if self.foundChar >= ' ' and self.foundChar <= '~':
- mesg += "'" + self.foundChar + "'"
- elif self.foundChar:
- mesg += "0x" + hex(ord(self.foundChar)).upper()[2:]
- else:
- mesg += "<None>"
- return mesg
- __repr__ = __str__
- class SemanticException(RecognitionException):
- def __init__(self, *args):
- RecognitionException.__init__(self, *args)
- class MismatchedCharException(RecognitionException):
- NONE = 0
- CHAR = 1
- NOT_CHAR = 2
- RANGE = 3
- NOT_RANGE = 4
- SET = 5
- NOT_SET = 6
- def __init__(self, *args):
- self.args = args
- if len(args) == 5:
- # Expected range / not range
- if args[3]:
- self.mismatchType = MismatchedCharException.NOT_RANGE
- else:
- self.mismatchType = MismatchedCharException.RANGE
- self.foundChar = args[0]
- self.expecting = args[1]
- self.upper = args[2]
- self.scanner = args[4]
- RecognitionException.__init__(self, "Mismatched char range",
- self.scanner.getFilename(),
- self.scanner.getLine(),
- self.scanner.getColumn())
- elif len(args) == 4 and isinstance(args[1], str):
- # Expected char / not char
- if args[2]:
- self.mismatchType = MismatchedCharException.NOT_CHAR
- else:
- self.mismatchType = MismatchedCharException.CHAR
- self.foundChar = args[0]
- self.expecting = args[1]
- self.scanner = args[3]
- RecognitionException.__init__(self, "Mismatched char",
- self.scanner.getFilename(),
- self.scanner.getLine(),
- self.scanner.getColumn())
- elif len(args) == 4 and isinstance(args[1], BitSet):
- # Expected BitSet / not BitSet
- if args[2]:
- self.mismatchType = MismatchedCharException.NOT_SET
- else:
- self.mismatchType = MismatchedCharException.SET
- self.foundChar = args[0]
- self.set = args[1]
- self.scanner = args[3]
- RecognitionException.__init__(self, "Mismatched char set",
- self.scanner.getFilename(),
- self.scanner.getLine(),
- self.scanner.getColumn())
- else:
- self.mismatchType = MismatchedCharException.NONE
- RecognitionException.__init__(self, "Mismatched char")
- ## Append a char to the msg buffer. If special,
- # then show escaped version
- #
- def appendCharName(self, sb, c):
- if not c or c == 65535:
- # 65535 = (char) -1 = EOF
- sb.append("'<EOF>'")
- elif c == '\n':
- sb.append("'\\n'")
- elif c == '\r':
- sb.append("'\\r'");
- elif c == '\t':
- sb.append("'\\t'")
- else:
- sb.append('\'' + c + '\'')
- ##
- # Returns an error message with line number/column information
- #
- def __str__(self):
- sb = ['']
- sb.append(RecognitionException.__str__(self))
- if self.mismatchType == MismatchedCharException.CHAR:
- sb.append("expecting ")
- self.appendCharName(sb, self.expecting)
- sb.append(", found ")
- self.appendCharName(sb, self.foundChar)
- elif self.mismatchType == MismatchedCharException.NOT_CHAR:
- sb.append("expecting anything but '")
- self.appendCharName(sb, self.expecting)
- sb.append("'; got it anyway")
- elif self.mismatchType in [MismatchedCharException.RANGE, MismatchedCharException.NOT_RANGE]:
- sb.append("expecting char ")
- if self.mismatchType == MismatchedCharException.NOT_RANGE:
- sb.append("NOT ")
- sb.append("in range: ")
- appendCharName(sb, self.expecting)
- sb.append("..")
- appendCharName(sb, self.upper)
- sb.append(", found ")
- appendCharName(sb, self.foundChar)
- elif self.mismatchType in [MismatchedCharException.SET, MismatchedCharException.NOT_SET]:
- sb.append("expecting ")
- if self.mismatchType == MismatchedCharException.NOT_SET:
- sb.append("NOT ")
- sb.append("one of (")
- for i in range(len(self.set)):
- self.appendCharName(sb, self.set[i])
- sb.append("), found ")
- self.appendCharName(sb, self.foundChar)
- return str().join(sb).strip()
- __repr__ = __str__
- class MismatchedTokenException(RecognitionException):
- NONE = 0
- TOKEN = 1
- NOT_TOKEN = 2
- RANGE = 3
- NOT_RANGE = 4
- SET = 5
- NOT_SET = 6
- def __init__(self, *args):
- self.args = args
- self.tokenNames = []
- self.token = None
- self.tokenText = ''
- self.node = None
- if len(args) == 6:
- # Expected range / not range
- if args[3]:
- self.mismatchType = MismatchedTokenException.NOT_RANGE
- else:
- self.mismatchType = MismatchedTokenException.RANGE
- self.tokenNames = args[0]
- self.expecting = args[2]
- self.upper = args[3]
- self.fileName = args[5]
- elif len(args) == 4 and isinstance(args[2], int):
- # Expected token / not token
- if args[3]:
- self.mismatchType = MismatchedTokenException.NOT_TOKEN
- else:
- self.mismatchType = MismatchedTokenException.TOKEN
- self.tokenNames = args[0]
- self.expecting = args[2]
- elif len(args) == 4 and isinstance(args[2], BitSet):
- # Expected BitSet / not BitSet
- if args[3]:
- self.mismatchType = MismatchedTokenException.NOT_SET
- else:
- self.mismatchType = MismatchedTokenException.SET
- self.tokenNames = args[0]
- self.set = args[2]
- else:
- self.mismatchType = MismatchedTokenException.NONE
- RecognitionException.__init__(self, "Mismatched Token: expecting any AST node", "<AST>", -1, -1)
- if len(args) >= 2:
- if isinstance(args[1],Token):
- self.token = args[1]
- self.tokenText = self.token.getText()
- RecognitionException.__init__(self, "Mismatched Token",
- self.fileName,
- self.token.getLine(),
- self.token.getColumn())
- elif isinstance(args[1],AST):
- self.node = args[1]
- self.tokenText = str(self.node)
- RecognitionException.__init__(self, "Mismatched Token",
- "<AST>",
- self.node.getLine(),
- self.node.getColumn())
- else:
- self.tokenText = "<empty tree>"
- RecognitionException.__init__(self, "Mismatched Token",
- "<AST>", -1, -1)
- def appendTokenName(self, sb, tokenType):
- if tokenType == INVALID_TYPE:
- sb.append("<Set of tokens>")
- elif tokenType < 0 or tokenType >= len(self.tokenNames):
- sb.append("<" + str(tokenType) + ">")
- else:
- sb.append(self.tokenNames[tokenType])
- ##
- # Returns an error message with line number/column information
- #
- def __str__(self):
- sb = ['']
- sb.append(RecognitionException.__str__(self))
- if self.mismatchType == MismatchedTokenException.TOKEN:
- sb.append("expecting ")
- self.appendTokenName(sb, self.expecting)
- sb.append(", found " + self.tokenText)
- elif self.mismatchType == MismatchedTokenException.NOT_TOKEN:
- sb.append("expecting anything but '")
- self.appendTokenName(sb, self.expecting)
- sb.append("'; got it anyway")
- elif self.mismatchType in [MismatchedTokenException.RANGE, MismatchedTokenException.NOT_RANGE]:
- sb.append("expecting token ")
- if self.mismatchType == MismatchedTokenException.NOT_RANGE:
- sb.append("NOT ")
- sb.append("in range: ")
- appendTokenName(sb, self.expecting)
- sb.append("..")
- appendTokenName(sb, self.upper)
- sb.append(", found " + self.tokenText)
- elif self.mismatchType in [MismatchedTokenException.SET, MismatchedTokenException.NOT_SET]:
- sb.append("expecting ")
- if self.mismatchType == MismatchedTokenException.NOT_SET:
- sb.append("NOT ")
- sb.append("one of (")
- for i in range(len(self.set)):
- self.appendTokenName(sb, self.set[i])
- sb.append("), found " + self.tokenText)
- return str().join(sb).strip()
- __repr__ = __str__
- class TokenStreamException(ANTLRException):
- def __init__(self, *args):
- ANTLRException.__init__(self, *args)
- # Wraps an Exception in a TokenStreamException
- class TokenStreamIOException(TokenStreamException):
- def __init__(self, *args):
- if args and isinstance(args[0], Exception):
- io = args[0]
- TokenStreamException.__init__(self, str(io))
- self.io = io
- else:
- TokenStreamException.__init__(self, *args)
- self.io = self
- # Wraps a RecognitionException in a TokenStreamException
- class TokenStreamRecognitionException(TokenStreamException):
- def __init__(self, *args):
- if args and isinstance(args[0], RecognitionException):
- recog = args[0]
- TokenStreamException.__init__(self, str(recog))
- self.recog = recog
- else:
- raise TypeError("TokenStreamRecognitionException requires RecognitionException argument")
- def __str__(self):
- return str(self.recog)
- __repr__ = __str__
- class TokenStreamRetryException(TokenStreamException):
- def __init__(self, *args):
- TokenStreamException.__init__(self, *args)
- class CharStreamException(ANTLRException):
- def __init__(self, *args):
- ANTLRException.__init__(self, *args)
- # Wraps an Exception in a CharStreamException
- class CharStreamIOException(CharStreamException):
- def __init__(self, *args):
- if args and isinstance(args[0], Exception):
- io = args[0]
- CharStreamException.__init__(self, str(io))
- self.io = io
- else:
- CharStreamException.__init__(self, *args)
- self.io = self
- class TryAgain(Exception):
- pass
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### Token ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class Token(object):
- SKIP = -1
- INVALID_TYPE = 0
- EOF_TYPE = 1
- EOF = 1
- NULL_TREE_LOOKAHEAD = 3
- MIN_USER_TYPE = 4
- def __init__(self,**argv):
- try:
- self.type = argv['type']
- except:
- self.type = INVALID_TYPE
- try:
- self.text = argv['text']
- except:
- self.text = "<no text>"
- def isEOF(self):
- return (self.type == EOF_TYPE)
- def getColumn(self):
- return 0
- def getLine(self):
- return 0
- def getFilename(self):
- return None
- def setFilename(self,name):
- return self
- def getText(self):
- return "<no text>"
- def setText(self,text):
- if isinstance(text,str):
- pass
- else:
- raise TypeError("Token.setText requires string argument")
- return self
- def setColumn(self,column):
- return self
- def setLine(self,line):
- return self
- def getType(self):
- return self.type
- def setType(self,type):
- if isinstance(type,int):
- self.type = type
- else:
- raise TypeError("Token.setType requires integer argument")
- return self
- def toString(self):
- ## not optimal
- type_ = self.type
- if type_ == 3:
- tval = 'NULL_TREE_LOOKAHEAD'
- elif type_ == 1:
- tval = 'EOF_TYPE'
- elif type_ == 0:
- tval = 'INVALID_TYPE'
- elif type_ == -1:
- tval = 'SKIP'
- else:
- tval = type_
- return '["%s",<%s>]' % (self.getText(),tval)
- __str__ = toString
- __repr__ = toString
- ### static attribute ..
- Token.badToken = Token( type=INVALID_TYPE, text="<no text>")
- if __name__ == "__main__":
- print "testing .."
- T = Token.badToken
- print T
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### CommonToken ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class CommonToken(Token):
- def __init__(self,**argv):
- Token.__init__(self,**argv)
- self.line = 0
- self.col = 0
- try:
- self.line = argv['line']
- except:
- pass
- try:
- self.col = argv['col']
- except:
- pass
- def getLine(self):
- return self.line
- def getText(self):
- return self.text
- def getColumn(self):
- return self.col
- def setLine(self,line):
- self.line = line
- return self
- def setText(self,text):
- self.text = text
- return self
- def setColumn(self,col):
- self.col = col
- return self
- def toString(self):
- ## not optimal
- type_ = self.type
- if type_ == 3:
- tval = 'NULL_TREE_LOOKAHEAD'
- elif type_ == 1:
- tval = 'EOF_TYPE'
- elif type_ == 0:
- tval = 'INVALID_TYPE'
- elif type_ == -1:
- tval = 'SKIP'
- else:
- tval = type_
- d = {
- 'text' : self.text,
- 'type' : tval,
- 'line' : self.line,
- 'colm' : self.col
- }
- fmt = '["%(text)s",<%(type)s>,line=%(line)s,col=%(colm)s]'
- return fmt % d
- __str__ = toString
- __repr__ = toString
- if __name__ == '__main__' :
- T = CommonToken()
- print T
- T = CommonToken(col=15,line=1,text="some text", type=5)
- print T
- T = CommonToken()
- T.setLine(1).setColumn(15).setText("some text").setType(5)
- print T
- print T.getLine()
- print T.getColumn()
- print T.getText()
- print T.getType()
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### CommonHiddenStreamToken ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class CommonHiddenStreamToken(CommonToken):
- def __init__(self,*args):
- CommonToken.__init__(self,*args)
- self.hiddenBefore = None
- self.hiddenAfter = None
- def getHiddenAfter(self):
- return self.hiddenAfter
- def getHiddenBefore(self):
- return self.hiddenBefore
- def setHiddenAfter(self,t):
- self.hiddenAfter = t
- def setHiddenBefore(self, t):
- self.hiddenBefore = t
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### Queue ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ## Shall be a circular buffer on tokens ..
- class Queue(object):
- def __init__(self):
- self.buffer = [] # empty list
- def append(self,item):
- self.buffer.append(item)
- def elementAt(self,index):
- return self.buffer[index]
- def reset(self):
- self.buffer = []
- def removeFirst(self):
- self.buffer.pop(0)
- def length(self):
- return len(self.buffer)
- def __str__(self):
- return str(self.buffer)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### InputBuffer ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class InputBuffer(object):
- def __init__(self):
- self.nMarkers = 0
- self.markerOffset = 0
- self.numToConsume = 0
- self.queue = Queue()
- def __str__(self):
- return "(%s,%s,%s,%s)" % (
- self.nMarkers,
- self.markerOffset,
- self.numToConsume,
- self.queue)
- def __repr__(self):
- return str(self)
- def commit(self):
- self.nMarkers -= 1
- def consume(self) :
- self.numToConsume += 1
- ## probably better to return a list of items
- ## because of unicode. Or return a unicode
- ## string ..
- def getLAChars(self) :
- i = self.markerOffset
- n = self.queue.length()
- s = ''
- while i<n:
- s += self.queue.elementAt(i)
- return s
- ## probably better to return a list of items
- ## because of unicode chars
- def getMarkedChars(self) :
- s = ''
- i = 0
- n = self.markerOffset
- while i<n:
- s += self.queue.elementAt(i)
- return s
- def isMarked(self) :
- return self.nMarkers != 0
- def fill(self,k):
- ### abstract method
- raise NotImplementedError()
- def LA(self,k) :
- self.fill(k)
- return self.queue.elementAt(self.markerOffset + k - 1)
- def mark(self) :
- self.syncConsume()
- self.nMarkers += 1
- return self.markerOffset
- def rewind(self,mark) :
- self.syncConsume()
- self.markerOffset = mark
- self.nMarkers -= 1
- def reset(self) :
- self.nMarkers = 0
- self.markerOffset = 0
- self.numToConsume = 0
- self.queue.reset()
- def syncConsume(self) :
- while self.numToConsume > 0:
- if self.nMarkers > 0:
- # guess mode -- leave leading characters and bump offset.
- self.markerOffset += 1
- else:
- # normal mode -- remove first character
- self.queue.removeFirst()
- self.numToConsume -= 1
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### CharBuffer ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class CharBuffer(InputBuffer):
- def __init__(self,reader):
- ##assert isinstance(reader,file)
- super(CharBuffer,self).__init__()
- ## a reader is supposed to be anything that has
- ## a method 'read(int)'.
- self.input = reader
- def __str__(self):
- base = super(CharBuffer,self).__str__()
- return "CharBuffer{%s,%s" % (base,str(input))
- def fill(self,amount):
- try:
- self.syncConsume()
- while self.queue.length() < (amount + self.markerOffset) :
- ## retrieve just one char - what happend at end
- ## of input?
- c = self.input.read(1)
- ### python's behaviour is to return the empty string on
- ### EOF, ie. no exception whatsoever is thrown. An empty
- ### python string has the nice feature that it is of
- ### type 'str' and "not ''" would return true. Contrary,
- ### one can't do this: '' in 'abc'. This should return
- ### false, but all we get is then a TypeError as an
- ### empty string is not a character.
- ### Let's assure then that we have either seen a
- ### character or an empty string (EOF).
- assert len(c) == 0 or len(c) == 1
- ### And it shall be of type string (ASCII or UNICODE).
- assert isinstance(c,str) or isinstance(c,unicode)
- ### Just append EOF char to buffer. Note that buffer may
- ### contain then just more than one EOF char ..
- ### use unicode chars instead of ASCII ..
- self.queue.append(c)
- except Exception,e:
- raise CharStreamIOException(e)
- ##except: # (mk) Cannot happen ...
- ##error ("unexpected exception caught ..")
- ##assert 0
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### LexerSharedInputState ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class LexerSharedInputState(object):
- def __init__(self,ibuf):
- assert isinstance(ibuf,InputBuffer)
- self.input = ibuf
- self.column = 1
- self.line = 1
- self.tokenStartColumn = 1
- self.tokenStartLine = 1
- self.guessing = 0
- self.filename = None
- def reset(self):
- self.column = 1
- self.line = 1
- self.tokenStartColumn = 1
- self.tokenStartLine = 1
- self.guessing = 0
- self.filename = None
- self.input.reset()
- def LA(self,k):
- return self.input.LA(k)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenStream ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenStream(object):
- def nextToken(self):
- pass
- def __iter__(self):
- return TokenStreamIterator(self)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenStreamIterator ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenStreamIterator(object):
- def __init__(self,inst):
- if isinstance(inst,TokenStream):
- self.inst = inst
- return
- raise TypeError("TokenStreamIterator requires TokenStream object")
- def next(self):
- assert self.inst
- item = self.inst.nextToken()
- if not item or item.isEOF():
- raise StopIteration()
- return item
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenStreamSelector ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenStreamSelector(TokenStream):
- def __init__(self):
- self._input = None
- self._stmap = {}
- self._stack = []
- def addInputStream(self,stream,key):
- self._stmap[key] = stream
- def getCurrentStream(self):
- return self._input
- def getStream(self,sname):
- try:
- stream = self._stmap[sname]
- except:
- raise ValueError("TokenStream " + sname + " not found");
- return stream;
- def nextToken(self):
- while 1:
- try:
- return self._input.nextToken()
- except TokenStreamRetryException,r:
- ### just retry "forever"
- pass
- def pop(self):
- stream = self._stack.pop();
- self.select(stream);
- return stream;
- def push(self,arg):
- self._stack.append(self._input);
- self.select(arg)
- def retry(self):
- raise TokenStreamRetryException()
- def select(self,arg):
- if isinstance(arg,TokenStream):
- self._input = arg
- return
- if isinstance(arg,str):
- self._input = self.getStream(arg)
- return
- raise TypeError("TokenStreamSelector.select requires " +
- "TokenStream or string argument")
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenStreamBasicFilter ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenStreamBasicFilter(TokenStream):
- def __init__(self,input):
- self.input = input;
- self.discardMask = BitSet()
- def discard(self,arg):
- if isinstance(arg,int):
- self.discardMask.add(arg)
- return
- if isinstance(arg,BitSet):
- self.discardMark = arg
- return
- raise TypeError("TokenStreamBasicFilter.discard requires" +
- "integer or BitSet argument")
- def nextToken(self):
- tok = self.input.nextToken()
- while tok and self.discardMask.member(tok.getType()):
- tok = self.input.nextToken()
- return tok
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenStreamHiddenTokenFilter ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenStreamHiddenTokenFilter(TokenStreamBasicFilter):
- def __init__(self,input):
- TokenStreamBasicFilter.__init__(self,input)
- self.hideMask = BitSet()
- self.nextMonitoredToken = None
- self.lastHiddenToken = None
- self.firstHidden = None
- def consume(self):
- self.nextMonitoredToken = self.input.nextToken()
- def consumeFirst(self):
- self.consume()
- p = None;
- while self.hideMask.member(self.LA(1).getType()) or \
- self.discardMask.member(self.LA(1).getType()):
- if self.hideMask.member(self.LA(1).getType()):
- if not p:
- p = self.LA(1)
- else:
- p.setHiddenAfter(self.LA(1))
- self.LA(1).setHiddenBefore(p)
- p = self.LA(1)
- self.lastHiddenToken = p
- if not self.firstHidden:
- self.firstHidden = p
- self.consume()
- def getDiscardMask(self):
- return self.discardMask
- def getHiddenAfter(self,t):
- return t.getHiddenAfter()
- def getHiddenBefore(self,t):
- return t.getHiddenBefore()
- def getHideMask(self):
- return self.hideMask
- def getInitialHiddenToken(self):
- return self.firstHidden
- def hide(self,m):
- if isinstance(m,int):
- self.hideMask.add(m)
- return
- if isinstance(m.BitMask):
- self.hideMask = m
- return
- def LA(self,i):
- return self.nextMonitoredToken
- def nextToken(self):
- if not self.LA(1):
- self.consumeFirst()
- monitored = self.LA(1)
- monitored.setHiddenBefore(self.lastHiddenToken)
- self.lastHiddenToken = None
- self.consume()
- p = monitored
- while self.hideMask.member(self.LA(1).getType()) or \
- self.discardMask.member(self.LA(1).getType()):
- if self.hideMask.member(self.LA(1).getType()):
- p.setHiddenAfter(self.LA(1))
- if p != monitored:
- self.LA(1).setHiddenBefore(p)
- p = self.lastHiddenToken = self.LA(1)
- self.consume()
- return monitored
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### StringBuffer ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class StringBuffer:
- def __init__(self,string=None):
- if string:
- self.text = list(string)
- else:
- self.text = []
- def setLength(self,sz):
- if not sz :
- self.text = []
- return
- assert sz>0
- if sz >= self.length():
- return
- ### just reset to empty buffer
- self.text = self.text[0:sz]
- def length(self):
- return len(self.text)
- def append(self,c):
- self.text.append(c)
- ### return buffer as string. Arg 'a' is used as index
- ## into the buffer and 2nd argument shall be the length.
- ## If 2nd args is absent, we return chars till end of
- ## buffer starting with 'a'.
- def getString(self,a=None,length=None):
- if not a :
- a = 0
- assert a>=0
- if a>= len(self.text) :
- return ""
- if not length:
- ## no second argument
- L = self.text[a:]
- else:
- assert (a+length) <= len(self.text)
- b = a + length
- L = self.text[a:b]
- s = ""
- for x in L : s += x
- return s
- toString = getString ## alias
- def __str__(self):
- return str(self.text)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### Reader ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ## When reading Japanese chars, it happens that a stream returns a
- ## 'char' of length 2. This looks like a bug in the appropriate
- ## codecs - but I'm rather unsure about this. Anyway, if this is
- ## the case, I'm going to split this string into a list of chars
- ## and put them on hold, ie. on a buffer. Next time when called
- ## we read from buffer until buffer is empty.
- ## wh: nov, 25th -> problem does not appear in Python 2.4.0.c1.
- class Reader(object):
- def __init__(self,stream):
- self.cin = stream
- self.buf = []
- def read(self,num):
- assert num==1
- if len(self.buf):
- return self.buf.pop()
- ## Read a char - this may return a string.
- ## Is this a bug in codecs/Python?
- c = self.cin.read(1)
- if not c or len(c)==1:
- return c
- L = list(c)
- L.reverse()
- for x in L:
- self.buf.append(x)
- ## read one char ..
- return self.read(1)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### CharScanner ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class CharScanner(TokenStream):
- ## class members
- NO_CHAR = 0
- EOF_CHAR = '' ### EOF shall be the empty string.
- def __init__(self, *argv, **kwargs):
- super(CharScanner, self).__init__()
- self.saveConsumedInput = True
- self.tokenClass = None
- self.caseSensitive = True
- self.caseSensitiveLiterals = True
- self.literals = None
- self.tabsize = 8
- self._returnToken = None
- self.commitToPath = False
- self.traceDepth = 0
- self.text = StringBuffer()
- self.hashString = hash(self)
- self.setTokenObjectClass(CommonToken)
- self.setInput(*argv)
- def __iter__(self):
- return CharScannerIterator(self)
- def setInput(self,*argv):
- ## case 1:
- ## if there's no arg we default to read from
- ## standard input
- if not argv:
- import sys
- self.setInput(sys.stdin)
- return
- ## get 1st argument
- arg1 = argv[0]
- ## case 2:
- ## if arg1 is a string, we assume it's a file name
- ## and open a stream using 2nd argument as open
- ## mode. If there's no 2nd argument we fall back to
- ## mode '+rb'.
- if isinstance(arg1,str):
- f = open(arg1,"rb")
- self.setInput(f)
- self.setFilename(arg1)
- return
- ## case 3:
- ## if arg1 is a file we wrap it by a char buffer (
- ## some additional checks?? No, can't do this in
- ## general).
- if isinstance(arg1,file):
- self.setInput(CharBuffer(arg1))
- return
- ## case 4:
- ## if arg1 is of type SharedLexerInputState we use
- ## argument as is.
- if isinstance(arg1,LexerSharedInputState):
- self.inputState = arg1
- return
- ## case 5:
- ## check whether argument type is of type input
- ## buffer. If so create a SharedLexerInputState and
- ## go ahead.
- if isinstance(arg1,InputBuffer):
- self.setInput(LexerSharedInputState(arg1))
- return
- ## case 6:
- ## check whether argument type has a method read(int)
- ## If so create CharBuffer ...
- try:
- if arg1.read:
- rd = Reader(arg1)
- cb = CharBuffer(rd)
- ss = LexerSharedInputState(cb)
- self.inputState = ss
- return
- except:
- pass
- ## case 7:
- ## raise wrong argument exception
- raise TypeError(argv)
- def setTabSize(self,size) :
- self.tabsize = size
- def getTabSize(self) :
- return self.tabsize
- def setCaseSensitive(self,t) :
- self.caseSensitive = t
- def setCommitToPath(self,commit) :
- self.commitToPath = commit
- def setFilename(self,f) :
- self.inputState.filename = f
- def setLine(self,line) :
- self.inputState.line = line
- def setText(self,s) :
- self.resetText()
- self.text.append(s)
- def getCaseSensitive(self) :
- return self.caseSensitive
- def getCaseSensitiveLiterals(self) :
- return self.caseSensitiveLiterals
- def getColumn(self) :
- return self.inputState.column
- def setColumn(self,c) :
- self.inputState.column = c
- def getCommitToPath(self) :
- return self.commitToPath
- def getFilename(self) :
- return self.inputState.filename
- def getInputBuffer(self) :
- return self.inputState.input
- def getInputState(self) :
- return self.inputState
- def setInputState(self,state) :
- assert isinstance(state,LexerSharedInputState)
- self.inputState = state
- def getLine(self) :
- return self.inputState.line
- def getText(self) :
- return str(self.text)
- def getTokenObject(self) :
- return self._returnToken
- def LA(self,i) :
- c = self.inputState.input.LA(i)
- if not self.caseSensitive:
- ### E0006
- c = c.__class__.lower(c)
- return c
- def makeToken(self,type) :
- try:
- ## dynamically load a class
- assert self.tokenClass
- tok = self.tokenClass()
- tok.setType(type)
- tok.setColumn(self.inputState.tokenStartColumn)
- tok.setLine(self.inputState.tokenStartLine)
- return tok
- except:
- self.panic("unable to create new token")
- return Token.badToken
- def mark(self) :
- return self.inputState.input.mark()
- def _match_bitset(self,b) :
- if b.member(self.LA(1)):
- self.consume()
- else:
- raise MismatchedCharException(self.LA(1), b, False, self)
- def _match_string(self,s) :
- for c in s:
- if self.LA(1) == c:
- self.consume()
- else:
- raise MismatchedCharException(self.LA(1), c, False, self)
- def match(self,item):
- if isinstance(item,str) or isinstance(item,unicode):
- return self._match_string(item)
- else:
- return self._match_bitset(item)
- def matchNot(self,c) :
- if self.LA(1) != c:
- self.consume()
- else:
- raise MismatchedCharException(self.LA(1), c, True, self)
- def matchRange(self,c1,c2) :
- if self.LA(1) < c1 or self.LA(1) > c2 :
- raise MismatchedCharException(self.LA(1), c1, c2, False, self)
- else:
- self.consume()
- def newline(self) :
- self.inputState.line += 1
- self.inputState.column = 1
- def tab(self) :
- c = self.getColumn()
- nc = ( ((c-1)/self.tabsize) + 1) * self.tabsize + 1
- self.setColumn(nc)
- def panic(self,s='') :
- print "CharScanner: panic: " + s
- sys.exit(1)
- def reportError(self,ex) :
- print ex
- def reportError(self,s) :
- if not self.getFilename():
- print "error: " + str(s)
- else:
- print self.getFilename() + ": error: " + str(s)
- def reportWarning(self,s) :
- if not self.getFilename():
- print "warning: " + str(s)
- else:
- print self.getFilename() + ": warning: " + str(s)
- def resetText(self) :
- self.text.setLength(0)
- self.inputState.tokenStartColumn = self.inputState.column
- self.inputState.tokenStartLine = self.inputState.line
- def rewind(self,pos) :
- self.inputState.input.rewind(pos)
- def setTokenObjectClass(self,cl):
- self.tokenClass = cl
- def testForLiteral(self,token):
- if not token:
- return
- assert isinstance(token,Token)
- _type = token.getType()
- ## special tokens can't be literals
- if _type in [SKIP,INVALID_TYPE,EOF_TYPE,NULL_TREE_LOOKAHEAD] :
- return
- _text = token.getText()
- if not _text:
- return
- assert isinstance(_text,str) or isinstance(_text,unicode)
- _type = self.testLiteralsTable(_text,_type)
- token.setType(_type)
- return _type
- def testLiteralsTable(self,*args):
- if isinstance(args[0],str) or isinstance(args[0],unicode):
- s = args[0]
- i = args[1]
- else:
- s = self.text.getString()
- i = args[0]
- ## check whether integer has been given
- if not isinstance(i,int):
- assert isinstance(i,int)
- ## check whether we have a dict
- assert isinstance(self.literals,dict)
- try:
- ## E0010
- if not self.caseSensitiveLiterals:
- s = s.__class__.lower(s)
- i = self.literals[s]
- except:
- pass
- return i
- def toLower(self,c):
- return c.__class__.lower()
- def traceIndent(self):
- print ' ' * self.traceDepth
- def traceIn(self,rname):
- self.traceDepth += 1
- self.traceIndent()
- print "> lexer %s c== %s" % (rname,self.LA(1))
- def traceOut(self,rname):
- self.traceIndent()
- print "< lexer %s c== %s" % (rname,self.LA(1))
- self.traceDepth -= 1
- def uponEOF(self):
- pass
- def append(self,c):
- if self.saveConsumedInput :
- self.text.append(c)
- def commit(self):
- self.inputState.input.commit()
- def consume(self):
- if not self.inputState.guessing:
- c = self.LA(1)
- if self.caseSensitive:
- self.append(c)
- else:
- # use input.LA(), not LA(), to get original case
- # CharScanner.LA() would toLower it.
- c = self.inputState.input.LA(1)
- self.append(c)
- if c and c in "\t":
- self.tab()
- else:
- self.inputState.column += 1
- self.inputState.input.consume()
- ## Consume chars until one matches the given char
- def consumeUntil_char(self,c):
- while self.LA(1) != EOF_CHAR and self.LA(1) != c:
- self.consume()
- ## Consume chars until one matches the given set
- def consumeUntil_bitset(self,bitset):
- while self.LA(1) != EOF_CHAR and not self.set.member(self.LA(1)):
- self.consume()
- ### If symbol seen is EOF then generate and set token, otherwise
- ### throw exception.
- def default(self,la1):
- if not la1 :
- self.uponEOF()
- self._returnToken = self.makeToken(EOF_TYPE)
- else:
- self.raise_NoViableAlt(la1)
- def filterdefault(self,la1,*args):
- if not la1:
- self.uponEOF()
- self._returnToken = self.makeToken(EOF_TYPE)
- return
- if not args:
- self.consume()
- raise TryAgain()
- else:
- ### apply filter object
- self.commit();
- try:
- func=args[0]
- args=args[1:]
- apply(func,args)
- except RecognitionException, e:
- ## catastrophic failure
- self.reportError(e);
- self.consume();
- raise TryAgain()
- def raise_NoViableAlt(self,la1=None):
- if not la1: la1 = self.LA(1)
- fname = self.getFilename()
- line = self.getLine()
- col = self.getColumn()
- raise NoViableAltForCharException(la1,fname,line,col)
- def set_return_token(self,_create,_token,_ttype,_offset):
- if _create and not _token and (not _ttype == SKIP):
- string = self.text.getString(_offset)
- _token = self.makeToken(_ttype)
- _token.setText(string)
- self._returnToken = _token
- return _token
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### CharScannerIterator ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class CharScannerIterator:
- def __init__(self,inst):
- if isinstance(inst,CharScanner):
- self.inst = inst
- return
- raise TypeError("CharScannerIterator requires CharScanner object")
- def next(self):
- assert self.inst
- item = self.inst.nextToken()
- if not item or item.isEOF():
- raise StopIteration()
- return item
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### BitSet ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### I'm assuming here that a long is 64bits. It appears however, that
- ### a long is of any size. That means we can use a single long as the
- ### bitset (!), ie. Python would do almost all the work (TBD).
- class BitSet(object):
- BITS = 64
- NIBBLE = 4
- LOG_BITS = 6
- MOD_MASK = BITS -1
- def __init__(self,data=None):
- if not data:
- BitSet.__init__(self,[long(0)])
- return
- if isinstance(data,int):
- BitSet.__init__(self,[long(data)])
- return
- if isinstance(data,long):
- BitSet.__init__(self,[data])
- return
- if not isinstance(data,list):
- raise TypeError("BitSet requires integer, long, or " +
- "list argument")
- for x in data:
- if not isinstance(x,long):
- raise TypeError(self,"List argument item is " +
- "not a long: %s" % (x))
- self.data = data
- def __str__(self):
- bits = len(self.data) * BitSet.BITS
- s = ""
- for i in xrange(0,bits):
- if self.at(i):
- s += "1"
- else:
- s += "o"
- if not ((i+1) % 10):
- s += '|%s|' % (i+1)
- return s
- def __repr__(self):
- return str(self)
- def member(self,item):
- if not item:
- return False
- if isinstance(item,int):
- return self.at(item)
- if not (isinstance(item,str) or isinstance(item,unicode)):
- raise TypeError(self,"char or unichar expected: %s" % (item))
- ## char is a (unicode) string with at most lenght 1, ie.
- ## a char.
- if len(item) != 1:
- raise TypeError(self,"char expected: %s" % (item))
- ### handle ASCII/UNICODE char
- num = ord(item)
- ### check whether position num is in bitset
- return self.at(num)
- def wordNumber(self,bit):
- return bit >> BitSet.LOG_BITS
- def bitMask(self,bit):
- pos = bit & BitSet.MOD_MASK ## bit mod BITS
- return (1L << pos)
- def set(self,bit,on=True):
- # grow bitset as required (use with care!)
- i = self.wordNumber(bit)
- mask = self.bitMask(bit)
- if i>=len(self.data):
- d = i - len(self.data) + 1
- for x in xrange(0,d):
- self.data.append(0L)
- assert len(self.data) == i+1
- if on:
- self.data[i] |= mask
- else:
- self.data[i] &= (~mask)
- ### make add an alias for set
- add = set
- def off(self,bit,off=True):
- self.set(bit,not off)
- def at(self,bit):
- i = self.wordNumber(bit)
- v = self.data[i]
- m = self.bitMask(bit)
- return v & m
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### some further funcs ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- def illegalarg_ex(func):
- raise ValueError(
- "%s is only valid if parser is built for debugging" %
- (func.func_name))
- def runtime_ex(func):
- raise RuntimeException(
- "%s is only valid if parser is built for debugging" %
- (func.func_name))
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### TokenBuffer ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class TokenBuffer(object):
- def __init__(self,stream):
- self.input = stream
- self.nMarkers = 0
- self.markerOffset = 0
- self.numToConsume = 0
- self.queue = Queue()
- def reset(self) :
- self.nMarkers = 0
- self.markerOffset = 0
- self.numToConsume = 0
- self.queue.reset()
- def consume(self) :
- self.numToConsume += 1
- def fill(self, amount):
- self.syncConsume()
- while self.queue.length() < (amount + self.markerOffset):
- self.queue.append(self.input.nextToken())
- def getInput(self):
- return self.input
- def LA(self,k) :
- self.fill(k)
- return self.queue.elementAt(self.markerOffset + k - 1).type
- def LT(self,k) :
- self.fill(k)
- return self.queue.elementAt(self.markerOffset + k - 1)
- def mark(self) :
- self.syncConsume()
- self.nMarkers += 1
- return self.markerOffset
- def rewind(self,mark) :
- self.syncConsume()
- self.markerOffset = mark
- self.nMarkers -= 1
- def syncConsume(self) :
- while self.numToConsume > 0:
- if self.nMarkers > 0:
- # guess mode -- leave leading characters and bump offset.
- self.markerOffset += 1
- else:
- # normal mode -- remove first character
- self.queue.removeFirst()
- self.numToConsume -= 1
- def __str__(self):
- return "(%s,%s,%s,%s,%s)" % (
- self.input,
- self.nMarkers,
- self.markerOffset,
- self.numToConsume,
- self.queue)
- def __repr__(self):
- return str(self)
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- ### ParserSharedInputState ###
- ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
- class ParserSharedInputState(object):
- …
Large files files are truncated, but you can click here to view the full file