/ext/ply/ply/yacc.py
Python | 3276 lines | 2373 code | 289 blank | 614 comment | 324 complexity | 9154ae6bd469cb728532dc7bcca1d1a0 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
Large files files are truncated, but you can click here to view the full file
- # -----------------------------------------------------------------------------
- # ply: yacc.py
- #
- # Copyright (C) 2001-2009,
- # David M. Beazley (Dabeaz LLC)
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met:
- #
- # * Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright notice,
- # this list of conditions and the following disclaimer in the documentation
- # and/or other materials provided with the distribution.
- # * Neither the name of the David Beazley or Dabeaz LLC may be used to
- # endorse or promote products derived from this software without
- # specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- # -----------------------------------------------------------------------------
- #
- # This implements an LR parser that is constructed from grammar rules defined
- # as Python functions. The grammer is specified by supplying the BNF inside
- # Python documentation strings. The inspiration for this technique was borrowed
- # from John Aycock's Spark parsing system. PLY might be viewed as cross between
- # Spark and the GNU bison utility.
- #
- # The current implementation is only somewhat object-oriented. The
- # LR parser itself is defined in terms of an object (which allows multiple
- # parsers to co-exist). However, most of the variables used during table
- # construction are defined in terms of global variables. Users shouldn't
- # notice unless they are trying to define multiple parsers at the same
- # time using threads (in which case they should have their head examined).
- #
- # This implementation supports both SLR and LALR(1) parsing. LALR(1)
- # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
- # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
- # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
- # by the more efficient DeRemer and Pennello algorithm.
- #
- # :::::::: WARNING :::::::
- #
- # Construction of LR parsing tables is fairly complicated and expensive.
- # To make this module run fast, a *LOT* of work has been put into
- # optimization---often at the expensive of readability and what might
- # consider to be good Python "coding style." Modify the code at your
- # own risk!
- # ----------------------------------------------------------------------------
- __version__ = "3.2"
- __tabversion__ = "3.2" # Table version
- #-----------------------------------------------------------------------------
- # === User configurable parameters ===
- #
- # Change these to modify the default behavior of yacc (if you wish)
- #-----------------------------------------------------------------------------
- yaccdebug = 1 # Debugging mode. If set, yacc generates a
- # a 'parser.out' file in the current directory
- debug_file = 'parser.out' # Default name of the debugging file
- tab_module = 'parsetab' # Default name of the table module
- default_lr = 'LALR' # Default LR table generation method
- error_count = 3 # Number of symbols that must be shifted to leave recovery mode
- yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
- # implementations of certain functions.
- resultlimit = 40 # Size limit of results when running in debug mode.
- pickle_protocol = 0 # Protocol to use when writing pickle files
- import re, types, sys, os.path
- # Compatibility function for python 2.6/3.0
- if sys.version_info[0] < 3:
- def func_code(f):
- return f.func_code
- else:
- def func_code(f):
- return f.__code__
- # Compatibility
- try:
- MAXINT = sys.maxint
- except AttributeError:
- MAXINT = sys.maxsize
- # Python 2.x/3.0 compatibility.
- def load_ply_lex():
- if sys.version_info[0] < 3:
- import lex
- else:
- import ply.lex as lex
- return lex
- # This object is a stand-in for a logging object created by the
- # logging module. PLY will use this by default to create things
- # such as the parser.out file. If a user wants more detailed
- # information, they can create their own logging object and pass
- # it into PLY.
- class PlyLogger(object):
- def __init__(self,f):
- self.f = f
- def debug(self,msg,*args,**kwargs):
- self.f.write((msg % args) + "\n")
- info = debug
- def warning(self,msg,*args,**kwargs):
- self.f.write("WARNING: "+ (msg % args) + "\n")
- def error(self,msg,*args,**kwargs):
- self.f.write("ERROR: " + (msg % args) + "\n")
- critical = debug
- # Null logger is used when no output is generated. Does nothing.
- class NullLogger(object):
- def __getattribute__(self,name):
- return self
- def __call__(self,*args,**kwargs):
- return self
-
- # Exception raised for yacc-related errors
- class YaccError(Exception): pass
- # Format the result message that the parser produces when running in debug mode.
- def format_result(r):
- repr_str = repr(r)
- if '\n' in repr_str: repr_str = repr(repr_str)
- if len(repr_str) > resultlimit:
- repr_str = repr_str[:resultlimit]+" ..."
- result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
- return result
- # Format stack entries when the parser is running in debug mode
- def format_stack_entry(r):
- repr_str = repr(r)
- if '\n' in repr_str: repr_str = repr(repr_str)
- if len(repr_str) < 16:
- return repr_str
- else:
- return "<%s @ 0x%x>" % (type(r).__name__,id(r))
- #-----------------------------------------------------------------------------
- # === LR Parsing Engine ===
- #
- # The following classes are used for the LR parser itself. These are not
- # used during table construction and are independent of the actual LR
- # table generation algorithm
- #-----------------------------------------------------------------------------
- # This class is used to hold non-terminal grammar symbols during parsing.
- # It normally has the following attributes set:
- # .type = Grammar symbol type
- # .value = Symbol value
- # .lineno = Starting line number
- # .endlineno = Ending line number (optional, set automatically)
- # .lexpos = Starting lex position
- # .endlexpos = Ending lex position (optional, set automatically)
- class YaccSymbol:
- def __str__(self): return self.type
- def __repr__(self): return str(self)
- # This class is a wrapper around the objects actually passed to each
- # grammar rule. Index lookup and assignment actually assign the
- # .value attribute of the underlying YaccSymbol object.
- # The lineno() method returns the line number of a given
- # item (or 0 if not defined). The linespan() method returns
- # a tuple of (startline,endline) representing the range of lines
- # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
- # representing the range of positional information for a symbol.
- class YaccProduction:
- def __init__(self,s,stack=None):
- self.slice = s
- self.stack = stack
- self.lexer = None
- self.parser= None
- def __getitem__(self,n):
- if n >= 0: return self.slice[n].value
- else: return self.stack[n].value
- def __setitem__(self,n,v):
- self.slice[n].value = v
- def __getslice__(self,i,j):
- return [s.value for s in self.slice[i:j]]
- def __len__(self):
- return len(self.slice)
- def lineno(self,n):
- return getattr(self.slice[n],"lineno",0)
- def set_lineno(self,n,lineno):
- self.slice[n].lineno = n
- def linespan(self,n):
- startline = getattr(self.slice[n],"lineno",0)
- endline = getattr(self.slice[n],"endlineno",startline)
- return startline,endline
- def lexpos(self,n):
- return getattr(self.slice[n],"lexpos",0)
- def lexspan(self,n):
- startpos = getattr(self.slice[n],"lexpos",0)
- endpos = getattr(self.slice[n],"endlexpos",startpos)
- return startpos,endpos
- def error(self):
- raise SyntaxError
- # -----------------------------------------------------------------------------
- # == LRParser ==
- #
- # The LR Parsing engine.
- # -----------------------------------------------------------------------------
- class LRParser:
- def __init__(self,lrtab,errorf):
- self.productions = lrtab.lr_productions
- self.action = lrtab.lr_action
- self.goto = lrtab.lr_goto
- self.errorfunc = errorf
- def errok(self):
- self.errorok = 1
- def restart(self):
- del self.statestack[:]
- del self.symstack[:]
- sym = YaccSymbol()
- sym.type = '$end'
- self.symstack.append(sym)
- self.statestack.append(0)
- def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
- if debug or yaccdevel:
- if isinstance(debug,int):
- debug = PlyLogger(sys.stderr)
- return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
- elif tracking:
- return self.parseopt(input,lexer,debug,tracking,tokenfunc)
- else:
- return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
-
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # parsedebug().
- #
- # This is the debugging enabled version of parse(). All changes made to the
- # parsing engine should be made here. For the non-debugging version,
- # copy this code to a method parseopt() and delete all of the sections
- # enclosed in:
- #
- # #--! DEBUG
- # statements
- # #--! DEBUG
- #
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
- lookahead = None # Current lookahead symbol
- lookaheadstack = [ ] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
- # --! DEBUG
- debug.info("PLY: PARSE DEBUG START")
- # --! DEBUG
- # If no lexer was given, we will try to use the lex module
- if not lexer:
- lex = load_ply_lex()
- lexer = lex.lexer
- # Set up the lexer and parser objects on pslice
- pslice.lexer = lexer
- pslice.parser = self
- # If input was supplied, pass to lexer
- if input is not None:
- lexer.input(input)
- if tokenfunc is None:
- # Tokenize function
- get_token = lexer.token
- else:
- get_token = tokenfunc
- # Set up the state and symbol stacks
- statestack = [ ] # Stack of parsing states
- self.statestack = statestack
- symstack = [ ] # Stack of grammar symbols
- self.symstack = symstack
- pslice.stack = symstack # Put in the production
- errtoken = None # Err token
- # The start state is assumed to be (0,$end)
- statestack.append(0)
- sym = YaccSymbol()
- sym.type = "$end"
- symstack.append(sym)
- state = 0
- while 1:
- # Get the next symbol on the input. If a lookahead symbol
- # is already set, we just use that. Otherwise, we'll pull
- # the next token off of the lookaheadstack or from the lexer
- # --! DEBUG
- debug.debug('')
- debug.debug('State : %s', state)
- # --! DEBUG
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
- if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = "$end"
- # --! DEBUG
- debug.debug('Stack : %s',
- ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
- # --! DEBUG
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
- if t is not None:
- if t > 0:
- # shift a symbol on the stack
- statestack.append(t)
- state = t
-
- # --! DEBUG
- debug.debug("Action : Shift and goto state %s", t)
- # --! DEBUG
- symstack.append(lookahead)
- lookahead = None
- # Decrease error count on successful shift
- if errorcount: errorcount -=1
- continue
- if t < 0:
- # reduce a symbol on the stack, emit a production
- p = prod[-t]
- pname = p.name
- plen = p.len
- # Get production function
- sym = YaccSymbol()
- sym.type = pname # Production name
- sym.value = None
- # --! DEBUG
- if plen:
- debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
- else:
- debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
-
- # --! DEBUG
- if plen:
- targ = symstack[-plen-1:]
- targ[0] = sym
- # --! TRACKING
- if tracking:
- t1 = targ[1]
- sym.lineno = t1.lineno
- sym.lexpos = t1.lexpos
- t1 = targ[-1]
- sym.endlineno = getattr(t1,"endlineno",t1.lineno)
- sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
- # --! TRACKING
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # below as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
-
- try:
- # Call the grammar rule with our special slice object
- del symstack[-plen:]
- del statestack[-plen:]
- p.callable(pslice)
- # --! DEBUG
- debug.info("Result : %s", format_result(pslice[0]))
- # --! DEBUG
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- else:
- # --! TRACKING
- if tracking:
- sym.lineno = lexer.lineno
- sym.lexpos = lexer.lexpos
- # --! TRACKING
- targ = [ sym ]
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # above as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
- try:
- # Call the grammar rule with our special slice object
- p.callable(pslice)
- # --! DEBUG
- debug.info("Result : %s", format_result(pslice[0]))
- # --! DEBUG
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- if t == 0:
- n = symstack[-1]
- result = getattr(n,"value",None)
- # --! DEBUG
- debug.info("Done : Returning %s", format_result(result))
- debug.info("PLY: PARSE DEBUG END")
- # --! DEBUG
- return result
- if t == None:
- # --! DEBUG
- debug.error('Error : %s',
- ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
- # --! DEBUG
- # We have some kind of parsing error here. To handle
- # this, we are going to push the current token onto
- # the tokenstack and replace it with an 'error' token.
- # If there are any synchronization rules, they may
- # catch it.
- #
- # In addition to pushing the error token, we call call
- # the user defined p_error() function if this is the
- # first syntax error. This function is only called if
- # errorcount == 0.
- if errorcount == 0 or self.errorok:
- errorcount = error_count
- self.errorok = 0
- errtoken = lookahead
- if errtoken.type == "$end":
- errtoken = None # End of file!
- if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
- if errtoken and not hasattr(errtoken,'lexer'):
- errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
- if self.errorok:
- # User must have done some kind of panic
- # mode recovery on their own. The
- # returned token is the next lookahead
- lookahead = tok
- errtoken = None
- continue
- else:
- if errtoken:
- if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
- else: lineno = 0
- if lineno:
- sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
- else:
- sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
- else:
- sys.stderr.write("yacc: Parse error in input. EOF\n")
- return
- else:
- errorcount = error_count
- # case 1: the statestack only has 1 entry on it. If we're in this state, the
- # entire parse has been rolled back and we're completely hosed. The token is
- # discarded and we just keep going.
- if len(statestack) <= 1 and lookahead.type != "$end":
- lookahead = None
- errtoken = None
- state = 0
- # Nuke the pushback stack
- del lookaheadstack[:]
- continue
- # case 2: the statestack has a couple of entries on it, but we're
- # at the end of the file. nuke the top entry and generate an error token
- # Start nuking entries on the stack
- if lookahead.type == "$end":
- # Whoa. We're really hosed here. Bail out
- return
- if lookahead.type != 'error':
- sym = symstack[-1]
- if sym.type == 'error':
- # Hmmm. Error is on top of stack, we'll just nuke input
- # symbol and continue
- lookahead = None
- continue
- t = YaccSymbol()
- t.type = 'error'
- if hasattr(lookahead,"lineno"):
- t.lineno = lookahead.lineno
- t.value = lookahead
- lookaheadstack.append(lookahead)
- lookahead = t
- else:
- symstack.pop()
- statestack.pop()
- state = statestack[-1] # Potential bug fix
- continue
- # Call an error function here
- raise RuntimeError("yacc: internal parser error!!!\n")
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # parseopt().
- #
- # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
- # Edit the debug version above, then copy any modifications to the method
- # below while removing #--! DEBUG sections.
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
- lookahead = None # Current lookahead symbol
- lookaheadstack = [ ] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
- # If no lexer was given, we will try to use the lex module
- if not lexer:
- lex = load_ply_lex()
- lexer = lex.lexer
-
- # Set up the lexer and parser objects on pslice
- pslice.lexer = lexer
- pslice.parser = self
- # If input was supplied, pass to lexer
- if input is not None:
- lexer.input(input)
- if tokenfunc is None:
- # Tokenize function
- get_token = lexer.token
- else:
- get_token = tokenfunc
- # Set up the state and symbol stacks
- statestack = [ ] # Stack of parsing states
- self.statestack = statestack
- symstack = [ ] # Stack of grammar symbols
- self.symstack = symstack
- pslice.stack = symstack # Put in the production
- errtoken = None # Err token
- # The start state is assumed to be (0,$end)
- statestack.append(0)
- sym = YaccSymbol()
- sym.type = '$end'
- symstack.append(sym)
- state = 0
- while 1:
- # Get the next symbol on the input. If a lookahead symbol
- # is already set, we just use that. Otherwise, we'll pull
- # the next token off of the lookaheadstack or from the lexer
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
- if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = '$end'
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
- if t is not None:
- if t > 0:
- # shift a symbol on the stack
- statestack.append(t)
- state = t
- symstack.append(lookahead)
- lookahead = None
- # Decrease error count on successful shift
- if errorcount: errorcount -=1
- continue
- if t < 0:
- # reduce a symbol on the stack, emit a production
- p = prod[-t]
- pname = p.name
- plen = p.len
- # Get production function
- sym = YaccSymbol()
- sym.type = pname # Production name
- sym.value = None
- if plen:
- targ = symstack[-plen-1:]
- targ[0] = sym
- # --! TRACKING
- if tracking:
- t1 = targ[1]
- sym.lineno = t1.lineno
- sym.lexpos = t1.lexpos
- t1 = targ[-1]
- sym.endlineno = getattr(t1,"endlineno",t1.lineno)
- sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
- # --! TRACKING
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # below as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
-
- try:
- # Call the grammar rule with our special slice object
- del symstack[-plen:]
- del statestack[-plen:]
- p.callable(pslice)
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- else:
- # --! TRACKING
- if tracking:
- sym.lineno = lexer.lineno
- sym.lexpos = lexer.lexpos
- # --! TRACKING
- targ = [ sym ]
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # above as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
- try:
- # Call the grammar rule with our special slice object
- p.callable(pslice)
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- if t == 0:
- n = symstack[-1]
- return getattr(n,"value",None)
- if t == None:
- # We have some kind of parsing error here. To handle
- # this, we are going to push the current token onto
- # the tokenstack and replace it with an 'error' token.
- # If there are any synchronization rules, they may
- # catch it.
- #
- # In addition to pushing the error token, we call call
- # the user defined p_error() function if this is the
- # first syntax error. This function is only called if
- # errorcount == 0.
- if errorcount == 0 or self.errorok:
- errorcount = error_count
- self.errorok = 0
- errtoken = lookahead
- if errtoken.type == '$end':
- errtoken = None # End of file!
- if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
- if errtoken and not hasattr(errtoken,'lexer'):
- errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
- if self.errorok:
- # User must have done some kind of panic
- # mode recovery on their own. The
- # returned token is the next lookahead
- lookahead = tok
- errtoken = None
- continue
- else:
- if errtoken:
- if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
- else: lineno = 0
- if lineno:
- sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
- else:
- sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
- else:
- sys.stderr.write("yacc: Parse error in input. EOF\n")
- return
- else:
- errorcount = error_count
- # case 1: the statestack only has 1 entry on it. If we're in this state, the
- # entire parse has been rolled back and we're completely hosed. The token is
- # discarded and we just keep going.
- if len(statestack) <= 1 and lookahead.type != '$end':
- lookahead = None
- errtoken = None
- state = 0
- # Nuke the pushback stack
- del lookaheadstack[:]
- continue
- # case 2: the statestack has a couple of entries on it, but we're
- # at the end of the file. nuke the top entry and generate an error token
- # Start nuking entries on the stack
- if lookahead.type == '$end':
- # Whoa. We're really hosed here. Bail out
- return
- if lookahead.type != 'error':
- sym = symstack[-1]
- if sym.type == 'error':
- # Hmmm. Error is on top of stack, we'll just nuke input
- # symbol and continue
- lookahead = None
- continue
- t = YaccSymbol()
- t.type = 'error'
- if hasattr(lookahead,"lineno"):
- t.lineno = lookahead.lineno
- t.value = lookahead
- lookaheadstack.append(lookahead)
- lookahead = t
- else:
- symstack.pop()
- statestack.pop()
- state = statestack[-1] # Potential bug fix
- continue
- # Call an error function here
- raise RuntimeError("yacc: internal parser error!!!\n")
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # parseopt_notrack().
- #
- # Optimized version of parseopt() with line number tracking removed.
- # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
- # code in the #--! TRACKING sections
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
- lookahead = None # Current lookahead symbol
- lookaheadstack = [ ] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
- # If no lexer was given, we will try to use the lex module
- if not lexer:
- lex = load_ply_lex()
- lexer = lex.lexer
-
- # Set up the lexer and parser objects on pslice
- pslice.lexer = lexer
- pslice.parser = self
- # If input was supplied, pass to lexer
- if input is not None:
- lexer.input(input)
- if tokenfunc is None:
- # Tokenize function
- get_token = lexer.token
- else:
- get_token = tokenfunc
- # Set up the state and symbol stacks
- statestack = [ ] # Stack of parsing states
- self.statestack = statestack
- symstack = [ ] # Stack of grammar symbols
- self.symstack = symstack
- pslice.stack = symstack # Put in the production
- errtoken = None # Err token
- # The start state is assumed to be (0,$end)
- statestack.append(0)
- sym = YaccSymbol()
- sym.type = '$end'
- symstack.append(sym)
- state = 0
- while 1:
- # Get the next symbol on the input. If a lookahead symbol
- # is already set, we just use that. Otherwise, we'll pull
- # the next token off of the lookaheadstack or from the lexer
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
- if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = '$end'
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
- if t is not None:
- if t > 0:
- # shift a symbol on the stack
- statestack.append(t)
- state = t
- symstack.append(lookahead)
- lookahead = None
- # Decrease error count on successful shift
- if errorcount: errorcount -=1
- continue
- if t < 0:
- # reduce a symbol on the stack, emit a production
- p = prod[-t]
- pname = p.name
- plen = p.len
- # Get production function
- sym = YaccSymbol()
- sym.type = pname # Production name
- sym.value = None
- if plen:
- targ = symstack[-plen-1:]
- targ[0] = sym
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # below as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
-
- try:
- # Call the grammar rule with our special slice object
- del symstack[-plen:]
- del statestack[-plen:]
- p.callable(pslice)
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- else:
- targ = [ sym ]
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- # The code enclosed in this section is duplicated
- # above as a performance optimization. Make sure
- # changes get made in both locations.
- pslice.slice = targ
- try:
- # Call the grammar rule with our special slice object
- p.callable(pslice)
- symstack.append(sym)
- state = goto[statestack[-1]][pname]
- statestack.append(state)
- except SyntaxError:
- # If an error was set. Enter error recovery state
- lookaheadstack.append(lookahead)
- symstack.pop()
- statestack.pop()
- state = statestack[-1]
- sym.type = 'error'
- lookahead = sym
- errorcount = error_count
- self.errorok = 0
- continue
- # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- if t == 0:
- n = symstack[-1]
- return getattr(n,"value",None)
- if t == None:
- # We have some kind of parsing error here. To handle
- # this, we are going to push the current token onto
- # the tokenstack and replace it with an 'error' token.
- # If there are any synchronization rules, they may
- # catch it.
- #
- # In addition to pushing the error token, we call call
- # the user defined p_error() function if this is the
- # first syntax error. This function is only called if
- # errorcount == 0.
- if errorcount == 0 or self.errorok:
- errorcount = error_count
- self.errorok = 0
- errtoken = lookahead
- if errtoken.type == '$end':
- errtoken = None # End of file!
- if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
- if errtoken and not hasattr(errtoken,'lexer'):
- errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
- if self.errorok:
- # User must have done some kind of panic
- # mode recovery on their own. The
- # returned token is the next lookahead
- lookahead = tok
- errtoken = None
- continue
- else:
- if errtoken:
- if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
- else: lineno = 0
- if lineno:
- sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
- else:
- sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
- else:
- sys.stderr.write("yacc: Parse error in input. EOF\n")
- return
- else:
- errorcount = error_count
- # case 1: the statestack only has 1 entry on it. If we're in this state, the
- # entire parse has been rolled back and we're completely hosed. The token is
- # discarded and we just keep going.
- if len(statestack) <= 1 and lookahead.type != '$end':
- lookahead = None
- errtoken = None
- state = 0
- # Nuke the pushback stack
- del lookaheadstack[:]
- continue
- # case 2: the statestack has a couple of entries on it, but we're
- # at the end of the file. nuke the top entry and generate an error token
- # Start nuking entries on the stack
- if lookahead.type == '$end':
- # Whoa. We're really hosed here. Bail out
- return
- if lookahead.type != 'error':
- sym = symstack[-1]
- if sym.type == 'error':
- # Hmmm. Error is on top of stack, we'll just nuke input
- # symbol and continue
- lookahead = None
- continue
- t = YaccSymbol()
- t.type = 'error'
- if hasattr(lookahead,"lineno"):
- t.lineno = lookahead.lineno
- t.value = lookahead
- lookaheadstack.append(lookahead)
- lookahead = t
- else:
- symstack.pop()
- statestack.pop()
- state = statestack[-1] # Potential bug fix
- continue
- # Call an error function here
- raise RuntimeError("yacc: internal parser error!!!\n")
- # -----------------------------------------------------------------------------
- # === Grammar Representation ===
- #
- # The following functions, classes, and variables are used to represent and
- # manipulate the rules that make up a grammar.
- # -----------------------------------------------------------------------------
- import re
- # regex matching identifiers
- _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
- # -----------------------------------------------------------------------------
- # class Production:
- #
- # This class stores the raw information about a single production or grammar rule.
- # A grammar rule refers to a specification such as this:
- #
- # expr : expr PLUS term
- #
- # Here are the basic attributes defined on all productions
- #
- # name - Name of the production. For example 'expr'
- # prod - A list of symbols on the right side ['expr','PLUS','term']
- # prec - Production precedence level
- # number - Production number.
- # func - Function that executes on reduce
- # file - File where production function is defined
- # lineno - Line number where production function is defined
- #
- # The following attributes are defined or optional.
- #
- # len - Length of the production (number of symbols on right hand side)
- # usyms - Set of unique symbols found in the production
- # -----------------------------------------------------------------------------
- class Production(object):
- reduced = 0
- def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
- self.name = name
- self.prod = tuple(prod)
- self.number = number
- self.func = func
- self.callable = None
- self.file = file
- self.line = line
- self.prec = precedence
- # Internal settings used during table construction
-
- self.len = len(self.prod) # Length of the production
- # Create a list of unique production symbols used in the production
- self.usyms = [ ]
- for s in self.prod:
- if s not in self.usyms:
- self.usyms.append(s)
- # List of all LR items for the production
- self.lr_items = []
- self.lr_next = None
- # Create a string representation
- if self.prod:
- self.str = "%s -> %s" % (self.name," ".join(self.prod))
- else:
- self.str = "%s -> <empty>" % self.name
- def __str__(self):
- return self.str
- def __repr__(self):
- return "Production("+str(self)+")"
- def __len__(self):
- return len(self.prod)
- def __nonzero__(self):
- return 1
- def __getitem__(self,index):
- return self.prod[index]
-
- # Return the nth lr_item from the production (or None if at the end)
- def lr_item(self,n):
- if n > len(self.prod): return None
- p = LRItem(self,n)
- # Precompute the list of productions immediately following. Hack. Remove later
- try:
- p.lr_after = Prodnames[p.prod[n+1]]
- except (IndexError,KeyError):
- p.lr_after = []
- try:
- p.lr_before = p.prod[n-1]
- except IndexError:
- p.lr_before = None
- return p
-
- # Bind the production function name to a callable
- def bind(self,pdict):
- if self.func:
- self.callable = pdict[self.func]
- # This class serves as a minimal standin for Production objects when
- # reading table data from files. It only contains information
- # actually used by the LR parsing engine, plus some additional
- # debugging information.
- class MiniProduction(object):
- def __init__(self,str,name,len,func,file,line):
- self.name …
Large files files are truncated, but you can click here to view the full file