PageRenderTime 345ms CodeModel.GetById 181ms app.highlight 14ms RepoModel.GetById 147ms app.codeStats 0ms

/Lib/lib2to3/pgen2/parse.py

http://unladen-swallow.googlecode.com/
Python | 201 lines | 106 code | 9 blank | 86 comment | 2 complexity | 80c0ee069eab8de116e1c13572d6cd4b MD5 | raw file
  1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
  2# Licensed to PSF under a Contributor Agreement.
  3
  4"""Parser engine for the grammar tables generated by pgen.
  5
  6The grammar table must be loaded first.
  7
  8See Parser/parser.c in the Python distribution for additional info on
  9how this parsing engine works.
 10
 11"""
 12
 13# Local imports
 14from . import token
 15
 16class ParseError(Exception):
 17    """Exception to signal the parser is stuck."""
 18
 19    def __init__(self, msg, type, value, context):
 20        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
 21                           (msg, type, value, context))
 22        self.msg = msg
 23        self.type = type
 24        self.value = value
 25        self.context = context
 26
 27class Parser(object):
 28    """Parser engine.
 29
 30    The proper usage sequence is:
 31
 32    p = Parser(grammar, [converter])  # create instance
 33    p.setup([start])                  # prepare for parsing
 34    <for each input token>:
 35        if p.addtoken(...):           # parse a token; may raise ParseError
 36            break
 37    root = p.rootnode                 # root of abstract syntax tree
 38
 39    A Parser instance may be reused by calling setup() repeatedly.
 40
 41    A Parser instance contains state pertaining to the current token
 42    sequence, and should not be used concurrently by different threads
 43    to parse separate token sequences.
 44
 45    See driver.py for how to get input tokens by tokenizing a file or
 46    string.
 47
 48    Parsing is complete when addtoken() returns True; the root of the
 49    abstract syntax tree can then be retrieved from the rootnode
 50    instance variable.  When a syntax error occurs, addtoken() raises
 51    the ParseError exception.  There is no error recovery; the parser
 52    cannot be used after a syntax error was reported (but it can be
 53    reinitialized by calling setup()).
 54
 55    """
 56
 57    def __init__(self, grammar, convert=None):
 58        """Constructor.
 59
 60        The grammar argument is a grammar.Grammar instance; see the
 61        grammar module for more information.
 62
 63        The parser is not ready yet for parsing; you must call the
 64        setup() method to get it started.
 65
 66        The optional convert argument is a function mapping concrete
 67        syntax tree nodes to abstract syntax tree nodes.  If not
 68        given, no conversion is done and the syntax tree produced is
 69        the concrete syntax tree.  If given, it must be a function of
 70        two arguments, the first being the grammar (a grammar.Grammar
 71        instance), and the second being the concrete syntax tree node
 72        to be converted.  The syntax tree is converted from the bottom
 73        up.
 74
 75        A concrete syntax tree node is a (type, value, context, nodes)
 76        tuple, where type is the node type (a token or symbol number),
 77        value is None for symbols and a string for tokens, context is
 78        None or an opaque value used for error reporting (typically a
 79        (lineno, offset) pair), and nodes is a list of children for
 80        symbols, and None for tokens.
 81
 82        An abstract syntax tree node may be anything; this is entirely
 83        up to the converter function.
 84
 85        """
 86        self.grammar = grammar
 87        self.convert = convert or (lambda grammar, node: node)
 88
 89    def setup(self, start=None):
 90        """Prepare for parsing.
 91
 92        This *must* be called before starting to parse.
 93
 94        The optional argument is an alternative start symbol; it
 95        defaults to the grammar's start symbol.
 96
 97        You can use a Parser instance to parse any number of programs;
 98        each time you call setup() the parser is reset to an initial
 99        state determined by the (implicit or explicit) start symbol.
100
101        """
102        if start is None:
103            start = self.grammar.start
104        # Each stack entry is a tuple: (dfa, state, node).
105        # A node is a tuple: (type, value, context, children),
106        # where children is a list of nodes or None, and context may be None.
107        newnode = (start, None, None, [])
108        stackentry = (self.grammar.dfas[start], 0, newnode)
109        self.stack = [stackentry]
110        self.rootnode = None
111        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
112
113    def addtoken(self, type, value, context):
114        """Add a token; return True iff this is the end of the program."""
115        # Map from token to label
116        ilabel = self.classify(type, value, context)
117        # Loop until the token is shifted; may raise exceptions
118        while True:
119            dfa, state, node = self.stack[-1]
120            states, first = dfa
121            arcs = states[state]
122            # Look for a state with this label
123            for i, newstate in arcs:
124                t, v = self.grammar.labels[i]
125                if ilabel == i:
126                    # Look it up in the list of labels
127                    assert t < 256
128                    # Shift a token; we're done with it
129                    self.shift(type, value, newstate, context)
130                    # Pop while we are in an accept-only state
131                    state = newstate
132                    while states[state] == [(0, state)]:
133                        self.pop()
134                        if not self.stack:
135                            # Done parsing!
136                            return True
137                        dfa, state, node = self.stack[-1]
138                        states, first = dfa
139                    # Done with this token
140                    return False
141                elif t >= 256:
142                    # See if it's a symbol and if we're in its first set
143                    itsdfa = self.grammar.dfas[t]
144                    itsstates, itsfirst = itsdfa
145                    if ilabel in itsfirst:
146                        # Push a symbol
147                        self.push(t, self.grammar.dfas[t], newstate, context)
148                        break # To continue the outer while loop
149            else:
150                if (0, state) in arcs:
151                    # An accepting state, pop it and try something else
152                    self.pop()
153                    if not self.stack:
154                        # Done parsing, but another token is input
155                        raise ParseError("too much input",
156                                         type, value, context)
157                else:
158                    # No success finding a transition
159                    raise ParseError("bad input", type, value, context)
160
161    def classify(self, type, value, context):
162        """Turn a token into a label.  (Internal)"""
163        if type == token.NAME:
164            # Keep a listing of all used names
165            self.used_names.add(value)
166            # Check for reserved words
167            ilabel = self.grammar.keywords.get(value)
168            if ilabel is not None:
169                return ilabel
170        ilabel = self.grammar.tokens.get(type)
171        if ilabel is None:
172            raise ParseError("bad token", type, value, context)
173        return ilabel
174
175    def shift(self, type, value, newstate, context):
176        """Shift a token.  (Internal)"""
177        dfa, state, node = self.stack[-1]
178        newnode = (type, value, context, None)
179        newnode = self.convert(self.grammar, newnode)
180        if newnode is not None:
181            node[-1].append(newnode)
182        self.stack[-1] = (dfa, newstate, node)
183
184    def push(self, type, newdfa, newstate, context):
185        """Push a nonterminal.  (Internal)"""
186        dfa, state, node = self.stack[-1]
187        newnode = (type, None, context, [])
188        self.stack[-1] = (dfa, newstate, node)
189        self.stack.append((newdfa, 0, newnode))
190
191    def pop(self):
192        """Pop a nonterminal.  (Internal)"""
193        popdfa, popstate, popnode = self.stack.pop()
194        newnode = self.convert(self.grammar, popnode)
195        if newnode is not None:
196            if self.stack:
197                dfa, state, node = self.stack[-1]
198                node[-1].append(newnode)
199            else:
200                self.rootnode = newnode
201                self.rootnode.used_names = self.used_names