PageRenderTime 1103ms CodeModel.GetById 283ms app.highlight 546ms RepoModel.GetById 271ms app.codeStats 0ms

/Lib/lib2to3/pgen2/conv.py

http://unladen-swallow.googlecode.com/
Python | 257 lines | 232 code | 10 blank | 15 comment | 33 complexity | 942a8910f37b9e5d202806ea05f7b2f1 MD5 | raw file
  1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
  2# Licensed to PSF under a Contributor Agreement.
  3
  4"""Convert graminit.[ch] spit out by pgen to Python code.
  5
  6Pgen is the Python parser generator.  It is useful to quickly create a
  7parser from a grammar file in Python's grammar notation.  But I don't
  8want my parsers to be written in C (yet), so I'm translating the
  9parsing tables to Python data structures and writing a Python parse
 10engine.
 11
 12Note that the token numbers are constants determined by the standard
 13Python tokenizer.  The standard token module defines these numbers and
 14their names (the names are not used much).  The token numbers are
 15hardcoded into the Python tokenizer and into pgen.  A Python
 16implementation of the Python tokenizer is also available, in the
 17standard tokenize module.
 18
 19On the other hand, symbol numbers (representing the grammar's
 20non-terminals) are assigned by pgen based on the actual grammar
 21input.
 22
 23Note: this module is pretty much obsolete; the pgen module generates
 24equivalent grammar tables directly from the Grammar.txt input file
 25without having to invoke the Python pgen C program.
 26
 27"""
 28
 29# Python imports
 30import re
 31
 32# Local imports
 33from pgen2 import grammar, token
 34
 35
 36class Converter(grammar.Grammar):
 37    """Grammar subclass that reads classic pgen output files.
 38
 39    The run() method reads the tables as produced by the pgen parser
 40    generator, typically contained in two C files, graminit.h and
 41    graminit.c.  The other methods are for internal use only.
 42
 43    See the base class for more documentation.
 44
 45    """
 46
 47    def run(self, graminit_h, graminit_c):
 48        """Load the grammar tables from the text files written by pgen."""
 49        self.parse_graminit_h(graminit_h)
 50        self.parse_graminit_c(graminit_c)
 51        self.finish_off()
 52
 53    def parse_graminit_h(self, filename):
 54        """Parse the .h file writen by pgen.  (Internal)
 55
 56        This file is a sequence of #define statements defining the
 57        nonterminals of the grammar as numbers.  We build two tables
 58        mapping the numbers to names and back.
 59
 60        """
 61        try:
 62            f = open(filename)
 63        except IOError, err:
 64            print "Can't open %s: %s" % (filename, err)
 65            return False
 66        self.symbol2number = {}
 67        self.number2symbol = {}
 68        lineno = 0
 69        for line in f:
 70            lineno += 1
 71            mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
 72            if not mo and line.strip():
 73                print "%s(%s): can't parse %s" % (filename, lineno,
 74                                                  line.strip())
 75            else:
 76                symbol, number = mo.groups()
 77                number = int(number)
 78                assert symbol not in self.symbol2number
 79                assert number not in self.number2symbol
 80                self.symbol2number[symbol] = number
 81                self.number2symbol[number] = symbol
 82        return True
 83
 84    def parse_graminit_c(self, filename):
 85        """Parse the .c file writen by pgen.  (Internal)
 86
 87        The file looks as follows.  The first two lines are always this:
 88
 89        #include "pgenheaders.h"
 90        #include "grammar.h"
 91
 92        After that come four blocks:
 93
 94        1) one or more state definitions
 95        2) a table defining dfas
 96        3) a table defining labels
 97        4) a struct defining the grammar
 98
 99        A state definition has the following form:
100        - one or more arc arrays, each of the form:
101          static arc arcs_<n>_<m>[<k>] = {
102                  {<i>, <j>},
103                  ...
104          };
105        - followed by a state array, of the form:
106          static state states_<s>[<t>] = {
107                  {<k>, arcs_<n>_<m>},
108                  ...
109          };
110
111        """
112        try:
113            f = open(filename)
114        except IOError, err:
115            print "Can't open %s: %s" % (filename, err)
116            return False
117        # The code below essentially uses f's iterator-ness!
118        lineno = 0
119
120        # Expect the two #include lines
121        lineno, line = lineno+1, f.next()
122        assert line == '#include "pgenheaders.h"\n', (lineno, line)
123        lineno, line = lineno+1, f.next()
124        assert line == '#include "grammar.h"\n', (lineno, line)
125
126        # Parse the state definitions
127        lineno, line = lineno+1, f.next()
128        allarcs = {}
129        states = []
130        while line.startswith("static arc "):
131            while line.startswith("static arc "):
132                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
133                              line)
134                assert mo, (lineno, line)
135                n, m, k = map(int, mo.groups())
136                arcs = []
137                for _ in range(k):
138                    lineno, line = lineno+1, f.next()
139                    mo = re.match(r"\s+{(\d+), (\d+)},$", line)
140                    assert mo, (lineno, line)
141                    i, j = map(int, mo.groups())
142                    arcs.append((i, j))
143                lineno, line = lineno+1, f.next()
144                assert line == "};\n", (lineno, line)
145                allarcs[(n, m)] = arcs
146                lineno, line = lineno+1, f.next()
147            mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
148            assert mo, (lineno, line)
149            s, t = map(int, mo.groups())
150            assert s == len(states), (lineno, line)
151            state = []
152            for _ in range(t):
153                lineno, line = lineno+1, f.next()
154                mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
155                assert mo, (lineno, line)
156                k, n, m = map(int, mo.groups())
157                arcs = allarcs[n, m]
158                assert k == len(arcs), (lineno, line)
159                state.append(arcs)
160            states.append(state)
161            lineno, line = lineno+1, f.next()
162            assert line == "};\n", (lineno, line)
163            lineno, line = lineno+1, f.next()
164        self.states = states
165
166        # Parse the dfas
167        dfas = {}
168        mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
169        assert mo, (lineno, line)
170        ndfas = int(mo.group(1))
171        for i in range(ndfas):
172            lineno, line = lineno+1, f.next()
173            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
174                          line)
175            assert mo, (lineno, line)
176            symbol = mo.group(2)
177            number, x, y, z = map(int, mo.group(1, 3, 4, 5))
178            assert self.symbol2number[symbol] == number, (lineno, line)
179            assert self.number2symbol[number] == symbol, (lineno, line)
180            assert x == 0, (lineno, line)
181            state = states[z]
182            assert y == len(state), (lineno, line)
183            lineno, line = lineno+1, f.next()
184            mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
185            assert mo, (lineno, line)
186            first = {}
187            rawbitset = eval(mo.group(1))
188            for i, c in enumerate(rawbitset):
189                byte = ord(c)
190                for j in range(8):
191                    if byte & (1<<j):
192                        first[i*8 + j] = 1
193            dfas[number] = (state, first)
194        lineno, line = lineno+1, f.next()
195        assert line == "};\n", (lineno, line)
196        self.dfas = dfas
197
198        # Parse the labels
199        labels = []
200        lineno, line = lineno+1, f.next()
201        mo = re.match(r"static label labels\[(\d+)\] = {$", line)
202        assert mo, (lineno, line)
203        nlabels = int(mo.group(1))
204        for i in range(nlabels):
205            lineno, line = lineno+1, f.next()
206            mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
207            assert mo, (lineno, line)
208            x, y = mo.groups()
209            x = int(x)
210            if y == "0":
211                y = None
212            else:
213                y = eval(y)
214            labels.append((x, y))
215        lineno, line = lineno+1, f.next()
216        assert line == "};\n", (lineno, line)
217        self.labels = labels
218
219        # Parse the grammar struct
220        lineno, line = lineno+1, f.next()
221        assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
222        lineno, line = lineno+1, f.next()
223        mo = re.match(r"\s+(\d+),$", line)
224        assert mo, (lineno, line)
225        ndfas = int(mo.group(1))
226        assert ndfas == len(self.dfas)
227        lineno, line = lineno+1, f.next()
228        assert line == "\tdfas,\n", (lineno, line)
229        lineno, line = lineno+1, f.next()
230        mo = re.match(r"\s+{(\d+), labels},$", line)
231        assert mo, (lineno, line)
232        nlabels = int(mo.group(1))
233        assert nlabels == len(self.labels), (lineno, line)
234        lineno, line = lineno+1, f.next()
235        mo = re.match(r"\s+(\d+)$", line)
236        assert mo, (lineno, line)
237        start = int(mo.group(1))
238        assert start in self.number2symbol, (lineno, line)
239        self.start = start
240        lineno, line = lineno+1, f.next()
241        assert line == "};\n", (lineno, line)
242        try:
243            lineno, line = lineno+1, f.next()
244        except StopIteration:
245            pass
246        else:
247            assert 0, (lineno, line)
248
249    def finish_off(self):
250        """Create additional useful structures.  (Internal)."""
251        self.keywords = {} # map from keyword strings to arc labels
252        self.tokens = {}   # map from numeric token values to arc labels
253        for ilabel, (type, value) in enumerate(self.labels):
254            if type == token.NAME and value is not None:
255                self.keywords[value] = ilabel
256            elif value is None:
257                self.tokens[type] = ilabel