
/Lib/lib2to3/pgen2/grammar.py

http://unladen-swallow.googlecode.com/
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import pickle

# Local imports
from . import token, tokenize


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """
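
    # Illustrative sketch (not part of the original module): for a hypothetical
    # one-keyword rule such as  pass_stmt: 'pass'  the tables could look
    # roughly like this; the concrete numbers are made up for the example.
    #
    #   symbol2number = {'pass_stmt': 256}
    #   number2symbol = {256: 'pass_stmt'}
    #   labels        = [(0, 'EMPTY'), (token.NAME, 'pass')]
    #   keywords      = {'pass': 1}                 # 'pass' -> label index 1
    #   states        = [[[(1, 1)], [(0, 1)]]]      # arc on label 1 to state 1;
    #                                               # state 1 is final: (0, 1)
    #   dfas          = {256: (states[0], {1: 1})}  # first set as a dict of labels
    #
    # Real tables are built by the pgen or conv subclasses mentioned above.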

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        f = open(filename, "wb")
        pickle.dump(self.__dict__, f, 2)
        f.close()

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        f = open(filename, "rb")
        d = pickle.load(f)
        f.close()
        self.__dict__.update(d)
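
    # Usage sketch (added for illustration; not in the original module).  A
    # grammar built by a subclass can be cached with dump() and re-read with
    # load(), something along these lines, assuming the companion pgen
    # module's generate_grammar() helper:
    #
    #   g = pgen.generate_grammar("Grammar.txt")
    #   g.dump("Grammar.pickle")
    #   g2 = Grammar()
    #   g2.load("Grammar.pickle")
    #
    # Note that load() replaces the instance __dict__ wholesale, so any tables
    # set beforehand are overwritten.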

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print "s2n"
        pprint(self.symbol2number)
        print "n2s"
        pprint(self.number2symbol)
        print "states"
        pprint(self.states)
        print "dfas"
        pprint(self.dfas)
        print "labels"
        pprint(self.labels)
        print "start", self.start


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
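
# Illustrative check (not part of the original module): after the loop above,
# opmap maps operator text to the numeric codes defined in the token module,
# for example:
#
#   opmap["("]   == token.LPAR
#   opmap["=="]  == token.EQEQUAL
#   opmap["**="] == token.DOUBLESTAREQUAL
#
# As the module docstring notes, tokenize reports every operator as the
# generic OP code, so this table is what recovers the specific token code
# from an operator's text.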