/Dependencies/boo/lib/antlr-2.7.5/antlr/JavaCodeGenerator.java
Java | 3484 lines | 2405 code | 361 blank | 718 comment | 691 complexity | d399396ee38472edf7c786462382b1ca MD5 | raw file
Possible License(s): GPL-2.0
Large files are truncated, but you can click here to view the full file
- package antlr;
- /* ANTLR Translator Generator
- * Project led by Terence Parr at http://www.jGuru.com
- * Software rights: http://www.antlr.org/license.html
- *
- * $Id: //depot/code/org.antlr/release/antlr-2.7.5/antlr/JavaCodeGenerator.java#1 $
- */
- import java.util.Enumeration;
- import java.util.Hashtable;
- import antlr.collections.impl.BitSet;
- import antlr.collections.impl.Vector;
- import java.io.PrintWriter; //SAS: changed for proper text file io
- import java.io.IOException;
- import java.io.FileWriter;
- /**Generate MyParser.java, MyLexer.java and MyParserTokenTypes.java */
- public class JavaCodeGenerator extends CodeGenerator {
- // non-zero if inside syntactic predicate generation
- protected int syntacticPredLevel = 0;
- // Are we generating ASTs (for parsers and tree parsers) right now?
- protected boolean genAST = false;
- // Are we saving the text consumed (for lexers) right now?
- protected boolean saveText = false;
- // Grammar parameters set up to handle different grammar classes.
- // These are used to get instanceof tests out of code generation
- String labeledElementType;
- String labeledElementASTType;
- String labeledElementInit;
- String commonExtraArgs;
- String commonExtraParams;
- String commonLocalVars;
- String lt1Value;
- String exceptionThrown;
- String throwNoViable;
- /** Tracks the rule being generated. Used for mapTreeId */
- RuleBlock currentRule;
- /** Tracks the rule or labeled subrule being generated. Used for
- AST generation. */
- String currentASTResult;
- /** Mapping between the ids used in the current alt, and the
- * names of variables used to represent their AST values.
- */
- Hashtable treeVariableMap = new Hashtable();
- /** Used to keep track of which AST variables have been defined in a rule
- * (except for the #rule_name and #rule_name_in var's
- */
- Hashtable declaredASTVariables = new Hashtable();
- /* Count of unnamed generated variables */
- int astVarNumber = 1;
- /** Special value used to mark duplicate in treeVariableMap */
- protected static final String NONUNIQUE = new String();
- public static final int caseSizeThreshold = 127; // ascii is max
- private Vector semPreds;
- /** Create a Java code-generator using the given Grammar.
- * The caller must still call setTool, setBehavior, and setAnalyzer
- * before generating code.
- */
- public JavaCodeGenerator() {
- super();
- charFormatter = new JavaCharFormatter();
- }
- /** Adds a semantic predicate string to the sem pred vector
- These strings will be used to build an array of sem pred names
- when building a debugging parser. This method should only be
- called when the debug option is specified
- */
- protected int addSemPred(String predicate) {
- semPreds.appendElement(predicate);
- return semPreds.size() - 1;
- }
- public void exitIfError() {
- if (antlrTool.hasError()) {
- antlrTool.fatalError("Exiting due to errors.");
- }
- }
- /**Generate the parser, lexer, treeparser, and token types in Java */
- public void gen() {
- // Do the code generation
- try {
- // Loop over all grammars
- Enumeration grammarIter = behavior.grammars.elements();
- while (grammarIter.hasMoreElements()) {
- Grammar g = (Grammar)grammarIter.nextElement();
- // Connect all the components to each other
- g.setGrammarAnalyzer(analyzer);
- g.setCodeGenerator(this);
- analyzer.setGrammar(g);
- // To get right overloading behavior across hetrogeneous grammars
- setupGrammarParameters(g);
- g.generate();
- // print out the grammar with lookahead sets (and FOLLOWs)
- // System.out.print(g.toString());
- exitIfError();
- }
- // Loop over all token managers (some of which are lexers)
- Enumeration tmIter = behavior.tokenManagers.elements();
- while (tmIter.hasMoreElements()) {
- TokenManager tm = (TokenManager)tmIter.nextElement();
- if (!tm.isReadOnly()) {
- // Write the token manager tokens as Java
- // this must appear before genTokenInterchange so that
- // labels are set on string literals
- genTokenTypes(tm);
- // Write the token manager tokens as plain text
- genTokenInterchange(tm);
- }
- exitIfError();
- }
- }
- catch (IOException e) {
- antlrTool.reportException(e, null);
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The {...} action to generate
- */
- public void gen(ActionElement action) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genAction(" + action + ")");
- if (action.isSemPred) {
- genSemPred(action.actionText, action.line);
- }
- else {
- if (grammar.hasSyntacticPredicate) {
- println("if ( inputState.guessing==0 ) {");
- tabs++;
- }
- // get the name of the followSet for the current rule so that we
- // can replace $FOLLOW in the .g file.
- ActionTransInfo tInfo = new ActionTransInfo();
- String actionStr = processActionForSpecialSymbols(action.actionText,
- action.getLine(),
- currentRule,
- tInfo);
- if (tInfo.refRuleRoot != null) {
- // Somebody referenced "#rule", make sure translated var is valid
- // assignment to #rule is left as a ref also, meaning that assignments
- // with no other refs like "#rule = foo();" still forces this code to be
- // generated (unnecessarily).
- println(tInfo.refRuleRoot + " = (" + labeledElementASTType + ")currentAST.root;");
- }
- // dump the translated action
- printAction(actionStr);
- if (tInfo.assignToRoot) {
- // Somebody did a "#rule=", reset internal currentAST.root
- println("currentAST.root = " + tInfo.refRuleRoot + ";");
- // reset the child pointer too to be last sibling in sibling list
- println("currentAST.child = " + tInfo.refRuleRoot + "!=null &&" + tInfo.refRuleRoot + ".getFirstChild()!=null ?");
- tabs++;
- println(tInfo.refRuleRoot + ".getFirstChild() : " + tInfo.refRuleRoot + ";");
- tabs--;
- println("currentAST.advanceChildToEnd();");
- }
- if (grammar.hasSyntacticPredicate) {
- tabs--;
- println("}");
- }
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The "x|y|z|..." block to generate
- */
- public void gen(AlternativeBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
- println("{");
- genBlockPreamble(blk);
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
- genBlockFinish(howToFinish, throwNoViable);
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate code for the given grammar element.
- * @param blk The block-end element to generate. Block-end
- * elements are synthesized by the grammar parser to represent
- * the end of a block.
- */
- public void gen(BlockEndElement end) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
- }
- /** Generate code for the given grammar element.
- * @param blk The character literal reference to generate
- */
- public void gen(CharLiteralElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
- if (atom.getLabel() != null) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- boolean oldsaveText = saveText;
- saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
- genMatch(atom);
- saveText = oldsaveText;
- }
- /** Generate code for the given grammar element.
- * @param blk The character-range reference to generate
- */
- public void gen(CharRangeElement r) {
- if (r.getLabel() != null && syntacticPredLevel == 0) {
- println(r.getLabel() + " = " + lt1Value + ";");
- }
- boolean flag = ( grammar instanceof LexerGrammar &&
- ( !saveText ||
- r.getAutoGenType() ==
- GrammarElement.AUTO_GEN_BANG ) );
- if (flag) {
- println("_saveIndex=text.length();");
- }
- println("matchRange(" + r.beginText + "," + r.endText + ");");
- if (flag) {
- println("text.setLength(_saveIndex);");
- }
- }
- /** Generate the lexer Java file */
- public void gen(LexerGrammar g) throws IOException {
- // If debugging, create a new sempred vector for this grammar
- if (g.debuggingOutput)
- semPreds = new Vector();
- setGrammar(g);
- if (!(grammar instanceof LexerGrammar)) {
- antlrTool.panic("Internal error generating lexer");
- }
- // SAS: moved output creation to method so a subclass can change
- // how the output is generated (for VAJ interface)
- setupOutput(grammar.getClassName());
- genAST = false; // no way to gen trees.
- saveText = true; // save consumed characters.
- tabs = 0;
- // Generate header common to all Java output files
- genHeader();
- // Do not use printAction because we assume tabs==0
- println(behavior.getHeaderAction(""));
- // Generate header specific to lexer Java file
- // println("import java.io.FileInputStream;");
- println("import java.io.InputStream;");
- println("import antlr.TokenStreamException;");
- println("import antlr.TokenStreamIOException;");
- println("import antlr.TokenStreamRecognitionException;");
- println("import antlr.CharStreamException;");
- println("import antlr.CharStreamIOException;");
- println("import antlr.ANTLRException;");
- println("import java.io.Reader;");
- println("import java.util.Hashtable;");
- println("import antlr." + grammar.getSuperClass() + ";");
- println("import antlr.InputBuffer;");
- println("import antlr.ByteBuffer;");
- println("import antlr.CharBuffer;");
- println("import antlr.Token;");
- println("import antlr.CommonToken;");
- println("import antlr.RecognitionException;");
- println("import antlr.NoViableAltForCharException;");
- println("import antlr.MismatchedCharException;");
- println("import antlr.TokenStream;");
- println("import antlr.ANTLRHashString;");
- println("import antlr.LexerSharedInputState;");
- println("import antlr.collections.impl.BitSet;");
- println("import antlr.SemanticException;");
- // Generate user-defined lexer file preamble
- println(grammar.preambleAction.getText());
- // Generate lexer class definition
- String sup = null;
- if (grammar.superClass != null) {
- sup = grammar.superClass;
- }
- else {
- sup = "antlr." + grammar.getSuperClass();
- }
- // print javadoc comment if any
- if (grammar.comment != null) {
- _println(grammar.comment);
- }
- // get prefix (replaces "public" and lets user specify)
- String prefix = "public";
- Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
- if (tprefix != null) {
- String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
- if (p != null) {
- prefix = p;
- }
- }
- print(prefix+" ");
- print("class " + grammar.getClassName() + " extends " + sup);
- println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix + ", TokenStream");
- Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
- if (tsuffix != null) {
- String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
- if (suffix != null) {
- print(", " + suffix); // must be an interface name for Java
- }
- }
- println(" {");
- // Generate user-defined lexer class members
- print(
- processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
- );
- //
- // Generate the constructor from InputStream, which in turn
- // calls the ByteBuffer constructor
- //
- println("public " + grammar.getClassName() + "(InputStream in) {");
- tabs++;
- println("this(new ByteBuffer(in));");
- tabs--;
- println("}");
- //
- // Generate the constructor from Reader, which in turn
- // calls the CharBuffer constructor
- //
- println("public " + grammar.getClassName() + "(Reader in) {");
- tabs++;
- println("this(new CharBuffer(in));");
- tabs--;
- println("}");
- println("public " + grammar.getClassName() + "(InputBuffer ib) {");
- tabs++;
- // if debugging, wrap the input buffer in a debugger
- if (grammar.debuggingOutput)
- println("this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)));");
- else
- println("this(new LexerSharedInputState(ib));");
- tabs--;
- println("}");
- //
- // Generate the constructor from InputBuffer (char or byte)
- //
- println("public " + grammar.getClassName() + "(LexerSharedInputState state) {");
- tabs++;
- println("super(state);");
- // if debugging, set up array variables and call user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging();");
- }
- // Generate the setting of various generated options.
- // These need to be before the literals since ANTLRHashString depends on
- // the casesensitive stuff.
- println("caseSensitiveLiterals = " + g.caseSensitiveLiterals + ";");
- println("setCaseSensitive(" + g.caseSensitive + ");");
- // Generate the initialization of a hashtable
- // containing the string literals used in the lexer
- // The literals variable itself is in CharScanner
- println("literals = new Hashtable();");
- Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
- while (keys.hasMoreElements()) {
- String key = (String)keys.nextElement();
- if (key.charAt(0) != '"') {
- continue;
- }
- TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
- if (sym instanceof StringLiteralSymbol) {
- StringLiteralSymbol s = (StringLiteralSymbol)sym;
- println("literals.put(new ANTLRHashString(" + s.getId() + ", this), new Integer(" + s.getTokenType() + "));");
- }
- }
- tabs--;
- Enumeration ids;
- println("}");
- // generate the rule name array for debugging
- if (grammar.debuggingOutput) {
- println("private static final String _ruleNames[] = {");
- ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol)
- println(" \"" + ((RuleSymbol)sym).getId() + "\",");
- }
- println("};");
- }
- // Generate nextToken() rule.
- // nextToken() is a synthetic lexer rule that is the implicit OR of all
- // user-defined lexer rules.
- genNextToken();
- // Generate code for each rule in the lexer
- ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- RuleSymbol sym = (RuleSymbol)ids.nextElement();
- // Don't generate the synthetic rules
- if (!sym.getId().equals("mnextToken")) {
- genRule(sym, false, ruleNum++);
- }
- exitIfError();
- }
- // Generate the semantic predicate map for debugging
- if (grammar.debuggingOutput)
- genSemPredMap();
- // Generate the bitsets used throughout the lexer
- genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
- println("");
- println("}");
- // Close the lexer output stream
- currentOutput.close();
- currentOutput = null;
- }
- /** Generate code for the given grammar element.
- * @param blk The (...)+ block to generate
- */
- public void gen(OneOrMoreBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen+(" + blk + ")");
- String label;
- String cnt;
- println("{");
- genBlockPreamble(blk);
- if (blk.getLabel() != null) {
- cnt = "_cnt_" + blk.getLabel();
- }
- else {
- cnt = "_cnt" + blk.ID;
- }
- println("int " + cnt + "=0;");
- if (blk.getLabel() != null) {
- label = blk.getLabel();
- }
- else {
- label = "_loop" + blk.ID;
- }
- println(label + ":");
- println("do {");
- tabs++;
- // generate the init action for ()+ ()* inside the loop
- // this allows us to do usefull EOF checking...
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- // or when lookahead derived purely from end-of-file
- // Lookahead analysis stops when end-of-file is hit,
- // returning set {epsilon}. Since {epsilon} is not
- // ambig with any real tokens, no error is reported
- // by deterministic() routines and we have to check
- // for the case where the lookahead depth didn't get
- // set to NONDETERMINISTIC (this only happens when the
- // FOLLOW contains real atoms + epsilon).
- boolean generateNonGreedyExitPath = false;
- int nonGreedyExitDepth = grammar.maxk;
- if (!blk.greedy &&
- blk.exitLookaheadDepth <= grammar.maxk &&
- blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
- generateNonGreedyExitPath = true;
- nonGreedyExitDepth = blk.exitLookaheadDepth;
- }
- else if (!blk.greedy &&
- blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
- generateNonGreedyExitPath = true;
- }
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- if (generateNonGreedyExitPath) {
- if (DEBUG_CODE_GENERATOR) {
- System.out.println("nongreedy (...)+ loop; exit depth is " +
- blk.exitLookaheadDepth);
- }
- String predictExit =
- getLookaheadTestExpression(blk.exitCache,
- nonGreedyExitDepth);
- println("// nongreedy exit test");
- println("if ( " + cnt + ">=1 && " + predictExit + ") break " + label + ";");
- }
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
- genBlockFinish(
- howToFinish,
- "if ( " + cnt + ">=1 ) { break " + label + "; } else {" + throwNoViable + "}"
- );
- println(cnt + "++;");
- tabs--;
- println("} while (true);");
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate the parser Java file */
- public void gen(ParserGrammar g) throws IOException {
- // if debugging, set up a new vector to keep track of sempred
- // strings for this grammar
- if (g.debuggingOutput)
- semPreds = new Vector();
- setGrammar(g);
- if (!(grammar instanceof ParserGrammar)) {
- antlrTool.panic("Internal error generating parser");
- }
- // Open the output stream for the parser and set the currentOutput
- // SAS: moved file setup so subclass could do it (for VAJ interface)
- setupOutput(grammar.getClassName());
- genAST = grammar.buildAST;
- tabs = 0;
- // Generate the header common to all output files.
- genHeader();
- // Do not use printAction because we assume tabs==0
- println(behavior.getHeaderAction(""));
- // Generate header for the parser
- println("import antlr.TokenBuffer;");
- println("import antlr.TokenStreamException;");
- println("import antlr.TokenStreamIOException;");
- println("import antlr.ANTLRException;");
- println("import antlr." + grammar.getSuperClass() + ";");
- println("import antlr.Token;");
- println("import antlr.TokenStream;");
- println("import antlr.RecognitionException;");
- println("import antlr.NoViableAltException;");
- println("import antlr.MismatchedTokenException;");
- println("import antlr.SemanticException;");
- println("import antlr.ParserSharedInputState;");
- println("import antlr.collections.impl.BitSet;");
- if ( genAST ) {
- println("import antlr.collections.AST;");
- println("import java.util.Hashtable;");
- println("import antlr.ASTFactory;");
- println("import antlr.ASTPair;");
- println("import antlr.collections.impl.ASTArray;");
- }
- // Output the user-defined parser preamble
- println(grammar.preambleAction.getText());
- // Generate parser class definition
- String sup = null;
- if (grammar.superClass != null)
- sup = grammar.superClass;
- else
- sup = "antlr." + grammar.getSuperClass();
- // print javadoc comment if any
- if (grammar.comment != null) {
- _println(grammar.comment);
- }
- // get prefix (replaces "public" and lets user specify)
- String prefix = "public";
- Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
- if (tprefix != null) {
- String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
- if (p != null) {
- prefix = p;
- }
- }
- print(prefix+" ");
- print("class " + grammar.getClassName() + " extends " + sup);
- println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
- Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
- if (tsuffix != null) {
- String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
- if (suffix != null)
- print(", " + suffix); // must be an interface name for Java
- }
- println(" {");
- // set up an array of all the rule names so the debugger can
- // keep track of them only by number -- less to store in tree...
- if (grammar.debuggingOutput) {
- println("private static final String _ruleNames[] = {");
- Enumeration ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol)
- println(" \"" + ((RuleSymbol)sym).getId() + "\",");
- }
- println("};");
- }
- // Generate user-defined parser class members
- print(
- processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
- );
- // Generate parser class constructor from TokenBuffer
- println("");
- println("protected " + grammar.getClassName() + "(TokenBuffer tokenBuf, int k) {");
- println(" super(tokenBuf,k);");
- println(" tokenNames = _tokenNames;");
- // if debugging, set up arrays and call the user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging(tokenBuf);");
- }
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(TokenBuffer tokenBuf) {");
- println(" this(tokenBuf," + grammar.maxk + ");");
- println("}");
- println("");
- // Generate parser class constructor from TokenStream
- println("protected " + grammar.getClassName() + "(TokenStream lexer, int k) {");
- println(" super(lexer,k);");
- println(" tokenNames = _tokenNames;");
- // if debugging, set up arrays and call the user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging(lexer);");
- }
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(TokenStream lexer) {");
- println(" this(lexer," + grammar.maxk + ");");
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(ParserSharedInputState state) {");
- println(" super(state," + grammar.maxk + ");");
- println(" tokenNames = _tokenNames;");
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- // Generate code for each rule in the grammar
- Enumeration ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol) {
- RuleSymbol rs = (RuleSymbol)sym;
- genRule(rs, rs.references.size() == 0, ruleNum++);
- }
- exitIfError();
- }
- // Generate the token names
- genTokenStrings();
- if ( grammar.buildAST ) {
- genTokenASTNodeMap();
- }
- // Generate the bitsets used throughout the grammar
- genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
- // Generate the semantic predicate map for debugging
- if (grammar.debuggingOutput)
- genSemPredMap();
- // Close class definition
- println("");
- println("}");
- // Close the parser output stream
- currentOutput.close();
- currentOutput = null;
- }
- /** Generate code for the given grammar element.
- * @param blk The rule-reference to generate
- */
- public void gen(RuleRefElement rr) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
- if (rs == null || !rs.isDefined()) {
- // Is this redundant???
- antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
- return;
- }
- if (!(rs instanceof RuleSymbol)) {
- // Is this redundant???
- antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
- return;
- }
- genErrorTryForElement(rr);
- // AST value for labeled rule refs in tree walker.
- // This is not AST construction; it is just the input tree node value.
- if (grammar instanceof TreeWalkerGrammar &&
- rr.getLabel() != null &&
- syntacticPredLevel == 0) {
- println(rr.getLabel() + " = _t==ASTNULL ? null : " + lt1Value + ";");
- }
- // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
- if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("_saveIndex=text.length();");
- }
- // Process return value assignment if any
- printTabs();
- if (rr.idAssign != null) {
- // Warn if the rule has no return type
- if (rs.block.returnAction == null) {
- antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
- }
- _print(rr.idAssign + "=");
- }
- else {
- // Warn about return value if any, but not inside syntactic predicate
- if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
- antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
- }
- }
- // Call the rule
- GenRuleInvocation(rr);
- // if in lexer and ! on element or alt or rule, save buffer index to kill later
- if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("text.setLength(_saveIndex);");
- }
- // if not in a syntactic predicate
- if (syntacticPredLevel == 0) {
- boolean doNoGuessTest = (
- grammar.hasSyntacticPredicate &&
- (
- grammar.buildAST && rr.getLabel() != null ||
- (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
- )
- );
- if (doNoGuessTest) {
- // println("if (inputState.guessing==0) {");
- // tabs++;
- }
- if (grammar.buildAST && rr.getLabel() != null) {
- // always gen variable for rule return on labeled rules
- println(rr.getLabel() + "_AST = (" + labeledElementASTType + ")returnAST;");
- }
- if (genAST) {
- switch (rr.getAutoGenType()) {
- case GrammarElement.AUTO_GEN_NONE:
- // println("theASTFactory.addASTChild(currentAST, returnAST);");
- println("astFactory.addASTChild(currentAST, returnAST);");
- break;
- case GrammarElement.AUTO_GEN_CARET:
- antlrTool.error("Internal: encountered ^ after rule reference");
- break;
- default:
- break;
- }
- }
- // if a lexer and labeled, Token label defined at rule level, just set it here
- if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
- println(rr.getLabel() + "=_returnToken;");
- }
- if (doNoGuessTest) {
- // tabs--;
- // println("}");
- }
- }
- genErrorCatchForElement(rr);
- }
- /** Generate code for the given grammar element.
- * @param blk The string-literal reference to generate
- */
- public void gen(StringLiteralElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
- // Variable declarations for labeled elements
- if (atom.getLabel() != null && syntacticPredLevel == 0) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(atom);
- // is there a bang on the literal?
- boolean oldsaveText = saveText;
- saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
- // matching
- genMatch(atom);
- saveText = oldsaveText;
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The token-range reference to generate
- */
- public void gen(TokenRangeElement r) {
- genErrorTryForElement(r);
- if (r.getLabel() != null && syntacticPredLevel == 0) {
- println(r.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(r);
- // match
- println("matchRange(" + r.beginText + "," + r.endText + ");");
- genErrorCatchForElement(r);
- }
- /** Generate code for the given grammar element.
- * @param blk The token-reference to generate
- */
- public void gen(TokenRefElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
- if (grammar instanceof LexerGrammar) {
- antlrTool.panic("Token reference found in lexer");
- }
- genErrorTryForElement(atom);
- // Assign Token value to token label variable
- if (atom.getLabel() != null && syntacticPredLevel == 0) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(atom);
- // matching
- genMatch(atom);
- genErrorCatchForElement(atom);
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- public void gen(TreeElement t) {
- // save AST cursor
- println("AST __t" + t.ID + " = _t;");
- // If there is a label on the root, then assign that to the variable
- if (t.root.getLabel() != null) {
- println(t.root.getLabel() + " = _t==ASTNULL ? null :(" + labeledElementASTType + ")_t;");
- }
- // check for invalid modifiers ! and ^ on tree element roots
- if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
- antlrTool.error("Suffixing a root node with '!' is not implemented",
- grammar.getFilename(), t.getLine(), t.getColumn());
- t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
- }
- if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
- antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
- grammar.getFilename(), t.getLine(), t.getColumn());
- t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
- }
- // Generate AST variables
- genElementAST(t.root);
- if (grammar.buildAST) {
- // Save the AST construction state
- println("ASTPair __currentAST" + t.ID + " = currentAST.copy();");
- // Make the next item added a child of the TreeElement root
- println("currentAST.root = currentAST.child;");
- println("currentAST.child = null;");
- }
- // match root
- if ( t.root instanceof WildcardElement ) {
- println("if ( _t==null ) throw new MismatchedTokenException();");
- }
- else {
- genMatch(t.root);
- }
- // move to list of children
- println("_t = _t.getFirstChild();");
- // walk list of children, generating code for each
- for (int i = 0; i < t.getAlternatives().size(); i++) {
- Alternative a = t.getAlternativeAt(i);
- AlternativeElement e = a.head;
- while (e != null) {
- e.generate();
- e = e.next;
- }
- }
- if (grammar.buildAST) {
- // restore the AST construction state to that just after the
- // tree root was added
- println("currentAST = __currentAST" + t.ID + ";");
- }
- // restore AST cursor
- println("_t = __t" + t.ID + ";");
- // move cursor to sibling of tree just parsed
- println("_t = _t.getNextSibling();");
- }
    /** Generate the tree-parser Java file.
     *  Emits the complete walker class for a tree grammar: file header,
     *  antlr imports, user preamble, class declaration (with optional
     *  classHeaderPrefix/classHeaderSuffix options), default constructor,
     *  one generated method per rule, the token-name table, and the
     *  lookahead bitsets used by the generated code.
     *  @param g the tree-walker grammar to generate code for
     *  @throws IOException if the output file cannot be opened or written
     */
    public void gen(TreeWalkerGrammar g) throws IOException {
        // SAS: debugging stuff removed for now...
        setGrammar(g);
        if (!(grammar instanceof TreeWalkerGrammar)) {
            antlrTool.panic("Internal error generating tree-walker");
        }
        // Open the output stream for the parser and set the currentOutput
        // SAS: move file open to method so subclass can override it
        // (mainly for VAJ interface)
        setupOutput(grammar.getClassName());
        genAST = grammar.buildAST;
        tabs = 0;
        // Generate the header common to all output files.
        genHeader();
        // Do not use printAction because we assume tabs==0
        println(behavior.getHeaderAction(""));
        // Generate header for the parser: fixed set of antlr runtime imports
        println("import antlr." + grammar.getSuperClass() + ";");
        println("import antlr.Token;");
        println("import antlr.collections.AST;");
        println("import antlr.RecognitionException;");
        println("import antlr.ANTLRException;");
        println("import antlr.NoViableAltException;");
        println("import antlr.MismatchedTokenException;");
        println("import antlr.SemanticException;");
        println("import antlr.collections.impl.BitSet;");
        println("import antlr.ASTPair;");
        println("import antlr.collections.impl.ASTArray;");
        // Output the user-defined parser premamble
        println(grammar.preambleAction.getText());
        // Generate parser class definition; superclass is either the
        // user-specified one or the antlr default for this grammar kind
        String sup = null;
        if (grammar.superClass != null) {
            sup = grammar.superClass;
        }
        else {
            sup = "antlr." + grammar.getSuperClass();
        }
        println("");
        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(grammar.comment);
        }
        // get prefix (replaces "public" and lets user specify)
        String prefix = "public";
        Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
        if (tprefix != null) {
            String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
            if (p != null) {
                prefix = p;
            }
        }
        print(prefix+" ");
        print("class " + grammar.getClassName() + " extends " + sup);
        // generated class always implements the token-types interface
        println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
        Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
        if (tsuffix != null) {
            String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
            if (suffix != null) {
                print(", " + suffix); // must be an interface name for Java
            }
        }
        println(" {");
        // Generate user-defined parser class members
        print(
            processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
        );
        // Generate default parser class constructor
        println("public " + grammar.getClassName() + "() {");
        tabs++;
        println("tokenNames = _tokenNames;");
        tabs--;
        println("}");
        println("");
        // Generate code for each rule in the grammar
        Enumeration ids = grammar.rules.elements();
        int ruleNum = 0;
        String ruleNameInits = "";
        while (ids.hasMoreElements()) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol) {
                RuleSymbol rs = (RuleSymbol)sym;
                // second arg flags rules never referenced by other rules
                genRule(rs, rs.references.size() == 0, ruleNum++);
            }
            exitIfError();
        }
        // Generate the token names
        genTokenStrings();
        // Generate the bitsets used throughout the grammar
        genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
        // Close class definition
        println("}");
        println("");
        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }
- /** Generate code for the given grammar element.
- * @param wc The wildcard element to generate
- */
- public void gen(WildcardElement wc) {
- // Variable assignment for labeled elements
- if (wc.getLabel() != null && syntacticPredLevel == 0) {
- println(wc.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(wc);
- // Match anything but EOF
- if (grammar instanceof TreeWalkerGrammar) {
- println("if ( _t==null ) throw new MismatchedTokenException();");
- }
- else if (grammar instanceof LexerGrammar) {
- if (grammar instanceof LexerGrammar &&
- (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("_saveIndex=text.length();");
- }
- println("matchNot(EOF_CHAR);");
- if (grammar instanceof LexerGrammar &&
- (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("text.setLength(_saveIndex);"); // kill text atom put in buffer
- }
- }
- else {
- println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
- }
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The (...)* block to generate
- */
- public void gen(ZeroOrMoreBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen*(" + blk + ")");
- println("{");
- genBlockPreamble(blk);
- String label;
- if (blk.getLabel() != null) {
- label = blk.getLabel();
- }
- else {
- label = "_loop" + blk.ID;
- }
- println(label + ":");
- println("do {");
- tabs++;
- // generate the init action for ()* inside the loop
- // this allows us to do usefull EOF checking...
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- // or when lookahead derived purely from end-of-file
- // Lookahead analysis stops when end-of-file is hit,
- // returning set {epsilon}. Since {epsilon} is not
- // ambig with any real tokens, no error is reported
- // by deterministic() routines and we have to check
- // for the case where the lookahead depth didn't get
- // set to NONDETERMINISTIC (this only happens when the
- // FOLLOW contains real atoms + epsilon).
- boolean generateNonGreedyExitPath = false;
- int nonGreedyExitDepth = grammar.maxk;
- if (!blk.greedy &&
- blk.exitLookaheadDepth <= grammar.maxk &&
- blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
- generateNonGreedyExitPath = true;
- nonGreedyExitDepth = blk.exitLookaheadDepth;
- }
- else if (!blk.greedy &&
- blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
- generateNonGreedyExitPath = true;
- }
- if (generateNonGreedyExitPath) {
- if (DEBUG_CODE_GENERATOR) {
- System.out.println("nongreedy (...)* loop; exit depth is " +
- blk.exitLookaheadDepth);
- }
- String predictExit =
- getLookaheadTestExpression(blk.exitCache,
- nonGreedyExitDepth);
- println("// nongreedy exit test");
- println("if (" + predictExit + ") break " + label + ";");
- }
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
- genBlockFinish(howToFinish, "break " + label + ";");
- tabs--;
- println("} while (true);");
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate an alternative.
- * @param alt The alternative to generate
- * @param blk The block to which the alternative belongs
- */
- protected void genAlt(Alternative alt, AlternativeBlock blk) {
- // Save the AST generation state, and set it to that of the alt
- boolean savegenAST = genAST;
- genAST = genAST && alt.getAutoGen();
- boolean oldsaveTest = saveText;
- saveText = saveText && alt.getAutoGen();
- // Reset the variable name map for the alternative
- Hashtable saveMap = treeVariableMap;
- treeVariableMap = new Hashtable();
- // Generate try block around the alt for error handling
- if (alt.exceptionSpec != null) {
- println("try { // for error handling");
- tabs++;
- }
- AlternativeElement elem = alt.head;
- while (!(elem instanceof BlockEndElement)) {
- elem.generate(); // alt can begin with anything. Ask target to gen.
- elem = elem.next;
- }
- if (genAST) {
- if (blk instanceof RuleBlock) {
- // Set the AST return value for the rule
- RuleBlock rblk = (RuleBlock)blk;
- if (grammar.hasSyntacticPredicate) {
- // println("if ( inputState.guessing==0 ) {");
- // tabs++;
- }
- println(rblk.getRuleName() + "_AST = (" + labeledElementASTType + ")currentAST.root;");
- if (grammar.hasSyntacticPredicate) {
- // --tabs;
- // println("}");
- }
- }
- else if (blk.getLabel() != null) {
- // ### future: also set AST value for labeled subrules.
- // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
- antlrTool.warning("Labeled subrules not yet supported", grammar.getFilename(), blk.getLine(), blk.getColumn());
- }
- }
- if (alt.exceptionSpec != null) {
- // close try block
- tabs--;
- println("}");
- genErrorHandler(alt.exceptionSpec);
- }
- genAST = savegenAST;
- saveText = oldsaveTest;
- treeVariableMap = saveMap;
- }
- /** Generate all the bitsets to be used in the parser or lexer
- * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
- * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
- * Note that most languages do not support object initialization inside a
- * class definition, so other code-generators may have to separate the
- * bitset declarations from the initializations (e.g., put the initializations
- * in the generated constructor instead).
- * @param bitsetList The list of bitsets to generate.
- * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
- */
- protected void genBitsets(Vector bitsetList,
- int maxVocabulary
- ) {
- println("");
- for (int i = 0; i < bitsetList.size(); i++) {
- BitSet p = (BitSet)bitsetList.elementAt(i);
- // Ensure that generated BitSet is large enough for vocabulary
- p.growToInclude(maxVocabulary);
- genBitSet(p, i);
- }
- }
- /** Do something simple like:
- * private static final long[] mk_tokenSet_0() {
- * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
- * return data;
- * }
- * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
- *
- * Or, for large bitsets, optimize init so ranges are collapsed into loops.
- * This is most useful for lexers using unicode.
- */
- private void genBitSet(BitSet p, int id) {
- // initialization data
- println(
- "private static final long[] mk" + getBitsetName(id) + "() {"
- );
- int n = p.lengthInLongWords();
- if ( n<BITSET_OPTIMIZE_INIT_THRESHOLD ) {
- println("\tlong[] data = { " + p.toStringOfWords() + "};");
- }
- else {
- // will init manually, allocate space then set values
- println("\tlong[] data = new long["+n+"];");
- long[] elems = p.toPackedArray();
- for (int i = 0; i < elems.length;) {
- if ( elems[i]==0 ) {
- // done automatically by Java, don't waste time/code
- i++;
- continue;
- }
- if ( (i+1)==elems.length || elems[i]!=elems[i+1] ) {
- // last number or no run of numbers, just dump assignment
- …
Large files files are truncated, but you can click here to view the full file