/Dependencies/boo/lib/antlr-2.7.5/antlr/JavaCodeGenerator.java
Java | 3484 lines | 2405 code | 361 blank | 718 comment | 691 complexity | d399396ee38472edf7c786462382b1ca MD5 | raw file
Possible License(s): GPL-2.0
Large files are truncated, but you can click here to view the full file
- package antlr;
- /* ANTLR Translator Generator
- * Project led by Terence Parr at http://www.jGuru.com
- * Software rights: http://www.antlr.org/license.html
- *
- * $Id: //depot/code/org.antlr/release/antlr-2.7.5/antlr/JavaCodeGenerator.java#1 $
- */
- import java.util.Enumeration;
- import java.util.Hashtable;
- import antlr.collections.impl.BitSet;
- import antlr.collections.impl.Vector;
- import java.io.PrintWriter; //SAS: changed for proper text file io
- import java.io.IOException;
- import java.io.FileWriter;
- /**Generate MyParser.java, MyLexer.java and MyParserTokenTypes.java */
- public class JavaCodeGenerator extends CodeGenerator {
- // non-zero if inside syntactic predicate generation
- protected int syntacticPredLevel = 0;
- // Are we generating ASTs (for parsers and tree parsers) right now?
- protected boolean genAST = false;
- // Are we saving the text consumed (for lexers) right now?
- protected boolean saveText = false;
- // Grammar parameters set up to handle different grammar classes.
- // These are used to get instanceof tests out of code generation
- String labeledElementType;
- String labeledElementASTType;
- String labeledElementInit;
- String commonExtraArgs;
- String commonExtraParams;
- String commonLocalVars;
- String lt1Value;
- String exceptionThrown;
- String throwNoViable;
- /** Tracks the rule being generated. Used for mapTreeId */
- RuleBlock currentRule;
- /** Tracks the rule or labeled subrule being generated. Used for
- AST generation. */
- String currentASTResult;
- /** Mapping between the ids used in the current alt, and the
- * names of variables used to represent their AST values.
- */
- Hashtable treeVariableMap = new Hashtable();
- /** Used to keep track of which AST variables have been defined in a rule
- * (except for the #rule_name and #rule_name_in var's
- */
- Hashtable declaredASTVariables = new Hashtable();
- /* Count of unnamed generated variables */
- int astVarNumber = 1;
- /** Special value used to mark duplicate in treeVariableMap */
- protected static final String NONUNIQUE = new String();
- public static final int caseSizeThreshold = 127; // ascii is max
- private Vector semPreds;
- /** Create a Java code-generator using the given Grammar.
- * The caller must still call setTool, setBehavior, and setAnalyzer
- * before generating code.
- */
- public JavaCodeGenerator() {
- super();
- charFormatter = new JavaCharFormatter();
- }
- /** Adds a semantic predicate string to the sem pred vector
- These strings will be used to build an array of sem pred names
- when building a debugging parser. This method should only be
- called when the debug option is specified
- */
- protected int addSemPred(String predicate) {
- semPreds.appendElement(predicate);
- return semPreds.size() - 1;
- }
- public void exitIfError() {
- if (antlrTool.hasError()) {
- antlrTool.fatalError("Exiting due to errors.");
- }
- }
- /**Generate the parser, lexer, treeparser, and token types in Java */
- public void gen() {
- // Do the code generation
- try {
- // Loop over all grammars
- Enumeration grammarIter = behavior.grammars.elements();
- while (grammarIter.hasMoreElements()) {
- Grammar g = (Grammar)grammarIter.nextElement();
- // Connect all the components to each other
- g.setGrammarAnalyzer(analyzer);
- g.setCodeGenerator(this);
- analyzer.setGrammar(g);
- // To get right overloading behavior across hetrogeneous grammars
- setupGrammarParameters(g);
- g.generate();
- // print out the grammar with lookahead sets (and FOLLOWs)
- // System.out.print(g.toString());
- exitIfError();
- }
- // Loop over all token managers (some of which are lexers)
- Enumeration tmIter = behavior.tokenManagers.elements();
- while (tmIter.hasMoreElements()) {
- TokenManager tm = (TokenManager)tmIter.nextElement();
- if (!tm.isReadOnly()) {
- // Write the token manager tokens as Java
- // this must appear before genTokenInterchange so that
- // labels are set on string literals
- genTokenTypes(tm);
- // Write the token manager tokens as plain text
- genTokenInterchange(tm);
- }
- exitIfError();
- }
- }
- catch (IOException e) {
- antlrTool.reportException(e, null);
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The {...} action to generate
- */
- public void gen(ActionElement action) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genAction(" + action + ")");
- if (action.isSemPred) {
- genSemPred(action.actionText, action.line);
- }
- else {
- if (grammar.hasSyntacticPredicate) {
- println("if ( inputState.guessing==0 ) {");
- tabs++;
- }
- // get the name of the followSet for the current rule so that we
- // can replace $FOLLOW in the .g file.
- ActionTransInfo tInfo = new ActionTransInfo();
- String actionStr = processActionForSpecialSymbols(action.actionText,
- action.getLine(),
- currentRule,
- tInfo);
- if (tInfo.refRuleRoot != null) {
- // Somebody referenced "#rule", make sure translated var is valid
- // assignment to #rule is left as a ref also, meaning that assignments
- // with no other refs like "#rule = foo();" still forces this code to be
- // generated (unnecessarily).
- println(tInfo.refRuleRoot + " = (" + labeledElementASTType + ")currentAST.root;");
- }
- // dump the translated action
- printAction(actionStr);
- if (tInfo.assignToRoot) {
- // Somebody did a "#rule=", reset internal currentAST.root
- println("currentAST.root = " + tInfo.refRuleRoot + ";");
- // reset the child pointer too to be last sibling in sibling list
- println("currentAST.child = " + tInfo.refRuleRoot + "!=null &&" + tInfo.refRuleRoot + ".getFirstChild()!=null ?");
- tabs++;
- println(tInfo.refRuleRoot + ".getFirstChild() : " + tInfo.refRuleRoot + ";");
- tabs--;
- println("currentAST.advanceChildToEnd();");
- }
- if (grammar.hasSyntacticPredicate) {
- tabs--;
- println("}");
- }
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The "x|y|z|..." block to generate
- */
- public void gen(AlternativeBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
- println("{");
- genBlockPreamble(blk);
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
- genBlockFinish(howToFinish, throwNoViable);
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate code for the given grammar element.
- * @param blk The block-end element to generate. Block-end
- * elements are synthesized by the grammar parser to represent
- * the end of a block.
- */
- public void gen(BlockEndElement end) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
- }
- /** Generate code for the given grammar element.
- * @param blk The character literal reference to generate
- */
- public void gen(CharLiteralElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
- if (atom.getLabel() != null) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- boolean oldsaveText = saveText;
- saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
- genMatch(atom);
- saveText = oldsaveText;
- }
- /** Generate code for the given grammar element.
- * @param blk The character-range reference to generate
- */
- public void gen(CharRangeElement r) {
- if (r.getLabel() != null && syntacticPredLevel == 0) {
- println(r.getLabel() + " = " + lt1Value + ";");
- }
- boolean flag = ( grammar instanceof LexerGrammar &&
- ( !saveText ||
- r.getAutoGenType() ==
- GrammarElement.AUTO_GEN_BANG ) );
- if (flag) {
- println("_saveIndex=text.length();");
- }
- println("matchRange(" + r.beginText + "," + r.endText + ");");
- if (flag) {
- println("text.setLength(_saveIndex);");
- }
- }
- /** Generate the lexer Java file */
- public void gen(LexerGrammar g) throws IOException {
- // If debugging, create a new sempred vector for this grammar
- if (g.debuggingOutput)
- semPreds = new Vector();
- setGrammar(g);
- if (!(grammar instanceof LexerGrammar)) {
- antlrTool.panic("Internal error generating lexer");
- }
- // SAS: moved output creation to method so a subclass can change
- // how the output is generated (for VAJ interface)
- setupOutput(grammar.getClassName());
- genAST = false; // no way to gen trees.
- saveText = true; // save consumed characters.
- tabs = 0;
- // Generate header common to all Java output files
- genHeader();
- // Do not use printAction because we assume tabs==0
- println(behavior.getHeaderAction(""));
- // Generate header specific to lexer Java file
- // println("import java.io.FileInputStream;");
- println("import java.io.InputStream;");
- println("import antlr.TokenStreamException;");
- println("import antlr.TokenStreamIOException;");
- println("import antlr.TokenStreamRecognitionException;");
- println("import antlr.CharStreamException;");
- println("import antlr.CharStreamIOException;");
- println("import antlr.ANTLRException;");
- println("import java.io.Reader;");
- println("import java.util.Hashtable;");
- println("import antlr." + grammar.getSuperClass() + ";");
- println("import antlr.InputBuffer;");
- println("import antlr.ByteBuffer;");
- println("import antlr.CharBuffer;");
- println("import antlr.Token;");
- println("import antlr.CommonToken;");
- println("import antlr.RecognitionException;");
- println("import antlr.NoViableAltForCharException;");
- println("import antlr.MismatchedCharException;");
- println("import antlr.TokenStream;");
- println("import antlr.ANTLRHashString;");
- println("import antlr.LexerSharedInputState;");
- println("import antlr.collections.impl.BitSet;");
- println("import antlr.SemanticException;");
- // Generate user-defined lexer file preamble
- println(grammar.preambleAction.getText());
- // Generate lexer class definition
- String sup = null;
- if (grammar.superClass != null) {
- sup = grammar.superClass;
- }
- else {
- sup = "antlr." + grammar.getSuperClass();
- }
- // print javadoc comment if any
- if (grammar.comment != null) {
- _println(grammar.comment);
- }
- // get prefix (replaces "public" and lets user specify)
- String prefix = "public";
- Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
- if (tprefix != null) {
- String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
- if (p != null) {
- prefix = p;
- }
- }
- print(prefix+" ");
- print("class " + grammar.getClassName() + " extends " + sup);
- println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix + ", TokenStream");
- Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
- if (tsuffix != null) {
- String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
- if (suffix != null) {
- print(", " + suffix); // must be an interface name for Java
- }
- }
- println(" {");
- // Generate user-defined lexer class members
- print(
- processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
- );
- //
- // Generate the constructor from InputStream, which in turn
- // calls the ByteBuffer constructor
- //
- println("public " + grammar.getClassName() + "(InputStream in) {");
- tabs++;
- println("this(new ByteBuffer(in));");
- tabs--;
- println("}");
- //
- // Generate the constructor from Reader, which in turn
- // calls the CharBuffer constructor
- //
- println("public " + grammar.getClassName() + "(Reader in) {");
- tabs++;
- println("this(new CharBuffer(in));");
- tabs--;
- println("}");
- println("public " + grammar.getClassName() + "(InputBuffer ib) {");
- tabs++;
- // if debugging, wrap the input buffer in a debugger
- if (grammar.debuggingOutput)
- println("this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)));");
- else
- println("this(new LexerSharedInputState(ib));");
- tabs--;
- println("}");
- //
- // Generate the constructor from InputBuffer (char or byte)
- //
- println("public " + grammar.getClassName() + "(LexerSharedInputState state) {");
- tabs++;
- println("super(state);");
- // if debugging, set up array variables and call user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging();");
- }
- // Generate the setting of various generated options.
- // These need to be before the literals since ANTLRHashString depends on
- // the casesensitive stuff.
- println("caseSensitiveLiterals = " + g.caseSensitiveLiterals + ";");
- println("setCaseSensitive(" + g.caseSensitive + ");");
- // Generate the initialization of a hashtable
- // containing the string literals used in the lexer
- // The literals variable itself is in CharScanner
- println("literals = new Hashtable();");
- Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
- while (keys.hasMoreElements()) {
- String key = (String)keys.nextElement();
- if (key.charAt(0) != '"') {
- continue;
- }
- TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
- if (sym instanceof StringLiteralSymbol) {
- StringLiteralSymbol s = (StringLiteralSymbol)sym;
- println("literals.put(new ANTLRHashString(" + s.getId() + ", this), new Integer(" + s.getTokenType() + "));");
- }
- }
- tabs--;
- Enumeration ids;
- println("}");
- // generate the rule name array for debugging
- if (grammar.debuggingOutput) {
- println("private static final String _ruleNames[] = {");
- ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol)
- println(" \"" + ((RuleSymbol)sym).getId() + "\",");
- }
- println("};");
- }
- // Generate nextToken() rule.
- // nextToken() is a synthetic lexer rule that is the implicit OR of all
- // user-defined lexer rules.
- genNextToken();
- // Generate code for each rule in the lexer
- ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- RuleSymbol sym = (RuleSymbol)ids.nextElement();
- // Don't generate the synthetic rules
- if (!sym.getId().equals("mnextToken")) {
- genRule(sym, false, ruleNum++);
- }
- exitIfError();
- }
- // Generate the semantic predicate map for debugging
- if (grammar.debuggingOutput)
- genSemPredMap();
- // Generate the bitsets used throughout the lexer
- genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
- println("");
- println("}");
- // Close the lexer output stream
- currentOutput.close();
- currentOutput = null;
- }
- /** Generate code for the given grammar element.
- * @param blk The (...)+ block to generate
- */
- public void gen(OneOrMoreBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen+(" + blk + ")");
- String label;
- String cnt;
- println("{");
- genBlockPreamble(blk);
- if (blk.getLabel() != null) {
- cnt = "_cnt_" + blk.getLabel();
- }
- else {
- cnt = "_cnt" + blk.ID;
- }
- println("int " + cnt + "=0;");
- if (blk.getLabel() != null) {
- label = blk.getLabel();
- }
- else {
- label = "_loop" + blk.ID;
- }
- println(label + ":");
- println("do {");
- tabs++;
- // generate the init action for ()+ ()* inside the loop
- // this allows us to do usefull EOF checking...
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- // or when lookahead derived purely from end-of-file
- // Lookahead analysis stops when end-of-file is hit,
- // returning set {epsilon}. Since {epsilon} is not
- // ambig with any real tokens, no error is reported
- // by deterministic() routines and we have to check
- // for the case where the lookahead depth didn't get
- // set to NONDETERMINISTIC (this only happens when the
- // FOLLOW contains real atoms + epsilon).
- boolean generateNonGreedyExitPath = false;
- int nonGreedyExitDepth = grammar.maxk;
- if (!blk.greedy &&
- blk.exitLookaheadDepth <= grammar.maxk &&
- blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
- generateNonGreedyExitPath = true;
- nonGreedyExitDepth = blk.exitLookaheadDepth;
- }
- else if (!blk.greedy &&
- blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
- generateNonGreedyExitPath = true;
- }
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- if (generateNonGreedyExitPath) {
- if (DEBUG_CODE_GENERATOR) {
- System.out.println("nongreedy (...)+ loop; exit depth is " +
- blk.exitLookaheadDepth);
- }
- String predictExit =
- getLookaheadTestExpression(blk.exitCache,
- nonGreedyExitDepth);
- println("// nongreedy exit test");
- println("if ( " + cnt + ">=1 && " + predictExit + ") break " + label + ";");
- }
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
- genBlockFinish(
- howToFinish,
- "if ( " + cnt + ">=1 ) { break " + label + "; } else {" + throwNoViable + "}"
- );
- println(cnt + "++;");
- tabs--;
- println("} while (true);");
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate the parser Java file */
- public void gen(ParserGrammar g) throws IOException {
- // if debugging, set up a new vector to keep track of sempred
- // strings for this grammar
- if (g.debuggingOutput)
- semPreds = new Vector();
- setGrammar(g);
- if (!(grammar instanceof ParserGrammar)) {
- antlrTool.panic("Internal error generating parser");
- }
- // Open the output stream for the parser and set the currentOutput
- // SAS: moved file setup so subclass could do it (for VAJ interface)
- setupOutput(grammar.getClassName());
- genAST = grammar.buildAST;
- tabs = 0;
- // Generate the header common to all output files.
- genHeader();
- // Do not use printAction because we assume tabs==0
- println(behavior.getHeaderAction(""));
- // Generate header for the parser
- println("import antlr.TokenBuffer;");
- println("import antlr.TokenStreamException;");
- println("import antlr.TokenStreamIOException;");
- println("import antlr.ANTLRException;");
- println("import antlr." + grammar.getSuperClass() + ";");
- println("import antlr.Token;");
- println("import antlr.TokenStream;");
- println("import antlr.RecognitionException;");
- println("import antlr.NoViableAltException;");
- println("import antlr.MismatchedTokenException;");
- println("import antlr.SemanticException;");
- println("import antlr.ParserSharedInputState;");
- println("import antlr.collections.impl.BitSet;");
- if ( genAST ) {
- println("import antlr.collections.AST;");
- println("import java.util.Hashtable;");
- println("import antlr.ASTFactory;");
- println("import antlr.ASTPair;");
- println("import antlr.collections.impl.ASTArray;");
- }
- // Output the user-defined parser preamble
- println(grammar.preambleAction.getText());
- // Generate parser class definition
- String sup = null;
- if (grammar.superClass != null)
- sup = grammar.superClass;
- else
- sup = "antlr." + grammar.getSuperClass();
- // print javadoc comment if any
- if (grammar.comment != null) {
- _println(grammar.comment);
- }
- // get prefix (replaces "public" and lets user specify)
- String prefix = "public";
- Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
- if (tprefix != null) {
- String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
- if (p != null) {
- prefix = p;
- }
- }
- print(prefix+" ");
- print("class " + grammar.getClassName() + " extends " + sup);
- println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
- Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
- if (tsuffix != null) {
- String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
- if (suffix != null)
- print(", " + suffix); // must be an interface name for Java
- }
- println(" {");
- // set up an array of all the rule names so the debugger can
- // keep track of them only by number -- less to store in tree...
- if (grammar.debuggingOutput) {
- println("private static final String _ruleNames[] = {");
- Enumeration ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol)
- println(" \"" + ((RuleSymbol)sym).getId() + "\",");
- }
- println("};");
- }
- // Generate user-defined parser class members
- print(
- processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
- );
- // Generate parser class constructor from TokenBuffer
- println("");
- println("protected " + grammar.getClassName() + "(TokenBuffer tokenBuf, int k) {");
- println(" super(tokenBuf,k);");
- println(" tokenNames = _tokenNames;");
- // if debugging, set up arrays and call the user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging(tokenBuf);");
- }
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(TokenBuffer tokenBuf) {");
- println(" this(tokenBuf," + grammar.maxk + ");");
- println("}");
- println("");
- // Generate parser class constructor from TokenStream
- println("protected " + grammar.getClassName() + "(TokenStream lexer, int k) {");
- println(" super(lexer,k);");
- println(" tokenNames = _tokenNames;");
- // if debugging, set up arrays and call the user-overridable
- // debugging setup method
- if (grammar.debuggingOutput) {
- println(" ruleNames = _ruleNames;");
- println(" semPredNames = _semPredNames;");
- println(" setupDebugging(lexer);");
- }
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(TokenStream lexer) {");
- println(" this(lexer," + grammar.maxk + ");");
- println("}");
- println("");
- println("public " + grammar.getClassName() + "(ParserSharedInputState state) {");
- println(" super(state," + grammar.maxk + ");");
- println(" tokenNames = _tokenNames;");
- if ( grammar.buildAST ) {
- println(" buildTokenTypeASTClassMap();");
- println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
- }
- println("}");
- println("");
- // Generate code for each rule in the grammar
- Enumeration ids = grammar.rules.elements();
- int ruleNum = 0;
- while (ids.hasMoreElements()) {
- GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
- if (sym instanceof RuleSymbol) {
- RuleSymbol rs = (RuleSymbol)sym;
- genRule(rs, rs.references.size() == 0, ruleNum++);
- }
- exitIfError();
- }
- // Generate the token names
- genTokenStrings();
- if ( grammar.buildAST ) {
- genTokenASTNodeMap();
- }
- // Generate the bitsets used throughout the grammar
- genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
- // Generate the semantic predicate map for debugging
- if (grammar.debuggingOutput)
- genSemPredMap();
- // Close class definition
- println("");
- println("}");
- // Close the parser output stream
- currentOutput.close();
- currentOutput = null;
- }
- /** Generate code for the given grammar element.
- * @param blk The rule-reference to generate
- */
- public void gen(RuleRefElement rr) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
- if (rs == null || !rs.isDefined()) {
- // Is this redundant???
- antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
- return;
- }
- if (!(rs instanceof RuleSymbol)) {
- // Is this redundant???
- antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
- return;
- }
- genErrorTryForElement(rr);
- // AST value for labeled rule refs in tree walker.
- // This is not AST construction; it is just the input tree node value.
- if (grammar instanceof TreeWalkerGrammar &&
- rr.getLabel() != null &&
- syntacticPredLevel == 0) {
- println(rr.getLabel() + " = _t==ASTNULL ? null : " + lt1Value + ";");
- }
- // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
- if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("_saveIndex=text.length();");
- }
- // Process return value assignment if any
- printTabs();
- if (rr.idAssign != null) {
- // Warn if the rule has no return type
- if (rs.block.returnAction == null) {
- antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
- }
- _print(rr.idAssign + "=");
- }
- else {
- // Warn about return value if any, but not inside syntactic predicate
- if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
- antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
- }
- }
- // Call the rule
- GenRuleInvocation(rr);
- // if in lexer and ! on element or alt or rule, save buffer index to kill later
- if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("text.setLength(_saveIndex);");
- }
- // if not in a syntactic predicate
- if (syntacticPredLevel == 0) {
- boolean doNoGuessTest = (
- grammar.hasSyntacticPredicate &&
- (
- grammar.buildAST && rr.getLabel() != null ||
- (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
- )
- );
- if (doNoGuessTest) {
- // println("if (inputState.guessing==0) {");
- // tabs++;
- }
- if (grammar.buildAST && rr.getLabel() != null) {
- // always gen variable for rule return on labeled rules
- println(rr.getLabel() + "_AST = (" + labeledElementASTType + ")returnAST;");
- }
- if (genAST) {
- switch (rr.getAutoGenType()) {
- case GrammarElement.AUTO_GEN_NONE:
- // println("theASTFactory.addASTChild(currentAST, returnAST);");
- println("astFactory.addASTChild(currentAST, returnAST);");
- break;
- case GrammarElement.AUTO_GEN_CARET:
- antlrTool.error("Internal: encountered ^ after rule reference");
- break;
- default:
- break;
- }
- }
- // if a lexer and labeled, Token label defined at rule level, just set it here
- if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
- println(rr.getLabel() + "=_returnToken;");
- }
- if (doNoGuessTest) {
- // tabs--;
- // println("}");
- }
- }
- genErrorCatchForElement(rr);
- }
- /** Generate code for the given grammar element.
- * @param blk The string-literal reference to generate
- */
- public void gen(StringLiteralElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
- // Variable declarations for labeled elements
- if (atom.getLabel() != null && syntacticPredLevel == 0) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(atom);
- // is there a bang on the literal?
- boolean oldsaveText = saveText;
- saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
- // matching
- genMatch(atom);
- saveText = oldsaveText;
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The token-range reference to generate
- */
- public void gen(TokenRangeElement r) {
- genErrorTryForElement(r);
- if (r.getLabel() != null && syntacticPredLevel == 0) {
- println(r.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(r);
- // match
- println("matchRange(" + r.beginText + "," + r.endText + ");");
- genErrorCatchForElement(r);
- }
- /** Generate code for the given grammar element.
- * @param blk The token-reference to generate
- */
- public void gen(TokenRefElement atom) {
- if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
- if (grammar instanceof LexerGrammar) {
- antlrTool.panic("Token reference found in lexer");
- }
- genErrorTryForElement(atom);
- // Assign Token value to token label variable
- if (atom.getLabel() != null && syntacticPredLevel == 0) {
- println(atom.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(atom);
- // matching
- genMatch(atom);
- genErrorCatchForElement(atom);
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- public void gen(TreeElement t) {
- // save AST cursor
- println("AST __t" + t.ID + " = _t;");
- // If there is a label on the root, then assign that to the variable
- if (t.root.getLabel() != null) {
- println(t.root.getLabel() + " = _t==ASTNULL ? null :(" + labeledElementASTType + ")_t;");
- }
- // check for invalid modifiers ! and ^ on tree element roots
- if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
- antlrTool.error("Suffixing a root node with '!' is not implemented",
- grammar.getFilename(), t.getLine(), t.getColumn());
- t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
- }
- if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
- antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
- grammar.getFilename(), t.getLine(), t.getColumn());
- t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
- }
- // Generate AST variables
- genElementAST(t.root);
- if (grammar.buildAST) {
- // Save the AST construction state
- println("ASTPair __currentAST" + t.ID + " = currentAST.copy();");
- // Make the next item added a child of the TreeElement root
- println("currentAST.root = currentAST.child;");
- println("currentAST.child = null;");
- }
- // match root
- if ( t.root instanceof WildcardElement ) {
- println("if ( _t==null ) throw new MismatchedTokenException();");
- }
- else {
- genMatch(t.root);
- }
- // move to list of children
- println("_t = _t.getFirstChild();");
- // walk list of children, generating code for each
- for (int i = 0; i < t.getAlternatives().size(); i++) {
- Alternative a = t.getAlternativeAt(i);
- AlternativeElement e = a.head;
- while (e != null) {
- e.generate();
- e = e.next;
- }
- }
- if (grammar.buildAST) {
- // restore the AST construction state to that just after the
- // tree root was added
- println("currentAST = __currentAST" + t.ID + ";");
- }
- // restore AST cursor
- println("_t = __t" + t.ID + ";");
- // move cursor to sibling of tree just parsed
- println("_t = _t.getNextSibling();");
- }
    /** Generate the tree-parser Java file.
     *  Emits the complete walker class for a tree grammar: file header,
     *  antlr imports, user preamble, class declaration (with optional
     *  classHeaderPrefix/classHeaderSuffix options), default constructor,
     *  one generated method per rule, the token-name table, and the
     *  lookahead bitsets used by the generated code.
     *  @param g the tree-walker grammar to generate code for
     *  @throws IOException if the output file cannot be opened or written
     */
    public void gen(TreeWalkerGrammar g) throws IOException {
        // SAS: debugging stuff removed for now...
        setGrammar(g);
        if (!(grammar instanceof TreeWalkerGrammar)) {
            antlrTool.panic("Internal error generating tree-walker");
        }
        // Open the output stream for the parser and set the currentOutput
        // SAS: move file open to method so subclass can override it
        // (mainly for VAJ interface)
        setupOutput(grammar.getClassName());
        genAST = grammar.buildAST;
        tabs = 0;
        // Generate the header common to all output files.
        genHeader();
        // Do not use printAction because we assume tabs==0
        println(behavior.getHeaderAction(""));
        // Generate header for the parser: fixed set of antlr runtime imports
        println("import antlr." + grammar.getSuperClass() + ";");
        println("import antlr.Token;");
        println("import antlr.collections.AST;");
        println("import antlr.RecognitionException;");
        println("import antlr.ANTLRException;");
        println("import antlr.NoViableAltException;");
        println("import antlr.MismatchedTokenException;");
        println("import antlr.SemanticException;");
        println("import antlr.collections.impl.BitSet;");
        println("import antlr.ASTPair;");
        println("import antlr.collections.impl.ASTArray;");
        // Output the user-defined parser premamble
        println(grammar.preambleAction.getText());
        // Generate parser class definition; superclass is either the
        // user-specified one or the antlr default for this grammar kind
        String sup = null;
        if (grammar.superClass != null) {
            sup = grammar.superClass;
        }
        else {
            sup = "antlr." + grammar.getSuperClass();
        }
        println("");
        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(grammar.comment);
        }
        // get prefix (replaces "public" and lets user specify)
        String prefix = "public";
        Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
        if (tprefix != null) {
            String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
            if (p != null) {
                prefix = p;
            }
        }
        print(prefix+" ");
        print("class " + grammar.getClassName() + " extends " + sup);
        // generated class always implements the token-types interface
        println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
        Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
        if (tsuffix != null) {
            String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
            if (suffix != null) {
                print(", " + suffix); // must be an interface name for Java
            }
        }
        println(" {");
        // Generate user-defined parser class members
        print(
            processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
        );
        // Generate default parser class constructor
        println("public " + grammar.getClassName() + "() {");
        tabs++;
        println("tokenNames = _tokenNames;");
        tabs--;
        println("}");
        println("");
        // Generate code for each rule in the grammar
        Enumeration ids = grammar.rules.elements();
        int ruleNum = 0;
        String ruleNameInits = "";
        while (ids.hasMoreElements()) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol) {
                RuleSymbol rs = (RuleSymbol)sym;
                // second arg flags rules never referenced by other rules
                genRule(rs, rs.references.size() == 0, ruleNum++);
            }
            exitIfError();
        }
        // Generate the token names
        genTokenStrings();
        // Generate the bitsets used throughout the grammar
        genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
        // Close class definition
        println("}");
        println("");
        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }
- /** Generate code for the given grammar element.
- * @param wc The wildcard element to generate
- */
- public void gen(WildcardElement wc) {
- // Variable assignment for labeled elements
- if (wc.getLabel() != null && syntacticPredLevel == 0) {
- println(wc.getLabel() + " = " + lt1Value + ";");
- }
- // AST
- genElementAST(wc);
- // Match anything but EOF
- if (grammar instanceof TreeWalkerGrammar) {
- println("if ( _t==null ) throw new MismatchedTokenException();");
- }
- else if (grammar instanceof LexerGrammar) {
- if (grammar instanceof LexerGrammar &&
- (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("_saveIndex=text.length();");
- }
- println("matchNot(EOF_CHAR);");
- if (grammar instanceof LexerGrammar &&
- (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
- println("text.setLength(_saveIndex);"); // kill text atom put in buffer
- }
- }
- else {
- println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
- }
- // tack on tree cursor motion if doing a tree walker
- if (grammar instanceof TreeWalkerGrammar) {
- println("_t = _t.getNextSibling();");
- }
- }
- /** Generate code for the given grammar element.
- * @param blk The (...)* block to generate
- */
- public void gen(ZeroOrMoreBlock blk) {
- if (DEBUG_CODE_GENERATOR) System.out.println("gen*(" + blk + ")");
- println("{");
- genBlockPreamble(blk);
- String label;
- if (blk.getLabel() != null) {
- label = blk.getLabel();
- }
- else {
- label = "_loop" + blk.ID;
- }
- println(label + ":");
- println("do {");
- tabs++;
- // generate the init action for ()* inside the loop
- // this allows us to do usefull EOF checking...
- genBlockInitAction(blk);
- // Tell AST generation to build subrule result
- String saveCurrentASTResult = currentASTResult;
- if (blk.getLabel() != null) {
- currentASTResult = blk.getLabel();
- }
- boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
- // generate exit test if greedy set to false
- // and an alt is ambiguous with exit branch
- // or when lookahead derived purely from end-of-file
- // Lookahead analysis stops when end-of-file is hit,
- // returning set {epsilon}. Since {epsilon} is not
- // ambig with any real tokens, no error is reported
- // by deterministic() routines and we have to check
- // for the case where the lookahead depth didn't get
- // set to NONDETERMINISTIC (this only happens when the
- // FOLLOW contains real atoms + epsilon).
- boolean generateNonGreedyExitPath = false;
- int nonGreedyExitDepth = grammar.maxk;
- if (!blk.greedy &&
- blk.exitLookaheadDepth <= grammar.maxk &&
- blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
- generateNonGreedyExitPath = true;
- nonGreedyExitDepth = blk.exitLookaheadDepth;
- }
- else if (!blk.greedy &&
- blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
- generateNonGreedyExitPath = true;
- }
- if (generateNonGreedyExitPath) {
- if (DEBUG_CODE_GENERATOR) {
- System.out.println("nongreedy (...)* loop; exit depth is " +
- blk.exitLookaheadDepth);
- }
- String predictExit =
- getLookaheadTestExpression(blk.exitCache,
- nonGreedyExitDepth);
- println("// nongreedy exit test");
- println("if (" + predictExit + ") break " + label + ";");
- }
- JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
- genBlockFinish(howToFinish, "break " + label + ";");
- tabs--;
- println("} while (true);");
- println("}");
- // Restore previous AST generation
- currentASTResult = saveCurrentASTResult;
- }
- /** Generate an alternative.
- * @param alt The alternative to generate
- * @param blk The block to which the alternative belongs
- */
- protected void genAlt(Alternative alt, AlternativeBlock blk) {
- // Save the AST generation state, and set it to that of the alt
- boolean savegenAST = genAST;
- genAST = genAST && alt.getAutoGen();
- boolean oldsaveTest = saveText;
- saveText = saveText && alt.getAutoGen();
- // Reset the variable name map for the alternative
- Hashtable saveMap = treeVariableMap;
- treeVariableMap = new Hashtable();
- // Generate try block around the alt for error handling
- if (alt.exceptionSpec != null) {
- println("try { // for error handling");
- tabs++;
- }
- AlternativeElement elem = alt.head;
- while (!(elem instanceof BlockEndElement)) {
- elem.generate(); // alt can begin with anything. Ask target to gen.
- elem = elem.next;
- }
- if (genAST) {
- if (blk instanceof RuleBlock) {
- // Set the AST return value for the rule
- RuleBlock rblk = (RuleBlock)blk;
- if (grammar.hasSyntacticPredicate) {
- // println("if ( inputState.guessing==0 ) {");
- // tabs++;
- }
- println(rblk.getRuleName() + "_AST = (" + labeledElementASTType + ")currentAST.root;");
- if (grammar.hasSyntacticPredicate) {
- // --tabs;
- // println("}");
- }
- }
- else if (blk.getLabel() != null) {
- // ### future: also set AST value for labeled subrules.
- // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
- antlrTool.warning("Labeled subrules not yet supported", grammar.getFilename(), blk.getLine(), blk.getColumn());
- }
- }
- if (alt.exceptionSpec != null) {
- // close try block
- tabs--;
- println("}");
- genErrorHandler(alt.exceptionSpec);
- }
- genAST = savegenAST;
- saveText = oldsaveTest;
- treeVariableMap = saveMap;
- }
- /** Generate all the bitsets to be used in the parser or lexer
- * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
- * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
- * Note that most languages do not support object initialization inside a
- * class definition, so other code-generators may have to separate the
- * bitset declarations from the initializations (e.g., put the initializations
- * in the generated constructor instead).
- * @param bitsetList The list of bitsets to generate.
- * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
- */
- protected void genBitsets(Vector bitsetList,
- int maxVocabulary
- ) {
- println("");
- for (int i = 0; i < bitsetList.size(); i++) {
- BitSet p = (BitSet)bitsetList.elementAt(i);
- // Ensure that generated BitSet is large enough for vocabulary
- p.growToInclude(maxVocabulary);
- genBitSet(p, i);
- }
- }
- /** Do something simple like:
- * private static final long[] mk_tokenSet_0() {
- * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
- * return data;
- * }
- * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
- *
- * Or, for large bitsets, optimize init so ranges are collapsed into loops.
- * This is most useful for lexers using unicode.
- */
- private void genBitSet(BitSet p, int id) {
- // initialization data
- println(
- "private static final long[] mk" + getBitsetName(id) + "() {"
- );
- int n = p.lengthInLongWords();
- if ( n<BITSET_OPTIMIZE_INIT_THRESHOLD ) {
- println("\tlong[] data = { " + p.toStringOfWords() + "};");
- }
- else {
- // will init manually, allocate space then set values
- println("\tlong[] data = new long["+n+"];");
- long[] elems = p.toPackedArray();
- for (int i = 0; i < elems.length;) {
- if ( elems[i]==0 ) {
- // done automatically by Java, don't waste time/code
- i++;
- continue;
- }
- if ( (i+1)==elems.length || elems[i]!=elems[i+1] ) {
- // last number or no run of numbers, just dump assignment
- …
Large files files are truncated, but you can click here to view the full file