/tool/src/main/java/org/antlr/codegen/CodeGenerator.java
Java | 1310 lines | 987 code | 77 blank | 246 comment | 93 complexity | 27b81c4a2f16716208571827af5284cd MD5 | raw file
- /*
- * [The "BSD license"]
- * Copyright (c) 2010 Terence Parr
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- package org.antlr.codegen;
- import org.antlr.Tool;
- import org.antlr.analysis.DFA;
- import org.antlr.analysis.*;
- import org.antlr.grammar.v3.ANTLRLexer;
- import org.antlr.grammar.v3.ANTLRParser;
- import org.antlr.grammar.v3.ActionTranslator;
- import org.antlr.grammar.v3.CodeGenTreeWalker;
- import org.antlr.misc.BitSet;
- import org.antlr.misc.*;
- import org.antlr.runtime.*;
- import org.antlr.runtime.tree.CommonTreeNodeStream;
- import org.antlr.tool.*;
- import org.stringtemplate.v4.*;
- import java.io.IOException;
- import java.io.Writer;
- import java.util.*;
- /** ANTLR's code generator.
- *
- * Generate recognizers derived from grammars. Language independence
- * achieved through the use of STGroup objects. All output
- * strings are completely encapsulated in the group files such as Java.stg.
- * Some computations are done that are unused by a particular language.
- * This generator just computes and sets the values into the templates;
- * the templates are free to use or not use the information.
- *
- * To make a new code generation target, define X.stg for language X
- * by copying from existing Y.stg most closely releated to your language;
- * e.g., to do CSharp.stg copy Java.stg. The template group file has a
- * bunch of templates that are needed by the code generator. You can add
- * a new target w/o even recompiling ANTLR itself. The language=X option
- * in a grammar file dictates which templates get loaded/used.
- *
- * Some language like C need both parser files and header files. Java needs
- * to have a separate file for the cyclic DFA as ANTLR generates bytecodes
- * directly (which cannot be in the generated parser Java file). To facilitate
- * this,
- *
- * cyclic can be in same file, but header, output must be searpate. recognizer
- * is in outptufile.
- */
public class CodeGenerator {
	/** When generating SWITCH statements, some targets might need to limit
	 *  the size (based upon the number of case labels). Generally, this
	 *  limit will be hit only for lexers where wildcard in a UNICODE
	 *  vocabulary environment would generate a SWITCH with 65000 labels.
	 */
	public final static int MSCL_DEFAULT = 300;
	public static int MAX_SWITCH_CASE_LABELS = MSCL_DEFAULT;

	/** Minimum number of alternatives before a switch is preferred over
	 *  an if-then-else chain (default 3).
	 */
	public final static int MSA_DEFAULT = 3;
	public static int MIN_SWITCH_ALTS = MSA_DEFAULT;
	public boolean GENERATE_SWITCHES_WHEN_POSSIBLE = true;

	/** When true, pop up the StringTemplate inspector GUI on the generated
	 *  output file template (debugging aid).
	 */
	public static boolean LAUNCH_ST_INSPECTOR = false;

	/** Max number of acyclic DFA states generated inline rather than via
	 *  state tables; do lots of states inline (needed for expression rules).
	 */
	public final static int MADSI_DEFAULT = 60;
	public static int MAX_ACYCLIC_DFA_STATES_INLINE = MADSI_DEFAULT;

	/** Classpath root under which the per-language .stg template dirs live. */
	public static String classpathTemplateRootDirectoryName =
		"org/antlr/codegen/templates";

	/** Which grammar are we generating code for?  Each generator
	 *  is attached to a specific grammar.
	 */
	public Grammar grammar;

	/** What language are we generating? */
	protected String language;

	/** The target specifies how to write out files and do other language
	 *  specific actions.
	 */
	public Target target = null;

	/** Where are the templates this generator should use to generate code? */
	protected STGroup templates;

	/** The basic output templates without AST or templates stuff; this will be
	 *  the templates loaded for the language such as Java.stg *and* the Dbg
	 *  stuff if turned on.  This is used for generating syntactic predicates.
	 */
	protected STGroup baseTemplates;

	// The three live output templates filled in by genRecognizer()
	protected ST recognizerST;
	protected ST outputFileST;
	protected ST headerFileST;

	/** Used to create unique labels */
	protected int uniqueLabelNumber = 1;

	/** A reference to the ANTLR tool so we can learn about output directories
	 *  and such.
	 */
	protected Tool tool;

	/** Generate debugging event method calls */
	protected boolean debug;

	/** Create a Tracer object and make the recognizer invoke this. */
	protected boolean trace;

	/** Track runtime parsing information about decisions etc...
	 *  This requires the debugging event mechanism to work.
	 */
	protected boolean profile;

	// target line width used when wrapping generated output
	protected int lineWidth = 72;

	/** I have factored out the generation of acyclic DFAs to separate class */
	public ACyclicDFACodeGenerator acyclicDFAGenerator =
		new ACyclicDFACodeGenerator(this);

	/** I have factored out the generation of cyclic DFAs to separate class */
	/*
	public CyclicDFACodeGenerator cyclicDFAGenerator =
		new CyclicDFACodeGenerator(this);
	 */

	public static final String VOCAB_FILE_EXTENSION = ".tokens";

	/** ST pattern for the .tokens vocab interchange file: one NAME=TYPE
	 *  line per token and per literal.
	 */
	protected final static String vocabFilePattern =
		"<tokens:{it|<it.name>=<it.type>\n}>" +
		"<literals:{it|<it.name>=<it.type>\n}>";
- public CodeGenerator(Tool tool, Grammar grammar, String language) {
- this.tool = tool;
- this.grammar = grammar;
- this.language = language;
- target = loadLanguageTarget(language);
- }
- public static Target loadLanguageTarget(String language) {
- Target target = null;
- String targetName = "org.antlr.codegen."+language+"Target";
- try {
- Class<? extends Target> c = Class.forName(targetName).asSubclass(Target.class);
- target = (Target)c.newInstance();
- }
- catch (ClassNotFoundException cnfe) {
- target = new Target(); // use default
- }
- catch (InstantiationException ie) {
- ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
- targetName,
- ie);
- }
- catch (IllegalAccessException cnfe) {
- ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
- targetName,
- cnfe);
- }
- return target;
- }
	/** Load the main language.stg template group file and layer on any
	 *  "filter" subgroups implied by the grammar's output option, the
	 *  debug flag, and the grammar type.  The import chain makes each
	 *  subgroup override/extend its supergroup, e.g.
	 *  Java &lt;- Dbg &lt;- AST &lt;- ASTParser &lt;- ASTDbg.
	 *
	 *  Side effects: sets this.templates (the full stack used for code gen)
	 *  and this.baseTemplates (language core, plus Dbg when debugging; used
	 *  for generating syntactic predicates).
	 */
	public void loadTemplates(String language) {
		String langDir = classpathTemplateRootDirectoryName+"/"+language;
		STGroup coreTemplates = new STGroupFile(langDir+"/"+language+".stg");

		baseTemplates = coreTemplates;

		// NOTE(review): dead check — `new STGroupFile(...)` never returns
		// null (ST v4 signals a missing file by throwing when templates are
		// accessed), so this error path cannot fire; likely a holdover from
		// the ST v3 loadGroup() API.
		if ( coreTemplates ==null ) {
			ErrorManager.error(ErrorManager.MSG_MISSING_CODE_GEN_TEMPLATES,
							   language);
			return;
		}

		// dynamically add subgroups that act like filters to apply to
		// their supergroup.  E.g., Java:Dbg:AST:ASTParser::ASTDbg.
		String outputOption = (String)grammar.getOption("output");
		if ( outputOption!=null && outputOption.equals("AST") ) {
			if ( debug && grammar.type!=Grammar.LEXER ) {
				// debug AST build: core <- Dbg <- AST <- (tree)parser <- ASTDbg
				STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
				dbgTemplates.importTemplates(coreTemplates);
				baseTemplates = dbgTemplates;
				STGroup astTemplates = new STGroupFile(langDir+"/AST.stg");
				astTemplates.importTemplates(dbgTemplates);
				STGroup astParserTemplates;
				// tree parsers rewrite trees; regular parsers build them
				if ( grammar.type==Grammar.TREE_PARSER ) {
					astParserTemplates = new STGroupFile(langDir+"/ASTTreeParser.stg");
					astParserTemplates.importTemplates(astTemplates);
				}
				else {
					astParserTemplates = new STGroupFile(langDir+"/ASTParser.stg");
					astParserTemplates.importTemplates(astTemplates);
				}
				STGroup astDbgTemplates = new STGroupFile(langDir+"/ASTDbg.stg");
				astDbgTemplates.importTemplates(astParserTemplates);
				templates = astDbgTemplates;
				dbgTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
				astDbgTemplates.iterateAcrossValues = true;
				astParserTemplates.iterateAcrossValues = true;
			}
			else {
				// non-debug AST build: core <- AST <- (tree)parser
				STGroup astTemplates = new STGroupFile(langDir+"/AST.stg");
				astTemplates.importTemplates(coreTemplates);
				STGroup astParserTemplates;
				if ( grammar.type==Grammar.TREE_PARSER ) {
					astParserTemplates = new STGroupFile(langDir+"/ASTTreeParser.stg");
					astParserTemplates.importTemplates(astTemplates);
				}
				else {
					astParserTemplates = new STGroupFile(langDir+"/ASTParser.stg");
					astParserTemplates.importTemplates(astTemplates);
				}
				templates = astParserTemplates;
				astTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
				astParserTemplates.iterateAcrossValues = true;
			}
		}
		else if ( outputOption!=null && outputOption.equals("template") ) {
			// template (StringTemplate) output, optionally debugged
			if ( debug && grammar.type!=Grammar.LEXER ) {
				STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
				dbgTemplates.importTemplates(coreTemplates);
				baseTemplates = dbgTemplates;
				STGroup stTemplates = new STGroupFile(langDir+"/ST.stg");
				stTemplates.importTemplates(dbgTemplates);
				templates = stTemplates;
				dbgTemplates.iterateAcrossValues = true;
			}
			else {
				STGroup stTemplates = new STGroupFile(langDir+"/ST.stg");
				stTemplates.importTemplates(coreTemplates);
				templates = stTemplates;
			}
			templates.iterateAcrossValues = true; // ST v3 compatibility with Maps
		}
		else if ( debug && grammar.type!=Grammar.LEXER ) {
			// plain output with debug events (lexers are never debugged)
			STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
			dbgTemplates.importTemplates(coreTemplates);
			templates = dbgTemplates;
			baseTemplates = templates;
			baseTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
		}
		else {
			// plain output: just the language core
			templates = coreTemplates;
			coreTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
		}
	}
	/** Given the grammar to which we are attached, walk the AST associated
	 *  with that grammar to create NFAs.  Then create the DFAs for all
	 *  decision points in the grammar by converting the NFAs to DFAs.
	 *  Finally, walk the AST again to generate code.
	 *
	 *  Either 1 or 2 files are written:
	 *
	 * 		recognizer: the main parser/lexer/treewalker item
	 * 		header file: language like C/C++ need extern definitions
	 *
	 *  The target, such as JavaTarget, dictates which files get written.
	 *
	 *  @return the filled-in outputFile template, or null if analysis or
	 *          template loading failed (errors already reported).
	 */
	public ST genRecognizer() {
		//System.out.println("### generate "+grammar.name+" recognizer");
		// LOAD OUTPUT TEMPLATES
		loadTemplates(language);
		if ( templates==null ) {
			return null;
		}

		// CREATE NFA FROM GRAMMAR, CREATE DFA FROM NFA
		if ( ErrorManager.doNotAttemptAnalysis() ) {
			return null;
		}
		target.performGrammarAnalysis(this, grammar);

		// some grammar analysis errors will not yield reliable DFA
		if ( ErrorManager.doNotAttemptCodeGen() ) {
			return null;
		}

		// OPTIMIZE DFA
		DFAOptimizer optimizer = new DFAOptimizer(grammar);
		optimizer.optimize();

		// OUTPUT FILE (contains recognizerST)
		outputFileST = templates.getInstanceOf("outputFile");

		// HEADER FILE
		if ( templates.isDefined("headerFile") ) {
			headerFileST = templates.getInstanceOf("headerFile");
		}
		else {
			// create a dummy to avoid null-checks all over code generator
			headerFileST = new ST(templates,"xyz");
			headerFileST.add("cyclicDFAs", (Object)null); // it normally sees this from outputFile
			//headerFileST.impl.name = "dummy-header-file";
		}

		boolean filterMode = grammar.getOption("filter")!=null &&
							  grammar.getOption("filter").equals("true");
		// backtracking is possible when there are synpreds, when any rule set
		// backtrack=true, or when filtering (filter mode implies backtracking)
        boolean canBacktrack = grammar.getSyntacticPredicates()!=null ||
                               grammar.composite.getRootGrammar().atLeastOneBacktrackOption ||
                               filterMode;

        // TODO: move this down further because generating the recognizer
		// alters the model with info on who uses predefined properties etc...
		// The actions here might refer to something.

		// The only two possible output files are available at this point.
		// Verify action scopes are ok for target and dump actions into output
		// Templates can say <actions.parser.header> for example.
		Map<String, Map<String, Object>> actions = grammar.getActions();
		verifyActionScopesOkForTarget(actions);
		// translate $x::y references
		translateActionAttributeReferences(actions);

		ST gateST = templates.getInstanceOf("actionGate");
		if ( filterMode ) {
			// if filtering, we need to set actions to execute at backtracking
			// level 1 not 0.
			gateST = templates.getInstanceOf("filteringActionGate");
		}
		grammar.setSynPredGateIfNotAlready(gateST);

		// every attribute below is pushed to BOTH the output file and the
		// (possibly dummy) header file so either can render it
		headerFileST.add("actions", actions);
		outputFileST.add("actions", actions);

		headerFileST.add("buildTemplate", grammar.buildTemplate());
		outputFileST.add("buildTemplate", grammar.buildTemplate());
		headerFileST.add("buildAST", grammar.buildAST());
		outputFileST.add("buildAST", grammar.buildAST());

		outputFileST.add("rewriteMode", grammar.rewriteMode());
		headerFileST.add("rewriteMode", grammar.rewriteMode());

		outputFileST.add("backtracking", canBacktrack);
		headerFileST.add("backtracking", canBacktrack);
		// turn on memoize attribute at grammar level so we can create ruleMemo.
		// each rule has memoize attr that hides this one, indicating whether
		// it needs to save results
		String memoize = (String)grammar.getOption("memoize");
		// memoize iff some rule memoizes, or (memoize=true option AND
		// backtracking is possible); && binds tighter than ||
		outputFileST.add("memoize",
						 (grammar.atLeastOneRuleMemoizes ||
						  memoize != null && memoize.equals("true") &&
						  canBacktrack));
		headerFileST.add("memoize",
						 (grammar.atLeastOneRuleMemoizes ||
						  memoize != null && memoize.equals("true") &&
						  canBacktrack));


		outputFileST.add("trace", trace);
		headerFileST.add("trace", trace);

		outputFileST.add("profile", profile);
		headerFileST.add("profile", profile);

		// RECOGNIZER — pick the lexer/parser/treeParser template by type
		if ( grammar.type==Grammar.LEXER ) {
			recognizerST = templates.getInstanceOf("lexer");
			outputFileST.add("LEXER", true);
			headerFileST.add("LEXER", true);
			recognizerST.add("filterMode",
							 filterMode);
		}
		else if ( grammar.type==Grammar.PARSER ||
			grammar.type==Grammar.COMBINED )
		{
			recognizerST = templates.getInstanceOf("parser");
			outputFileST.add("PARSER", true);
			headerFileST.add("PARSER", true);
		}
		else {
			recognizerST = templates.getInstanceOf("treeParser");
			outputFileST.add("TREE_PARSER", true);
			headerFileST.add("TREE_PARSER", true);
			recognizerST.add("filterMode",
							 filterMode);
		}
		outputFileST.add("recognizer", recognizerST);
		headerFileST.add("recognizer", recognizerST);
		outputFileST.add("actionScope",
						 grammar.getDefaultActionScope(grammar.type));
		headerFileST.add("actionScope",
						 grammar.getDefaultActionScope(grammar.type));

		String targetAppropriateFileNameString =
			target.getTargetStringLiteralFromString(grammar.getFileName());
		outputFileST.add("fileName", targetAppropriateFileNameString);
		headerFileST.add("fileName", targetAppropriateFileNameString);
		outputFileST.add("ANTLRVersion", tool.VERSION);
		headerFileST.add("ANTLRVersion", tool.VERSION);
		outputFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());
		headerFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());

		// GENERATE RECOGNIZER
		// Walk the AST holding the input grammar, this time generating code
		// Decisions are generated by using the precomputed DFAs
		// Fill in the various templates with data
		CodeGenTreeWalker gen = new CodeGenTreeWalker(new CommonTreeNodeStream(grammar.getGrammarTree()));
		try {
			gen.grammar_(
						grammar,
						recognizerST,
						outputFileST,
						headerFileST);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
							   re);
		}

		genTokenTypeConstants(recognizerST);
		genTokenTypeConstants(outputFileST);
		genTokenTypeConstants(headerFileST);

		// lexers don't need the tokenNames display table
		if ( grammar.type!=Grammar.LEXER ) {
			genTokenTypeNames(recognizerST);
			genTokenTypeNames(outputFileST);
			genTokenTypeNames(headerFileST);
		}

		// Now that we know what synpreds are used, we can set into template
		Set<String> synpredNames = null;
		if ( grammar.synPredNamesUsedInDFA.size()>0 ) {
			synpredNames = grammar.synPredNamesUsedInDFA;
		}
		outputFileST.add("synpreds", synpredNames);
		headerFileST.add("synpreds", synpredNames);

		// all recognizers can see Grammar object
		recognizerST.add("grammar", grammar);

		// debugging aid: pop up the ST inspector GUI before writing
		if (LAUNCH_ST_INSPECTOR) {
			outputFileST.inspect();
			if ( templates.isDefined("headerFile") ) headerFileST.inspect();
		}

		// WRITE FILES
		try {
			target.genRecognizerFile(tool,this,grammar,outputFileST);
			if ( templates.isDefined("headerFile") ) {
				ST extST = templates.getInstanceOf("headerFileExtension");
				target.genRecognizerHeaderFile(tool,this,grammar,headerFileST,extST.render());
			}
			// write out the vocab interchange file; used by antlr,
			// does not change per target
			ST tokenVocabSerialization = genTokenVocabOutput();
			String vocabFileName = getVocabFileName();
			if ( vocabFileName!=null ) {
				write(tokenVocabSerialization, vocabFileName);
			}
			//System.out.println(outputFileST.getDOTForDependencyGraph(false));
		}
		catch (IOException ioe) {
			ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, ioe);
		}
		/*
		 System.out.println("num obj.prop refs: "+ ASTExpr.totalObjPropRefs);
		 System.out.println("num reflection lookups: "+ ASTExpr.totalReflectionLookups);
		*/

		return outputFileST;
	}
- /** Some targets will have some extra scopes like C++ may have
- * '@headerfile:name {action}' or something. Make sure the
- * target likes the scopes in action table.
- */
- protected void verifyActionScopesOkForTarget(Map<String, Map<String, Object>> actions) {
- for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) {
- String scope = entry.getKey();
- if ( !target.isValidActionScope(grammar.type, scope) ) {
- // get any action from the scope to get error location
- Map<String, Object> scopeActions = entry.getValue();
- GrammarAST actionAST =
- (GrammarAST)scopeActions.values().iterator().next();
- ErrorManager.grammarError(
- ErrorManager.MSG_INVALID_ACTION_SCOPE,grammar,
- actionAST.getToken(),scope,
- grammar.getGrammarTypeString());
- }
- }
- }
- /** Actions may reference $x::y attributes, call translateAction on
- * each action and replace that action in the Map.
- */
- protected void translateActionAttributeReferences(Map<String, Map<String, Object>> actions) {
- for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) {
- Map<String, Object> scopeActions = entry.getValue();
- translateActionAttributeReferencesForSingleScope(null,scopeActions);
- }
- }
- /** Use for translating rule @init{...} actions that have no scope */
- public void translateActionAttributeReferencesForSingleScope(
- Rule r,
- Map<String, Object> scopeActions)
- {
- String ruleName=null;
- if ( r!=null ) {
- ruleName = r.name;
- }
- for (Map.Entry<String, Object> entry : scopeActions.entrySet()) {
- String name = entry.getKey();
- GrammarAST actionAST = (GrammarAST)entry.getValue();
- List<?> chunks = translateAction(ruleName,actionAST);
- scopeActions.put(name, chunks); // replace with translation
- }
- }
	/** Error recovery in ANTLR recognizers.
	 *
	 *  Based upon original ideas:
	 *
	 *  Algorithms + Data Structures = Programs by Niklaus Wirth
	 *
	 *  and
	 *
	 *  A note on error recovery in recursive descent parsers:
	 *  http://portal.acm.org/citation.cfm?id=947902.947905
	 *
	 *  Later, Josef Grosch had some good ideas:
	 *  Efficient and Comfortable Error Recovery in Recursive Descent Parsers:
	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
	 *
	 *  Like Grosch I implemented local FOLLOW sets that are combined at run-time
	 *  upon error to avoid parsing overhead.
	 *
	 *  Computes FIRST of the state following the referenced element and
	 *  pushes a {name,inName,bits,tokenTypes,tokenIndex} aggregate onto the
	 *  "bitsets" attribute of the recognizer, output-file and header-file
	 *  templates.
	 */
	public void generateLocalFOLLOW(GrammarAST referencedElementNode,
									String referencedElementName,
									String enclosingRuleName,
									int elementIndex)
	{
		/*
		System.out.println("compute FOLLOW "+grammar.name+"."+referencedElementNode.toString()+
						 " for "+referencedElementName+"#"+elementIndex +" in "+
						 enclosingRuleName+
						 " line="+referencedElementNode.getLine());
						 */
		NFAState followingNFAState = referencedElementNode.followingNFAState;
		LookaheadSet follow = null;
		if ( followingNFAState!=null ) {
			// compute follow for this element and, as side-effect, track
			// the rule LOOK sensitivity.
			follow = grammar.FIRST(followingNFAState);
		}

		if ( follow==null ) {
			// internal error, but recover with an empty set so codegen
			// can continue rather than NPE below
			ErrorManager.internalError("no follow state or cannot compute follow");
			follow = new LookaheadSet();
		}
		if ( follow.member(Label.EOF) ) {
			// TODO: can we just remove?  Seems needed here:
			// compilation_unit : global_statement* EOF
			// Actually i guess we resync to EOF regardless
			follow.remove(Label.EOF);
		}
		//System.out.println(" "+follow);

        List<Integer> tokenTypeList;
        long[] words;
		if ( follow.tokenTypeSet==null ) {
			// empty follow set -> one zero word, no token types
			words = new long[1];
            tokenTypeList = new ArrayList<Integer>();
        }
		else {
			BitSet bits = BitSet.of(follow.tokenTypeSet);
			words = bits.toPackedArray();
            tokenTypeList = follow.tokenTypeSet.toList();
        }
		// use the target to convert to hex strings (typically)
		String[] wordStrings = new String[words.length];
		for (int j = 0; j < words.length; j++) {
			long w = words[j];
			wordStrings[j] = target.getTarget64BitStringFromValue(w);
		}
		// same aggregate goes to all three templates so any of them can
		// render the FOLLOW bitset definitions
		recognizerST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
							 referencedElementName,
							 enclosingRuleName,
							 wordStrings,
							 tokenTypeList,
							 Utils.integer(elementIndex));
		outputFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
							 referencedElementName,
							 enclosingRuleName,
							 wordStrings,
							 tokenTypeList,
							 Utils.integer(elementIndex));
		headerFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
							 referencedElementName,
							 enclosingRuleName,
							 wordStrings,
							 tokenTypeList,
							 Utils.integer(elementIndex));
	}
- // L O O K A H E A D D E C I S I O N G E N E R A T I O N
- /** Generate code that computes the predicted alt given a DFA. The
- * recognizerST can be either the main generated recognizerTemplate
- * for storage in the main parser file or a separate file. It's up to
- * the code that ultimately invokes the codegen.g grammar rule.
- *
- * Regardless, the output file and header file get a copy of the DFAs.
- */
- public ST genLookaheadDecision(ST recognizerST,
- DFA dfa)
- {
- ST decisionST;
- // If we are doing inline DFA and this one is acyclic and LL(*)
- // I have to check for is-non-LL(*) because if non-LL(*) the cyclic
- // check is not done by DFA.verify(); that is, verify() avoids
- // doesStateReachAcceptState() if non-LL(*)
- if ( dfa.canInlineDecision() ) {
- decisionST =
- acyclicDFAGenerator.genFixedLookaheadDecision(getTemplates(), dfa);
- }
- else {
- // generate any kind of DFA here (cyclic or acyclic)
- dfa.createStateTables(this);
- outputFileST.add("cyclicDFAs", dfa);
- headerFileST.add("cyclicDFAs", dfa);
- decisionST = templates.getInstanceOf("dfaDecision");
- String description = dfa.getNFADecisionStartState().getDescription();
- description = target.getTargetStringLiteralFromString(description);
- if ( description!=null ) {
- decisionST.add("description", description);
- }
- decisionST.add("decisionNumber",
- Utils.integer(dfa.getDecisionNumber()));
- }
- return decisionST;
- }
	/** A special state is huge (too big for state tables) or has a predicated
	 *  edge.  Generate a simple if-then-else.  Cannot be an accept state as
	 *  they have no emanating edges.  Don't worry about switch vs if-then-else
	 *  because if you get here, the state is super complicated and needs an
	 *  if-then-else.  This is used by the new DFA scheme created June 2006.
	 *
	 *  Edge ordering matters: an EOT edge is the default clause, so it is
	 *  held aside and appended after all other edges.
	 */
	public ST generateSpecialState(DFAState s) {
		ST stateST;
		stateST = templates.getInstanceOf("cyclicDFAState");
		// assume an error clause is needed until an EOT (default) edge found
		stateST.add("needErrorClause", true);
		stateST.add("semPredState",
					s.isResolvedWithPredicates());
		stateST.add("stateNumber", s.stateNumber);
		stateST.add("decisionNumber", s.dfa.decisionNumber);

		boolean foundGatedPred = false;
		ST eotST = null;
		for (int i = 0; i < s.getNumberOfTransitions(); i++) {
			Transition edge = s.transition(i);
			ST edgeST;
			if ( edge.label.getAtom()==Label.EOT ) {
				// this is the default clause; has to held until last
				edgeST = templates.getInstanceOf("eotDFAEdge");
				stateST.remove("needErrorClause"); // EOT edge replaces the error clause
				eotST = edgeST;
			}
			else {
				edgeST = templates.getInstanceOf("cyclicDFAEdge");
				ST exprST =
					genLabelExpr(templates,edge,1);
				edgeST.add("labelExpr", exprST);
			}
			edgeST.add("edgeNumber", Utils.integer(i + 1));
			edgeST.add("targetStateNumber",
						 Utils.integer(edge.target.stateNumber));
			// stick in any gated predicates for any edge if not already a pred
			if ( !edge.label.isSemanticPredicate() ) {
				DFAState t = (DFAState)edge.target;
				SemanticContext preds = t.getGatedPredicatesInNFAConfigurations();
				if ( preds!=null ) {
					foundGatedPred = true;
					ST predST = preds.genExpr(this,
											  getTemplates(),
											  t.dfa);
					edgeST.add("predicates", predST.render());
				}
			}
			// non-EOT edges are added in order; EOT deferred to the end
			if ( edge.label.getAtom()!=Label.EOT ) {
				stateST.add("edges", edgeST);
			}
		}
		if ( foundGatedPred ) {
			// state has >= 1 edge with a gated pred (syn or sem)
			// must rewind input first, set flag.
			stateST.add("semPredState", foundGatedPred);
		}
		if ( eotST!=null ) {
			stateST.add("edges", eotST);
		}
		return stateST;
	}
- /** Generate an expression for traversing an edge. */
- protected ST genLabelExpr(STGroup templates,
- Transition edge,
- int k)
- {
- Label label = edge.label;
- if ( label.isSemanticPredicate() ) {
- return genSemanticPredicateExpr(templates, edge);
- }
- if ( label.isSet() ) {
- return genSetExpr(templates, label.getSet(), k, true);
- }
- // must be simple label
- ST eST = templates.getInstanceOf("lookaheadTest");
- eST.add("atom", getTokenTypeAsTargetLabel(label.getAtom()));
- eST.add("atomAsInt", Utils.integer(label.getAtom()));
- eST.add("k", Utils.integer(k));
- return eST;
- }
- protected ST genSemanticPredicateExpr(STGroup templates,
- Transition edge)
- {
- DFA dfa = ((DFAState)edge.target).dfa; // which DFA are we in
- Label label = edge.label;
- SemanticContext semCtx = label.getSemanticContext();
- return semCtx.genExpr(this,templates,dfa);
- }
- /** For intervals such as [3..3, 30..35], generate an expression that
- * tests the lookahead similar to LA(1)==3 || (LA(1)>=30&&LA(1)<=35)
- */
- public ST genSetExpr(STGroup templates,
- IntSet set,
- int k,
- boolean partOfDFA)
- {
- if ( !(set instanceof IntervalSet) ) {
- throw new IllegalArgumentException("unable to generate expressions for non IntervalSet objects");
- }
- IntervalSet iset = (IntervalSet)set;
- if ( iset.getIntervals()==null || iset.getIntervals().isEmpty() ) {
- ST emptyST = new ST(templates, "");
- emptyST.impl.name = "empty-set-expr";
- return emptyST;
- }
- String testSTName = "lookaheadTest";
- String testRangeSTName = "lookaheadRangeTest";
- if ( !partOfDFA ) {
- testSTName = "isolatedLookaheadTest";
- testRangeSTName = "isolatedLookaheadRangeTest";
- }
- ST setST = templates.getInstanceOf("setTest");
- Iterator<Interval> iter = iset.getIntervals().iterator();
- int rangeNumber = 1;
- while (iter.hasNext()) {
- Interval I = iter.next();
- int a = I.a;
- int b = I.b;
- ST eST;
- if ( a==b ) {
- eST = templates.getInstanceOf(testSTName);
- eST.add("atom", getTokenTypeAsTargetLabel(a));
- eST.add("atomAsInt", Utils.integer(a));
- //eST.add("k",Utils.integer(k));
- }
- else {
- eST = templates.getInstanceOf(testRangeSTName);
- eST.add("lower", getTokenTypeAsTargetLabel(a));
- eST.add("lowerAsInt", Utils.integer(a));
- eST.add("upper", getTokenTypeAsTargetLabel(b));
- eST.add("upperAsInt", Utils.integer(b));
- eST.add("rangeNumber", Utils.integer(rangeNumber));
- }
- eST.add("k", Utils.integer(k));
- setST.add("ranges", eST);
- rangeNumber++;
- }
- return setST;
- }
- // T O K E N D E F I N I T I O N G E N E R A T I O N
- /** Set attributes tokens and literals attributes in the incoming
- * code template. This is not the token vocab interchange file, but
- * rather a list of token type ID needed by the recognizer.
- */
- protected void genTokenTypeConstants(ST code) {
- // make constants for the token types
- for (String tokenID : grammar.getTokenIDs()) {
- int tokenType = grammar.getTokenType(tokenID);
- if ( tokenType==Label.EOF ||
- tokenType>=Label.MIN_TOKEN_TYPE )
- {
- // don't do FAUX labels 'cept EOF
- code.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
- }
- }
- }
- /** Generate a token names table that maps token type to a printable
- * name: either the label like INT or the literal like "begin".
- */
- protected void genTokenTypeNames(ST code) {
- for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) {
- String tokenName = grammar.getTokenDisplayName(t);
- if ( tokenName!=null ) {
- tokenName=target.getTargetStringLiteralFromString(tokenName, true);
- code.add("tokenNames", tokenName);
- }
- }
- }
- /** Get a meaningful name for a token type useful during code generation.
- * Literals without associated names are converted to the string equivalent
- * of their integer values. Used to generate x==ID and x==34 type comparisons
- * etc... Essentially we are looking for the most obvious way to refer
- * to a token type in the generated code. If in the lexer, return the
- * char literal translated to the target language. For example, ttype=10
- * will yield '\n' from the getTokenDisplayName method. That must
- * be converted to the target languages literals. For most C-derived
- * languages no translation is needed.
- */
- public String getTokenTypeAsTargetLabel(int ttype) {
- if ( grammar.type==Grammar.LEXER ) {
- String name = grammar.getTokenDisplayName(ttype);
- return target.getTargetCharLiteralFromANTLRCharLiteral(this,name);
- }
- return target.getTokenTypeAsTargetLabel(this,ttype);
- }
- /** Generate a token vocab file with all the token names/types. For example:
- * ID=7
- * FOR=8
- * 'for'=8
- *
- * This is independent of the target language; used by antlr internally
- */
- protected ST genTokenVocabOutput() {
- ST vocabFileST = new ST(vocabFilePattern);
- vocabFileST.add("literals",(Object)null); // "define" literals arg
- vocabFileST.add("tokens",(Object)null);
- vocabFileST.impl.name = "vocab-file";
- // make constants for the token names
- for (String tokenID : grammar.getTokenIDs()) {
- int tokenType = grammar.getTokenType(tokenID);
- if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
- vocabFileST.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
- }
- }
- // now dump the strings
- for (String literal : grammar.getStringLiterals()) {
- int tokenType = grammar.getTokenType(literal);
- if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
- vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType));
- }
- }
- return vocabFileST;
- }
- public List<? extends Object> translateAction(String ruleName,
- GrammarAST actionTree)
- {
- if ( actionTree.getType()==ANTLRParser.ARG_ACTION ) {
- return translateArgAction(ruleName, actionTree);
- }
- ActionTranslator translator = new ActionTranslator(this,ruleName,actionTree);
- List<Object> chunks = translator.translateToChunks();
- chunks = target.postProcessAction(chunks, actionTree.token);
- return chunks;
- }
- /** Translate an action like [3,"foo",a[3]] and return a List of the
- * translated actions. Because actions are themselves translated to a list
- * of chunks, must cat together into a ST>. Don't translate
- * to strings early as we need to eval templates in context.
- */
- public List<ST> translateArgAction(String ruleName,
- GrammarAST actionTree)
- {
- String actionText = actionTree.token.getText();
- List<String> args = getListOfArgumentsFromAction(actionText,',');
- List<ST> translatedArgs = new ArrayList<ST>();
- for (String arg : args) {
- if ( arg!=null ) {
- Token actionToken =
- new CommonToken(ANTLRParser.ACTION,arg);
- ActionTranslator translator =
- new ActionTranslator(this,ruleName,
- actionToken,
- actionTree.outerAltNum);
- List<Object> chunks = translator.translateToChunks();
- chunks = target.postProcessAction(chunks, actionToken);
- ST catST = new ST(templates, "<chunks>");
- catST.add("chunks", chunks);
- translatedArgs.add(catST);
- }
- }
- if ( translatedArgs.isEmpty() ) {
- return null;
- }
- return translatedArgs;
- }
- public static List<String> getListOfArgumentsFromAction(String actionText,
- int separatorChar)
- {
- List<String> args = new ArrayList<String>();
- getListOfArgumentsFromAction(actionText, 0, -1, separatorChar, args);
- return args;
- }
/** Given an arg action like
 *
 *  [x, (*a).foo(21,33), 3.2+1, '\n',
 *  "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]]
 *
 *  convert to a list of arguments.  Allow nested square brackets etc...
 *  Set separatorChar to ';' or ',' or whatever you want.
 *
 *  @param actionText    raw action text to scan (single-line // comments
 *                       are stripped before scanning)
 *  @param start         index at which scanning begins
 *  @param targetChar    closing char that terminates this nesting level,
 *                       or -1 at the top level; only the top level
 *                       records arguments into args
 *  @param separatorChar char separating arguments at the top level
 *  @param args          output list; trimmed argument text is appended
 *  @return index just past the matched targetChar (or past end of text);
 *          -1 if actionText is null
 */
public static int getListOfArgumentsFromAction(String actionText,
                                               int start,
                                               int targetChar,
                                               int separatorChar,
                                               List<String> args)
{
    if ( actionText==null ) {
        return -1;
    }
    // strip single-line comments so a separator inside one can't split an arg
    actionText = actionText.replaceAll("//.*\n", "");
    int n = actionText.length();
    //System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n));
    int p = start;
    int last = p; // start index of the argument currently being scanned
    while ( p<n && actionText.charAt(p)!=targetChar ) {
        int c = actionText.charAt(p);
        switch ( c ) {
            case '\'' :
                // skip a char literal; separators inside quotes don't count
                p++;
                while ( p<n && actionText.charAt(p)!='\'' ) {
                    if ( actionText.charAt(p)=='\\' && (p+1)<n &&
                         actionText.charAt(p+1)=='\'' )
                    {
                        p++; // skip escaped quote
                    }
                    p++;
                }
                p++;
                break;
            case '"' :
                // skip a string literal the same way
                p++;
                while ( p<n && actionText.charAt(p)!='\"' ) {
                    if ( actionText.charAt(p)=='\\' && (p+1)<n &&
                         actionText.charAt(p+1)=='\"' )
                    {
                        p++; // skip escaped quote
                    }
                    p++;
                }
                p++;
                break;
            case '(' :
                // recurse to skip to the matching close paren; nested
                // levels pass targetChar!=-1 so they never record args
                p = getListOfArgumentsFromAction(actionText,p+1,')',separatorChar,args);
                break;
            case '{' :
                p = getListOfArgumentsFromAction(actionText,p+1,'}',separatorChar,args);
                break;
            case '<' :
                if ( actionText.indexOf('>',p+1)>=p ) {
                    // do we see a matching '>' ahead? if so, hope it's a generic
                    // and not less followed by expr with greater than
                    p = getListOfArgumentsFromAction(actionText,p+1,'>',separatorChar,args);
                }
                else {
                    p++; // treat as normal char
                }
                break;
            case '[' :
                p = getListOfArgumentsFromAction(actionText,p+1,']',separatorChar,args);
                break;
            default :
                // only a top-level separator ends the current argument
                if ( c==separatorChar && targetChar==-1 ) {
                    String arg = actionText.substring(last, p);
                    //System.out.println("arg="+arg);
                    args.add(arg.trim());
                    last = p+1;
                }
                p++;
                break;
        }
    }
    // capture the trailing argument (top level only; skipped when empty)
    if ( targetChar==-1 && p<=n ) {
        String arg = actionText.substring(last, p).trim();
        //System.out.println("arg="+arg);
        if ( arg.length()>0 ) {
            args.add(arg.trim());
        }
    }
    p++; // step past the matched targetChar
    return p;
}
- /** Given a template constructor action like %foo(a={...}) in
- * an action, translate it to the appropriate template constructor
- * from the templateLib. This translates a *piece* of the action.
- */
- public ST translateTemplateConstructor(String ruleName,
- int outerAltNum,
- Token actionToken,
- String templateActionText)
- {
- // first, parse with antlr.g
- //System.out.println("translate template: "+templateActionText);
- ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(templateActionText));
- lexer.setFileName(grammar.getFileName());
- ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
- parser.setFileName(grammar.getFileName());
- ANTLRParser.rewrite_template_return parseResult = null;
- try {
- parseResult = parser.rewrite_template();
- }
- catch (RecognitionException re) {
- ErrorManager.grammarError(ErrorManager.MSG_INVALID_TEMPLATE_ACTION,
- grammar,
- actionToken,
- templateActionText);
- }
- catch (Exception tse) {
- ErrorManager.internalError("can't parse template action",tse);
- }
- GrammarAST rewriteTree = (GrammarAST)parseResult.getTree();
- // then translate via codegen.g
- CodeGenTreeWalker gen = new CodeGenTreeWalker(new CommonTreeNodeStream(rewriteTree));
- gen.init(grammar);
- gen.setCurrentRuleName(ruleName);
- gen.setOuterAltNum(outerAltNum);
- ST st = null;
- try {
- st = gen.rewrite_template();
- }
- catch (RecognitionException re) {
- ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
- re);
- }
- return st;
- }
- public void issueInvalidScopeError(String x,
- String y,
- Rule enclosingRule,
- Token actionToken,
- int outerAltNum)
- {
- //System.out.println("error $"+x+"::"+y);
- Rule r = grammar.getRule(x);
- AttributeScope scope = grammar.getGlobalScope(x);
- if ( scope==null ) {
- if ( r!=null ) {
- scope = r.ruleScope; // if not global, might be rule scope
- }
- }
- if ( scope==null ) {
- ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE,
- grammar,
- actionToken,
- x);
- }
- else if ( scope.getAttribute(y)==null ) {
- ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE_ATTRIBUTE,
- grammar,
- actionToken,
- x,
- y);
- }
- }
- public void issueInvalidAttributeError(String x,
- String y,
- Rule enclosingRule,
- Token actionToken,
- int outerAltNum)
- {
- //System.out.println("error $"+x+"."+y);
- if ( enclosingRule==null ) {
- // action not in a rule
- ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE,
- grammar,
- actionToken,
- x,
- y);
- return;
- }
- // action is in a rule
- Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x);
- if ( label!=null || enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null ) {
- // $rulelabel.attr or $ruleref.attr; must be unknown attr
- String refdRuleName = x;
- if ( label!=null ) {
- refdRuleName = enclosingRule.getRuleLabel(x).referencedRuleName;
- }
- Rule refdRule = grammar.getRule(refdRuleName);
- AttributeScope scope = refdRule.getAttributeScope(y);
- if ( scope==null ) {
- ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_RULE_ATTRIBUTE,
- grammar,
- actionToken,
- refdRuleName,
- y);
- }
- else if ( scope.isParameterScope ) {
- ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_PARAMETER_REF,
- grammar,
- actionToken,
- refdRuleName,
- y);
- }
- else if ( scope.isDynamicRuleScope ) {
- ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_SCOPE_ATTRIBUTE_REF,
- grammar,
- actionToken,
- refdRuleName,
- y);
- }
- }
- }
- public void issueInvalidAttributeError(String x,
- Rule enclosingRule,
- Token actionToken,
- int outerAltNum)
- {
- //System.out.println("error $"+x);
- if ( enclosingRule==null ) {
- // action not in a rule
- ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE,
- grammar,
- actionToken,
- x);
- return;
- }
- // action is in a rule
- Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x);
- AttributeScope scope = enclosingRule.getAttributeScope(x);
- if ( label!=null ||
- enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null ||
- enclosingRule.name.equals(x) )
- {
- ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_SCOPE,
- grammar,
- actionToken,
- x);
- }
- else if ( scope!=null && scope.isDynamicRuleScope ) {
- ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_ATTRIBUTE,
- grammar,
- actionToken,
- x);
- }
- else {
- ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_SIMPLE_ATTRIBUTE,
- grammar,
- actionToken,
- x);
- }
- }
- // M I S C
- public STGroup getTemplates() {
- return templates;
- }
- public STGroup getBaseTemplates() {
- return baseTemplates;
- }
- public void setDebug(boolean debug) {
- this.debug = debug;
- }
- public void setTrace(boolean trace) {
- this.trace = trace;
- }
- public void setProfile(boolean profile) {
- this.profile = profile;
- if ( profile ) {
- setDebug(true); // requires debug events
- }
- }
- public ST getRecognizerST() {
- return outputFileST;
- }
- /** Generate TParser.java and TLexer.java from T.g if combined, else
- * just use T.java as output regardless of type.
- */
- public String getRecognizerFileName(String name, int type) {
- ST extST = templates.getInstanceOf("codeFileExtension");
- String recognizerName = grammar.getRecognizerName();
- return recognizerName+extST.render();
- /*
- String suffix = "";
- if ( type==Grammar.COMBINED ||
- (type==Grammar.LEXER && !grammar.implicitLexer) )
- {
- suffix = Grammar.grammarTypeToFileNameSuffix[type];
- }
- return name+suffix+extST.toString();
- */
- }
- /** What is the name of the vocab file generated for this grammar?
- * Returns null if no .tokens file should be generated.
- */
- public String getVocabFileName() {
- if ( grammar.isBuiltFromString() ) {
- return null;
- }
- return grammar.name+VOCAB_FILE_EXTENSION;
- }
- public void write(ST code, String fileName) throws IOException {
- //long start = System.currentTimeMillis();
- Writer w = tool.getOutputFile(grammar, fileName);
- // Write the output to a StringWriter
- STWriter wr = new AutoIndentWriter(w);
- wr.setLineWidth(lineWidth);
- code.write(wr);
- w.close();
- //long stop = System.currentTimeMillis();
- //System.out.println("render time for "+fileName+": "+(int)(stop-start)+"ms");
- }
- /** You can generate a switch rather than if-then-else for a DFA state
- * if there are no semantic predicates and the number of edge label
- * values is small enough; e.g., don't generate a switch for a state
- * containing an edge label such as 20..52330 (the resulting byte codes
- * would overflow the method 65k limit probably).
- */
- protected boolean canGenerateSwitch(DFAState s) {
- if ( !GENERATE_SWITCHES_WHEN_POSSIBLE ) {
- return false;
- }
- int size = 0;
- for (int i = 0; i < s.getNumberOfTransitions(); i++) {
- Transition edge = s.transition(i);
- if ( edge.label.isSemanticPredicate() ) {
- return false;
- }
- // can't do a switch if the edges are going to require predicates
- if ( edge.label.getAtom()==Label.EOT ) {
- int EOTPredicts = ((DFAState)edge.target).getUniquelyPredictedAlt();
- if ( EOTPredicts==NFA.INVALID_ALT_NUMBER ) {
- // EOT target has to be a predicate then; no unique alt
- return false;
- }
- }
- // if target is a state with gated preds, we need to use preds on
- // this edge then to reach it.
- if ( ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null ) {
- return false;
- }
- size += edge.label.getSet().size();
- }
- if ( s.getNumberOfTransitions()<MIN_SWITCH_ALTS ||
- size>MAX_SWITCH_CASE_LABELS ) {
- return false;
- }
- return true;
- }
- /** Create a label to track a token / rule reference's result.
- * Technically, this is a place where I break model-view separation
- * as I am creating a variable name that could be invalid in a
- * target language, however, label ::= <ID><INT> is probably ok in
- * all languages we care about.
- */
- public String createUniqueLabel(String name) {
- return new StringBuffer()
- .append(name).append(uniqueLabelNumber++).toString();
- }
- }