
/Dependencies/boo/lib/antlr-2.7.5/antlr/JavaCodeGenerator.java

https://github.com/w4x/boolangstudio
Java | 3484 lines | 2405 code | 361 blank | 718 comment | 691 complexity | MD5 d399396ee38472edf7c786462382b1ca
Possible License(s): GPL-2.0
  1. package antlr;
  2. /* ANTLR Translator Generator
  3. * Project led by Terence Parr at http://www.jGuru.com
  4. * Software rights: http://www.antlr.org/license.html
  5. *
  6. * $Id: //depot/code/org.antlr/release/antlr-2.7.5/antlr/JavaCodeGenerator.java#1 $
  7. */
  8. import java.util.Enumeration;
  9. import java.util.Hashtable;
  10. import antlr.collections.impl.BitSet;
  11. import antlr.collections.impl.Vector;
  12. import java.io.PrintWriter; //SAS: changed for proper text file io
  13. import java.io.IOException;
  14. import java.io.FileWriter;
  15. /**Generate MyParser.java, MyLexer.java and MyParserTokenTypes.java */
  16. public class JavaCodeGenerator extends CodeGenerator {
  17. // non-zero if inside syntactic predicate generation
  18. protected int syntacticPredLevel = 0;
  19. // Are we generating ASTs (for parsers and tree parsers) right now?
  20. protected boolean genAST = false;
  21. // Are we saving the text consumed (for lexers) right now?
  22. protected boolean saveText = false;
  23. // Grammar parameters set up to handle different grammar classes.
  24. // These are used to get instanceof tests out of code generation
  25. String labeledElementType;
  26. String labeledElementASTType;
  27. String labeledElementInit;
  28. String commonExtraArgs;
  29. String commonExtraParams;
  30. String commonLocalVars;
  31. String lt1Value;
  32. String exceptionThrown;
  33. String throwNoViable;
  34. /** Tracks the rule being generated. Used for mapTreeId */
  35. RuleBlock currentRule;
  36. /** Tracks the rule or labeled subrule being generated. Used for
  37. AST generation. */
  38. String currentASTResult;
  39. /** Mapping between the ids used in the current alt, and the
  40. * names of variables used to represent their AST values.
  41. */
  42. Hashtable treeVariableMap = new Hashtable();
  43. /** Used to keep track of which AST variables have been defined in a rule
  44. * (except for the #rule_name and #rule_name_in vars)
  45. */
  46. Hashtable declaredASTVariables = new Hashtable();
  47. /* Count of unnamed generated variables */
  48. int astVarNumber = 1;
  49. /** Special value used to mark duplicate in treeVariableMap */
  50. protected static final String NONUNIQUE = new String();
  51. public static final int caseSizeThreshold = 127; // ascii is max
  52. private Vector semPreds;
  53. /** Create a Java code-generator using the given Grammar.
  54. * The caller must still call setTool, setBehavior, and setAnalyzer
  55. * before generating code.
  56. */
  57. public JavaCodeGenerator() {
  58. super();
  59. charFormatter = new JavaCharFormatter();
  60. }
  61. /** Adds a semantic predicate string to the sem pred vector
  62. These strings will be used to build an array of sem pred names
  63. when building a debugging parser. This method should only be
  64. called when the debug option is specified
  65. */
  66. protected int addSemPred(String predicate) {
  67. semPreds.appendElement(predicate);
  68. return semPreds.size() - 1;
  69. }
  70. public void exitIfError() {
  71. if (antlrTool.hasError()) {
  72. antlrTool.fatalError("Exiting due to errors.");
  73. }
  74. }
  75. /**Generate the parser, lexer, treeparser, and token types in Java */
  76. public void gen() {
  77. // Do the code generation
  78. try {
  79. // Loop over all grammars
  80. Enumeration grammarIter = behavior.grammars.elements();
  81. while (grammarIter.hasMoreElements()) {
  82. Grammar g = (Grammar)grammarIter.nextElement();
  83. // Connect all the components to each other
  84. g.setGrammarAnalyzer(analyzer);
  85. g.setCodeGenerator(this);
  86. analyzer.setGrammar(g);
  87. // To get the right overloading behavior across heterogeneous grammars
  88. setupGrammarParameters(g);
  89. g.generate();
  90. // print out the grammar with lookahead sets (and FOLLOWs)
  91. // System.out.print(g.toString());
  92. exitIfError();
  93. }
  94. // Loop over all token managers (some of which are lexers)
  95. Enumeration tmIter = behavior.tokenManagers.elements();
  96. while (tmIter.hasMoreElements()) {
  97. TokenManager tm = (TokenManager)tmIter.nextElement();
  98. if (!tm.isReadOnly()) {
  99. // Write the token manager tokens as Java
  100. // this must appear before genTokenInterchange so that
  101. // labels are set on string literals
  102. genTokenTypes(tm);
  103. // Write the token manager tokens as plain text
  104. genTokenInterchange(tm);
  105. }
  106. exitIfError();
  107. }
  108. }
  109. catch (IOException e) {
  110. antlrTool.reportException(e, null);
  111. }
  112. }
  113. /** Generate code for the given grammar element.
  114. * @param blk The {...} action to generate
  115. */
  116. public void gen(ActionElement action) {
  117. if (DEBUG_CODE_GENERATOR) System.out.println("genAction(" + action + ")");
  118. if (action.isSemPred) {
  119. genSemPred(action.actionText, action.line);
  120. }
  121. else {
  122. if (grammar.hasSyntacticPredicate) {
  123. println("if ( inputState.guessing==0 ) {");
  124. tabs++;
  125. }
  126. // get the name of the followSet for the current rule so that we
  127. // can replace $FOLLOW in the .g file.
  128. ActionTransInfo tInfo = new ActionTransInfo();
  129. String actionStr = processActionForSpecialSymbols(action.actionText,
  130. action.getLine(),
  131. currentRule,
  132. tInfo);
  133. if (tInfo.refRuleRoot != null) {
  134. // Somebody referenced "#rule", make sure translated var is valid
  135. // assignment to #rule is left as a ref also, meaning that assignments
  136. // with no other refs like "#rule = foo();" still force this code to be
  137. // generated (unnecessarily).
  138. println(tInfo.refRuleRoot + " = (" + labeledElementASTType + ")currentAST.root;");
  139. }
  140. // dump the translated action
  141. printAction(actionStr);
  142. if (tInfo.assignToRoot) {
  143. // Somebody did a "#rule=", reset internal currentAST.root
  144. println("currentAST.root = " + tInfo.refRuleRoot + ";");
  145. // reset the child pointer too to be last sibling in sibling list
  146. println("currentAST.child = " + tInfo.refRuleRoot + "!=null &&" + tInfo.refRuleRoot + ".getFirstChild()!=null ?");
  147. tabs++;
  148. println(tInfo.refRuleRoot + ".getFirstChild() : " + tInfo.refRuleRoot + ";");
  149. tabs--;
  150. println("currentAST.advanceChildToEnd();");
  151. }
  152. if (grammar.hasSyntacticPredicate) {
  153. tabs--;
  154. println("}");
  155. }
  156. }
  157. }
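/*
 * A rough sketch of what the printlns above emit for a user action inside a
 * grammar that has syntactic predicates, assuming the action references #rule
 * and its translated root variable is named r_AST (names are illustrative only):
 *
 *     if ( inputState.guessing==0 ) {
 *         r_AST = (MyAST)currentAST.root;
 *         ...translated action text...
 *     }
 *
 * When the action assigns to #rule, the currentAST.root/child reset lines and
 * currentAST.advanceChildToEnd() are appended inside the same guard.
 */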
  158. /** Generate code for the given grammar element.
  159. * @param blk The "x|y|z|..." block to generate
  160. */
  161. public void gen(AlternativeBlock blk) {
  162. if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
  163. println("{");
  164. genBlockPreamble(blk);
  165. genBlockInitAction(blk);
  166. // Tell AST generation to build subrule result
  167. String saveCurrentASTResult = currentASTResult;
  168. if (blk.getLabel() != null) {
  169. currentASTResult = blk.getLabel();
  170. }
  171. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  172. JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
  173. genBlockFinish(howToFinish, throwNoViable);
  174. println("}");
  175. // Restore previous AST generation
  176. currentASTResult = saveCurrentASTResult;
  177. }
  178. /** Generate code for the given grammar element.
  179. * @param blk The block-end element to generate. Block-end
  180. * elements are synthesized by the grammar parser to represent
  181. * the end of a block.
  182. */
  183. public void gen(BlockEndElement end) {
  184. if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
  185. }
  186. /** Generate code for the given grammar element.
  187. * @param blk The character literal reference to generate
  188. */
  189. public void gen(CharLiteralElement atom) {
  190. if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
  191. if (atom.getLabel() != null) {
  192. println(atom.getLabel() + " = " + lt1Value + ";");
  193. }
  194. boolean oldsaveText = saveText;
  195. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  196. genMatch(atom);
  197. saveText = oldsaveText;
  198. }
  199. /** Generate code for the given grammar element.
  200. * @param blk The character-range reference to generate
  201. */
  202. public void gen(CharRangeElement r) {
  203. if (r.getLabel() != null && syntacticPredLevel == 0) {
  204. println(r.getLabel() + " = " + lt1Value + ";");
  205. }
  206. boolean flag = ( grammar instanceof LexerGrammar &&
  207. ( !saveText ||
  208. r.getAutoGenType() ==
  209. GrammarElement.AUTO_GEN_BANG ) );
  210. if (flag) {
  211. println("_saveIndex=text.length();");
  212. }
  213. println("matchRange(" + r.beginText + "," + r.endText + ");");
  214. if (flag) {
  215. println("text.setLength(_saveIndex);");
  216. }
  217. }
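/*
 * Sketch of the lexer output for a character range such as 'a'..'z' (bounds
 * illustrative). The _saveIndex save/restore pair appears only when the matched
 * text is being discarded (saveText off, or a '!' suffix on the element):
 *
 *     _saveIndex=text.length();
 *     matchRange('a','z');
 *     text.setLength(_saveIndex);
 */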
  218. /** Generate the lexer Java file */
  219. public void gen(LexerGrammar g) throws IOException {
  220. // If debugging, create a new sempred vector for this grammar
  221. if (g.debuggingOutput)
  222. semPreds = new Vector();
  223. setGrammar(g);
  224. if (!(grammar instanceof LexerGrammar)) {
  225. antlrTool.panic("Internal error generating lexer");
  226. }
  227. // SAS: moved output creation to method so a subclass can change
  228. // how the output is generated (for VAJ interface)
  229. setupOutput(grammar.getClassName());
  230. genAST = false; // no way to gen trees.
  231. saveText = true; // save consumed characters.
  232. tabs = 0;
  233. // Generate header common to all Java output files
  234. genHeader();
  235. // Do not use printAction because we assume tabs==0
  236. println(behavior.getHeaderAction(""));
  237. // Generate header specific to lexer Java file
  238. // println("import java.io.FileInputStream;");
  239. println("import java.io.InputStream;");
  240. println("import antlr.TokenStreamException;");
  241. println("import antlr.TokenStreamIOException;");
  242. println("import antlr.TokenStreamRecognitionException;");
  243. println("import antlr.CharStreamException;");
  244. println("import antlr.CharStreamIOException;");
  245. println("import antlr.ANTLRException;");
  246. println("import java.io.Reader;");
  247. println("import java.util.Hashtable;");
  248. println("import antlr." + grammar.getSuperClass() + ";");
  249. println("import antlr.InputBuffer;");
  250. println("import antlr.ByteBuffer;");
  251. println("import antlr.CharBuffer;");
  252. println("import antlr.Token;");
  253. println("import antlr.CommonToken;");
  254. println("import antlr.RecognitionException;");
  255. println("import antlr.NoViableAltForCharException;");
  256. println("import antlr.MismatchedCharException;");
  257. println("import antlr.TokenStream;");
  258. println("import antlr.ANTLRHashString;");
  259. println("import antlr.LexerSharedInputState;");
  260. println("import antlr.collections.impl.BitSet;");
  261. println("import antlr.SemanticException;");
  262. // Generate user-defined lexer file preamble
  263. println(grammar.preambleAction.getText());
  264. // Generate lexer class definition
  265. String sup = null;
  266. if (grammar.superClass != null) {
  267. sup = grammar.superClass;
  268. }
  269. else {
  270. sup = "antlr." + grammar.getSuperClass();
  271. }
  272. // print javadoc comment if any
  273. if (grammar.comment != null) {
  274. _println(grammar.comment);
  275. }
  276. // get prefix (replaces "public" and lets user specify)
  277. String prefix = "public";
  278. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  279. if (tprefix != null) {
  280. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  281. if (p != null) {
  282. prefix = p;
  283. }
  284. }
  285. print(prefix+" ");
  286. print("class " + grammar.getClassName() + " extends " + sup);
  287. println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix + ", TokenStream");
  288. Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
  289. if (tsuffix != null) {
  290. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  291. if (suffix != null) {
  292. print(", " + suffix); // must be an interface name for Java
  293. }
  294. }
  295. println(" {");
  296. // Generate user-defined lexer class members
  297. print(
  298. processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
  299. );
  300. //
  301. // Generate the constructor from InputStream, which in turn
  302. // calls the ByteBuffer constructor
  303. //
  304. println("public " + grammar.getClassName() + "(InputStream in) {");
  305. tabs++;
  306. println("this(new ByteBuffer(in));");
  307. tabs--;
  308. println("}");
  309. //
  310. // Generate the constructor from Reader, which in turn
  311. // calls the CharBuffer constructor
  312. //
  313. println("public " + grammar.getClassName() + "(Reader in) {");
  314. tabs++;
  315. println("this(new CharBuffer(in));");
  316. tabs--;
  317. println("}");
  318. println("public " + grammar.getClassName() + "(InputBuffer ib) {");
  319. tabs++;
  320. // if debugging, wrap the input buffer in a debugger
  321. if (grammar.debuggingOutput)
  322. println("this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)));");
  323. else
  324. println("this(new LexerSharedInputState(ib));");
  325. tabs--;
  326. println("}");
  327. //
  328. // Generate the constructor from InputBuffer (char or byte)
  329. //
  330. println("public " + grammar.getClassName() + "(LexerSharedInputState state) {");
  331. tabs++;
  332. println("super(state);");
  333. // if debugging, set up array variables and call user-overridable
  334. // debugging setup method
  335. if (grammar.debuggingOutput) {
  336. println(" ruleNames = _ruleNames;");
  337. println(" semPredNames = _semPredNames;");
  338. println(" setupDebugging();");
  339. }
  340. // Generate the setting of various generated options.
  341. // These need to be before the literals since ANTLRHashString depends on
  342. // the case-sensitivity settings.
  343. println("caseSensitiveLiterals = " + g.caseSensitiveLiterals + ";");
  344. println("setCaseSensitive(" + g.caseSensitive + ");");
  345. // Generate the initialization of a hashtable
  346. // containing the string literals used in the lexer
  347. // The literals variable itself is in CharScanner
  348. println("literals = new Hashtable();");
  349. Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
  350. while (keys.hasMoreElements()) {
  351. String key = (String)keys.nextElement();
  352. if (key.charAt(0) != '"') {
  353. continue;
  354. }
  355. TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
  356. if (sym instanceof StringLiteralSymbol) {
  357. StringLiteralSymbol s = (StringLiteralSymbol)sym;
  358. println("literals.put(new ANTLRHashString(" + s.getId() + ", this), new Integer(" + s.getTokenType() + "));");
  359. }
  360. }
  361. tabs--;
  362. Enumeration ids;
  363. println("}");
  364. // generate the rule name array for debugging
  365. if (grammar.debuggingOutput) {
  366. println("private static final String _ruleNames[] = {");
  367. ids = grammar.rules.elements();
  368. int ruleNum = 0;
  369. while (ids.hasMoreElements()) {
  370. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  371. if (sym instanceof RuleSymbol)
  372. println(" \"" + ((RuleSymbol)sym).getId() + "\",");
  373. }
  374. println("};");
  375. }
  376. // Generate nextToken() rule.
  377. // nextToken() is a synthetic lexer rule that is the implicit OR of all
  378. // user-defined lexer rules.
  379. genNextToken();
  380. // Generate code for each rule in the lexer
  381. ids = grammar.rules.elements();
  382. int ruleNum = 0;
  383. while (ids.hasMoreElements()) {
  384. RuleSymbol sym = (RuleSymbol)ids.nextElement();
  385. // Don't generate the synthetic rules
  386. if (!sym.getId().equals("mnextToken")) {
  387. genRule(sym, false, ruleNum++);
  388. }
  389. exitIfError();
  390. }
  391. // Generate the semantic predicate map for debugging
  392. if (grammar.debuggingOutput)
  393. genSemPredMap();
  394. // Generate the bitsets used throughout the lexer
  395. genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
  396. println("");
  397. println("}");
  398. // Close the lexer output stream
  399. currentOutput.close();
  400. currentOutput = null;
  401. }
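/*
 * Sketch of the constructor chain this method emits for a lexer named MyLexer
 * (class name, option values, and the literal/token-type numbers are
 * illustrative; the debugging and literals lines depend on the grammar):
 *
 *     public MyLexer(InputStream in) { this(new ByteBuffer(in)); }
 *     public MyLexer(Reader in) { this(new CharBuffer(in)); }
 *     public MyLexer(InputBuffer ib) { this(new LexerSharedInputState(ib)); }
 *     public MyLexer(LexerSharedInputState state) {
 *         super(state);
 *         caseSensitiveLiterals = true;
 *         setCaseSensitive(true);
 *         literals = new Hashtable();
 *         literals.put(new ANTLRHashString("while", this), new Integer(4));
 *     }
 */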
  402. /** Generate code for the given grammar element.
  403. * @param blk The (...)+ block to generate
  404. */
  405. public void gen(OneOrMoreBlock blk) {
  406. if (DEBUG_CODE_GENERATOR) System.out.println("gen+(" + blk + ")");
  407. String label;
  408. String cnt;
  409. println("{");
  410. genBlockPreamble(blk);
  411. if (blk.getLabel() != null) {
  412. cnt = "_cnt_" + blk.getLabel();
  413. }
  414. else {
  415. cnt = "_cnt" + blk.ID;
  416. }
  417. println("int " + cnt + "=0;");
  418. if (blk.getLabel() != null) {
  419. label = blk.getLabel();
  420. }
  421. else {
  422. label = "_loop" + blk.ID;
  423. }
  424. println(label + ":");
  425. println("do {");
  426. tabs++;
  427. // generate the init action for ()+ ()* inside the loop
  428. // this allows us to do useful EOF checking...
  429. genBlockInitAction(blk);
  430. // Tell AST generation to build subrule result
  431. String saveCurrentASTResult = currentASTResult;
  432. if (blk.getLabel() != null) {
  433. currentASTResult = blk.getLabel();
  434. }
  435. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  436. // generate exit test if greedy set to false
  437. // and an alt is ambiguous with exit branch
  438. // or when lookahead derived purely from end-of-file
  439. // Lookahead analysis stops when end-of-file is hit,
  440. // returning set {epsilon}. Since {epsilon} is not
  441. // ambig with any real tokens, no error is reported
  442. // by deterministic() routines and we have to check
  443. // for the case where the lookahead depth didn't get
  444. // set to NONDETERMINISTIC (this only happens when the
  445. // FOLLOW contains real atoms + epsilon).
  446. boolean generateNonGreedyExitPath = false;
  447. int nonGreedyExitDepth = grammar.maxk;
  448. if (!blk.greedy &&
  449. blk.exitLookaheadDepth <= grammar.maxk &&
  450. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
  451. generateNonGreedyExitPath = true;
  452. nonGreedyExitDepth = blk.exitLookaheadDepth;
  453. }
  454. else if (!blk.greedy &&
  455. blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
  456. generateNonGreedyExitPath = true;
  457. }
  458. // generate exit test if greedy set to false
  459. // and an alt is ambiguous with exit branch
  460. if (generateNonGreedyExitPath) {
  461. if (DEBUG_CODE_GENERATOR) {
  462. System.out.println("nongreedy (...)+ loop; exit depth is " +
  463. blk.exitLookaheadDepth);
  464. }
  465. String predictExit =
  466. getLookaheadTestExpression(blk.exitCache,
  467. nonGreedyExitDepth);
  468. println("// nongreedy exit test");
  469. println("if ( " + cnt + ">=1 && " + predictExit + ") break " + label + ";");
  470. }
  471. JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  472. genBlockFinish(
  473. howToFinish,
  474. "if ( " + cnt + ">=1 ) { break " + label + "; } else {" + throwNoViable + "}"
  475. );
  476. println(cnt + "++;");
  477. tabs--;
  478. println("} while (true);");
  479. println("}");
  480. // Restore previous AST generation
  481. currentASTResult = saveCurrentASTResult;
  482. }
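/*
 * Sketch of the (...)+ skeleton produced above for an unlabeled block whose
 * internal ID is 3 (the 3 is illustrative); the optional nongreedy exit test is
 * printed first inside the loop, then genCommonBlock/genBlockFinish fill in the
 * alternatives and the no-viable-alt branch:
 *
 *     {
 *     int _cnt3=0;
 *     _loop3:
 *     do {
 *         ...optional nongreedy exit test, then the alternatives...
 *         _cnt3++;
 *     } while (true);
 *     }
 *
 * with the failure branch rendered as
 *     if ( _cnt3>=1 ) { break _loop3; } else { ...throwNoViable... }
 */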
  483. /** Generate the parser Java file */
  484. public void gen(ParserGrammar g) throws IOException {
  485. // if debugging, set up a new vector to keep track of sempred
  486. // strings for this grammar
  487. if (g.debuggingOutput)
  488. semPreds = new Vector();
  489. setGrammar(g);
  490. if (!(grammar instanceof ParserGrammar)) {
  491. antlrTool.panic("Internal error generating parser");
  492. }
  493. // Open the output stream for the parser and set the currentOutput
  494. // SAS: moved file setup so subclass could do it (for VAJ interface)
  495. setupOutput(grammar.getClassName());
  496. genAST = grammar.buildAST;
  497. tabs = 0;
  498. // Generate the header common to all output files.
  499. genHeader();
  500. // Do not use printAction because we assume tabs==0
  501. println(behavior.getHeaderAction(""));
  502. // Generate header for the parser
  503. println("import antlr.TokenBuffer;");
  504. println("import antlr.TokenStreamException;");
  505. println("import antlr.TokenStreamIOException;");
  506. println("import antlr.ANTLRException;");
  507. println("import antlr." + grammar.getSuperClass() + ";");
  508. println("import antlr.Token;");
  509. println("import antlr.TokenStream;");
  510. println("import antlr.RecognitionException;");
  511. println("import antlr.NoViableAltException;");
  512. println("import antlr.MismatchedTokenException;");
  513. println("import antlr.SemanticException;");
  514. println("import antlr.ParserSharedInputState;");
  515. println("import antlr.collections.impl.BitSet;");
  516. if ( genAST ) {
  517. println("import antlr.collections.AST;");
  518. println("import java.util.Hashtable;");
  519. println("import antlr.ASTFactory;");
  520. println("import antlr.ASTPair;");
  521. println("import antlr.collections.impl.ASTArray;");
  522. }
  523. // Output the user-defined parser preamble
  524. println(grammar.preambleAction.getText());
  525. // Generate parser class definition
  526. String sup = null;
  527. if (grammar.superClass != null)
  528. sup = grammar.superClass;
  529. else
  530. sup = "antlr." + grammar.getSuperClass();
  531. // print javadoc comment if any
  532. if (grammar.comment != null) {
  533. _println(grammar.comment);
  534. }
  535. // get prefix (replaces "public" and lets user specify)
  536. String prefix = "public";
  537. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  538. if (tprefix != null) {
  539. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  540. if (p != null) {
  541. prefix = p;
  542. }
  543. }
  544. print(prefix+" ");
  545. print("class " + grammar.getClassName() + " extends " + sup);
  546. println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
  547. Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
  548. if (tsuffix != null) {
  549. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  550. if (suffix != null)
  551. print(", " + suffix); // must be an interface name for Java
  552. }
  553. println(" {");
  554. // set up an array of all the rule names so the debugger can
  555. // keep track of them only by number -- less to store in tree...
  556. if (grammar.debuggingOutput) {
  557. println("private static final String _ruleNames[] = {");
  558. Enumeration ids = grammar.rules.elements();
  559. int ruleNum = 0;
  560. while (ids.hasMoreElements()) {
  561. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  562. if (sym instanceof RuleSymbol)
  563. println(" \"" + ((RuleSymbol)sym).getId() + "\",");
  564. }
  565. println("};");
  566. }
  567. // Generate user-defined parser class members
  568. print(
  569. processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
  570. );
  571. // Generate parser class constructor from TokenBuffer
  572. println("");
  573. println("protected " + grammar.getClassName() + "(TokenBuffer tokenBuf, int k) {");
  574. println(" super(tokenBuf,k);");
  575. println(" tokenNames = _tokenNames;");
  576. // if debugging, set up arrays and call the user-overridable
  577. // debugging setup method
  578. if (grammar.debuggingOutput) {
  579. println(" ruleNames = _ruleNames;");
  580. println(" semPredNames = _semPredNames;");
  581. println(" setupDebugging(tokenBuf);");
  582. }
  583. if ( grammar.buildAST ) {
  584. println(" buildTokenTypeASTClassMap();");
  585. println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
  586. }
  587. println("}");
  588. println("");
  589. println("public " + grammar.getClassName() + "(TokenBuffer tokenBuf) {");
  590. println(" this(tokenBuf," + grammar.maxk + ");");
  591. println("}");
  592. println("");
  593. // Generate parser class constructor from TokenStream
  594. println("protected " + grammar.getClassName() + "(TokenStream lexer, int k) {");
  595. println(" super(lexer,k);");
  596. println(" tokenNames = _tokenNames;");
  597. // if debugging, set up arrays and call the user-overridable
  598. // debugging setup method
  599. if (grammar.debuggingOutput) {
  600. println(" ruleNames = _ruleNames;");
  601. println(" semPredNames = _semPredNames;");
  602. println(" setupDebugging(lexer);");
  603. }
  604. if ( grammar.buildAST ) {
  605. println(" buildTokenTypeASTClassMap();");
  606. println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
  607. }
  608. println("}");
  609. println("");
  610. println("public " + grammar.getClassName() + "(TokenStream lexer) {");
  611. println(" this(lexer," + grammar.maxk + ");");
  612. println("}");
  613. println("");
  614. println("public " + grammar.getClassName() + "(ParserSharedInputState state) {");
  615. println(" super(state," + grammar.maxk + ");");
  616. println(" tokenNames = _tokenNames;");
  617. if ( grammar.buildAST ) {
  618. println(" buildTokenTypeASTClassMap();");
  619. println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
  620. }
  621. println("}");
  622. println("");
  623. // Generate code for each rule in the grammar
  624. Enumeration ids = grammar.rules.elements();
  625. int ruleNum = 0;
  626. while (ids.hasMoreElements()) {
  627. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  628. if (sym instanceof RuleSymbol) {
  629. RuleSymbol rs = (RuleSymbol)sym;
  630. genRule(rs, rs.references.size() == 0, ruleNum++);
  631. }
  632. exitIfError();
  633. }
  634. // Generate the token names
  635. genTokenStrings();
  636. if ( grammar.buildAST ) {
  637. genTokenASTNodeMap();
  638. }
  639. // Generate the bitsets used throughout the grammar
  640. genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
  641. // Generate the semantic predicate map for debugging
  642. if (grammar.debuggingOutput)
  643. genSemPredMap();
  644. // Close class definition
  645. println("");
  646. println("}");
  647. // Close the parser output stream
  648. currentOutput.close();
  649. currentOutput = null;
  650. }
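/*
 * Sketch of the constructors emitted above for a parser MyParser with k==2 and
 * buildAST enabled (name and k are illustrative; the AST and debugging lines
 * disappear when those options are off):
 *
 *     protected MyParser(TokenBuffer tokenBuf, int k) {
 *       super(tokenBuf,k);
 *       tokenNames = _tokenNames;
 *       buildTokenTypeASTClassMap();
 *       astFactory = new ASTFactory(getTokenTypeToASTClassMap());
 *     }
 *     public MyParser(TokenBuffer tokenBuf) { this(tokenBuf,2); }
 *     protected MyParser(TokenStream lexer, int k) { super(lexer,k); ... }
 *     public MyParser(TokenStream lexer) { this(lexer,2); }
 *     public MyParser(ParserSharedInputState state) { super(state,2); ... }
 */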
  651. /** Generate code for the given grammar element.
  652. * @param blk The rule-reference to generate
  653. */
  654. public void gen(RuleRefElement rr) {
  655. if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
  656. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
  657. if (rs == null || !rs.isDefined()) {
  658. // Is this redundant???
  659. antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
  660. return;
  661. }
  662. if (!(rs instanceof RuleSymbol)) {
  663. // Is this redundant???
  664. antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
  665. return;
  666. }
  667. genErrorTryForElement(rr);
  668. // AST value for labeled rule refs in tree walker.
  669. // This is not AST construction; it is just the input tree node value.
  670. if (grammar instanceof TreeWalkerGrammar &&
  671. rr.getLabel() != null &&
  672. syntacticPredLevel == 0) {
  673. println(rr.getLabel() + " = _t==ASTNULL ? null : " + lt1Value + ";");
  674. }
  675. // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
  676. if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  677. println("_saveIndex=text.length();");
  678. }
  679. // Process return value assignment if any
  680. printTabs();
  681. if (rr.idAssign != null) {
  682. // Warn if the rule has no return type
  683. if (rs.block.returnAction == null) {
  684. antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
  685. }
  686. _print(rr.idAssign + "=");
  687. }
  688. else {
  689. // Warn about return value if any, but not inside syntactic predicate
  690. if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
  691. antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
  692. }
  693. }
  694. // Call the rule
  695. GenRuleInvocation(rr);
  696. // if in lexer and ! on element or alt or rule, save buffer index to kill later
  697. if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  698. println("text.setLength(_saveIndex);");
  699. }
  700. // if not in a syntactic predicate
  701. if (syntacticPredLevel == 0) {
  702. boolean doNoGuessTest = (
  703. grammar.hasSyntacticPredicate &&
  704. (
  705. grammar.buildAST && rr.getLabel() != null ||
  706. (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
  707. )
  708. );
  709. if (doNoGuessTest) {
  710. // println("if (inputState.guessing==0) {");
  711. // tabs++;
  712. }
  713. if (grammar.buildAST && rr.getLabel() != null) {
  714. // always gen variable for rule return on labeled rules
  715. println(rr.getLabel() + "_AST = (" + labeledElementASTType + ")returnAST;");
  716. }
  717. if (genAST) {
  718. switch (rr.getAutoGenType()) {
  719. case GrammarElement.AUTO_GEN_NONE:
  720. // println("theASTFactory.addASTChild(currentAST, returnAST);");
  721. println("astFactory.addASTChild(currentAST, returnAST);");
  722. break;
  723. case GrammarElement.AUTO_GEN_CARET:
  724. antlrTool.error("Internal: encountered ^ after rule reference");
  725. break;
  726. default:
  727. break;
  728. }
  729. }
  730. // in a lexer, the Token label is declared at the rule level, so just set it here
  731. if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
  732. println(rr.getLabel() + "=_returnToken;");
  733. }
  734. if (doNoGuessTest) {
  735. // tabs--;
  736. // println("}");
  737. }
  738. }
  739. genErrorCatchForElement(rr);
  740. }
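/*
 * Sketch of the output for rule references in a parser that builds ASTs (rule
 * and label names illustrative; the call itself comes from GenRuleInvocation,
 * so its exact form is assumed here). A return-value assignment v=expr yields
 * roughly:
 *
 *     v=expr();
 *     astFactory.addASTChild(currentAST, returnAST);
 *
 * while a labeled reference e:expr also captures the rule's AST:
 *
 *     expr();
 *     e_AST = (MyAST)returnAST;
 *     astFactory.addASTChild(currentAST, returnAST);
 *
 * In a lexer, a labeled rule reference instead ends with "e=_returnToken;".
 */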
  741. /** Generate code for the given grammar element.
  742. * @param blk The string-literal reference to generate
  743. */
  744. public void gen(StringLiteralElement atom) {
  745. if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
  746. // Variable declarations for labeled elements
  747. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  748. println(atom.getLabel() + " = " + lt1Value + ";");
  749. }
  750. // AST
  751. genElementAST(atom);
  752. // is there a bang on the literal?
  753. boolean oldsaveText = saveText;
  754. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  755. // matching
  756. genMatch(atom);
  757. saveText = oldsaveText;
  758. // tack on tree cursor motion if doing a tree walker
  759. if (grammar instanceof TreeWalkerGrammar) {
  760. println("_t = _t.getNextSibling();");
  761. }
  762. }
  763. /** Generate code for the given grammar element.
  764. * @param blk The token-range reference to generate
  765. */
  766. public void gen(TokenRangeElement r) {
  767. genErrorTryForElement(r);
  768. if (r.getLabel() != null && syntacticPredLevel == 0) {
  769. println(r.getLabel() + " = " + lt1Value + ";");
  770. }
  771. // AST
  772. genElementAST(r);
  773. // match
  774. println("matchRange(" + r.beginText + "," + r.endText + ");");
  775. genErrorCatchForElement(r);
  776. }
  777. /** Generate code for the given grammar element.
  778. * @param blk The token-reference to generate
  779. */
  780. public void gen(TokenRefElement atom) {
  781. if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
  782. if (grammar instanceof LexerGrammar) {
  783. antlrTool.panic("Token reference found in lexer");
  784. }
  785. genErrorTryForElement(atom);
  786. // Assign Token value to token label variable
  787. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  788. println(atom.getLabel() + " = " + lt1Value + ";");
  789. }
  790. // AST
  791. genElementAST(atom);
  792. // matching
  793. genMatch(atom);
  794. genErrorCatchForElement(atom);
  795. // tack on tree cursor motion if doing a tree walker
  796. if (grammar instanceof TreeWalkerGrammar) {
  797. println("_t = _t.getNextSibling();");
  798. }
  799. }
  800. public void gen(TreeElement t) {
  801. // save AST cursor
  802. println("AST __t" + t.ID + " = _t;");
  803. // If there is a label on the root, then assign that to the variable
  804. if (t.root.getLabel() != null) {
  805. println(t.root.getLabel() + " = _t==ASTNULL ? null :(" + labeledElementASTType + ")_t;");
  806. }
  807. // check for invalid modifiers ! and ^ on tree element roots
  808. if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
  809. antlrTool.error("Suffixing a root node with '!' is not implemented",
  810. grammar.getFilename(), t.getLine(), t.getColumn());
  811. t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
  812. }
  813. if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
  814. antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
  815. grammar.getFilename(), t.getLine(), t.getColumn());
  816. t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
  817. }
  818. // Generate AST variables
  819. genElementAST(t.root);
  820. if (grammar.buildAST) {
  821. // Save the AST construction state
  822. println("ASTPair __currentAST" + t.ID + " = currentAST.copy();");
  823. // Make the next item added a child of the TreeElement root
  824. println("currentAST.root = currentAST.child;");
  825. println("currentAST.child = null;");
  826. }
  827. // match root
  828. if ( t.root instanceof WildcardElement ) {
  829. println("if ( _t==null ) throw new MismatchedTokenException();");
  830. }
  831. else {
  832. genMatch(t.root);
  833. }
  834. // move to list of children
  835. println("_t = _t.getFirstChild();");
  836. // walk list of children, generating code for each
  837. for (int i = 0; i < t.getAlternatives().size(); i++) {
  838. Alternative a = t.getAlternativeAt(i);
  839. AlternativeElement e = a.head;
  840. while (e != null) {
  841. e.generate();
  842. e = e.next;
  843. }
  844. }
  845. if (grammar.buildAST) {
  846. // restore the AST construction state to that just after the
  847. // tree root was added
  848. println("currentAST = __currentAST" + t.ID + ";");
  849. }
  850. // restore AST cursor
  851. println("_t = __t" + t.ID + ";");
  852. // move cursor to sibling of tree just parsed
  853. println("_t = _t.getNextSibling();");
  854. }
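/*
 * Sketch of the tree-matching code emitted above for a tree pattern whose
 * element ID is 7 (ID illustrative; the ASTPair lines appear only when buildAST
 * is on, and the root match form comes from genMatch):
 *
 *     AST __t7 = _t;
 *     ASTPair __currentAST7 = currentAST.copy();
 *     currentAST.root = currentAST.child;
 *     currentAST.child = null;
 *     ...match the root...
 *     _t = _t.getFirstChild();
 *     ...children alternatives...
 *     currentAST = __currentAST7;
 *     _t = __t7;
 *     _t = _t.getNextSibling();
 */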
  855. /** Generate the tree-parser Java file */
  856. public void gen(TreeWalkerGrammar g) throws IOException {
  857. // SAS: debugging stuff removed for now...
  858. setGrammar(g);
  859. if (!(grammar instanceof TreeWalkerGrammar)) {
  860. antlrTool.panic("Internal error generating tree-walker");
  861. }
  862. // Open the output stream for the parser and set the currentOutput
  863. // SAS: move file open to method so subclass can override it
  864. // (mainly for VAJ interface)
  865. setupOutput(grammar.getClassName());
  866. genAST = grammar.buildAST;
  867. tabs = 0;
  868. // Generate the header common to all output files.
  869. genHeader();
  870. // Do not use printAction because we assume tabs==0
  871. println(behavior.getHeaderAction(""));
  872. // Generate header for the parser
  873. println("import antlr." + grammar.getSuperClass() + ";");
  874. println("import antlr.Token;");
  875. println("import antlr.collections.AST;");
  876. println("import antlr.RecognitionException;");
  877. println("import antlr.ANTLRException;");
  878. println("import antlr.NoViableAltException;");
  879. println("import antlr.MismatchedTokenException;");
  880. println("import antlr.SemanticException;");
  881. println("import antlr.collections.impl.BitSet;");
  882. println("import antlr.ASTPair;");
  883. println("import antlr.collections.impl.ASTArray;");
  884. // Output the user-defined parser preamble
  885. println(grammar.preambleAction.getText());
  886. // Generate parser class definition
  887. String sup = null;
  888. if (grammar.superClass != null) {
  889. sup = grammar.superClass;
  890. }
  891. else {
  892. sup = "antlr." + grammar.getSuperClass();
  893. }
  894. println("");
  895. // print javadoc comment if any
  896. if (grammar.comment != null) {
  897. _println(grammar.comment);
  898. }
  899. // get prefix (replaces "public" and lets user specify)
  900. String prefix = "public";
  901. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  902. if (tprefix != null) {
  903. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  904. if (p != null) {
  905. prefix = p;
  906. }
  907. }
  908. print(prefix+" ");
  909. print("class " + grammar.getClassName() + " extends " + sup);
  910. println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
  911. Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
  912. if (tsuffix != null) {
  913. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  914. if (suffix != null) {
  915. print(", " + suffix); // must be an interface name for Java
  916. }
  917. }
  918. println(" {");
  919. // Generate user-defined parser class members
  920. print(
  921. processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
  922. );
  923. // Generate default parser class constructor
  924. println("public " + grammar.getClassName() + "() {");
  925. tabs++;
  926. println("tokenNames = _tokenNames;");
  927. tabs--;
  928. println("}");
  929. println("");
  930. // Generate code for each rule in the grammar
  931. Enumeration ids = grammar.rules.elements();
  932. int ruleNum = 0;
  933. String ruleNameInits = "";
  934. while (ids.hasMoreElements()) {
  935. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  936. if (sym instanceof RuleSymbol) {
  937. RuleSymbol rs = (RuleSymbol)sym;
  938. genRule(rs, rs.references.size() == 0, ruleNum++);
  939. }
  940. exitIfError();
  941. }
  942. // Generate the token names
  943. genTokenStrings();
  944. // Generate the bitsets used throughout the grammar
  945. genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
  946. // Close class definition
  947. println("}");
  948. println("");
  949. // Close the parser output stream
  950. currentOutput.close();
  951. currentOutput = null;
  952. }
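/*
 * Sketch of the tree-parser class skeleton this method writes for a walker
 * named MyWalker (name illustrative; imports, rule methods, _tokenNames and
 * bitset definitions omitted):
 *
 *     public class MyWalker extends antlr.TreeParser implements MyWalkerTokenTypes {
 *         public MyWalker() {
 *             tokenNames = _tokenNames;
 *         }
 *         ...one method per rule...
 *     }
 */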
  953. /** Generate code for the given grammar element.
  954. * @param wc The wildcard element to generate
  955. */
  956. public void gen(WildcardElement wc) {
  957. // Variable assignment for labeled elements
  958. if (wc.getLabel() != null && syntacticPredLevel == 0) {
  959. println(wc.getLabel() + " = " + lt1Value + ";");
  960. }
  961. // AST
  962. genElementAST(wc);
  963. // Match anything but EOF
  964. if (grammar instanceof TreeWalkerGrammar) {
  965. println("if ( _t==null ) throw new MismatchedTokenException();");
  966. }
  967. else if (grammar instanceof LexerGrammar) {
  968. if (grammar instanceof LexerGrammar &&
  969. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  970. println("_saveIndex=text.length();");
  971. }
  972. println("matchNot(EOF_CHAR);");
  973. if (grammar instanceof LexerGrammar &&
  974. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  975. println("text.setLength(_saveIndex);"); // kill text atom put in buffer
  976. }
  977. }
  978. else {
  979. println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
  980. }
  981. // tack on tree cursor motion if doing a tree walker
  982. if (grammar instanceof TreeWalkerGrammar) {
  983. println("_t = _t.getNextSibling();");
  984. }
  985. }
  986. /** Generate code for the given grammar element.
  987. * @param blk The (...)* block to generate
  988. */
  989. public void gen(ZeroOrMoreBlock blk) {
  990. if (DEBUG_CODE_GENERATOR) System.out.println("gen*(" + blk + ")");
  991. println("{");
  992. genBlockPreamble(blk);
  993. String label;
  994. if (blk.getLabel() != null) {
  995. label = blk.getLabel();
  996. }
  997. else {
  998. label = "_loop" + blk.ID;
  999. }
  1000. println(label + ":");
  1001. println("do {");
  1002. tabs++;
  1003. // generate the init action for ()* inside the loop
  1004. // this allows us to do useful EOF checking...
  1005. genBlockInitAction(blk);
  1006. // Tell AST generation to build subrule result
  1007. String saveCurrentASTResult = currentASTResult;
  1008. if (blk.getLabel() != null) {
  1009. currentASTResult = blk.getLabel();
  1010. }
  1011. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  1012. // generate exit test if greedy set to false
  1013. // and an alt is ambiguous with exit branch
  1014. // or when lookahead derived purely from end-of-file
  1015. // Lookahead analysis stops when end-of-file is hit,
  1016. // returning set {epsilon}. Since {epsilon} is not
  1017. // ambig with any real tokens, no error is reported
  1018. // by deterministic() routines and we have to check
  1019. // for the case where the lookahead depth didn't get
  1020. // set to NONDETERMINISTIC (this only happens when the
  1021. // FOLLOW contains real atoms + epsilon).
  1022. boolean generateNonGreedyExitPath = false;
  1023. int nonGreedyExitDepth = grammar.maxk;
  1024. if (!blk.greedy &&
  1025. blk.exitLookaheadDepth <= grammar.maxk &&
  1026. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
  1027. generateNonGreedyExitPath = true;
  1028. nonGreedyExitDepth = blk.exitLookaheadDepth;
  1029. }
  1030. else if (!blk.greedy &&
  1031. blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
  1032. generateNonGreedyExitPath = true;
  1033. }
  1034. if (generateNonGreedyExitPath) {
  1035. if (DEBUG_CODE_GENERATOR) {
  1036. System.out.println("nongreedy (...)* loop; exit depth is " +
  1037. blk.exitLookaheadDepth);
  1038. }
  1039. String predictExit =
  1040. getLookaheadTestExpression(blk.exitCache,
  1041. nonGreedyExitDepth);
  1042. println("// nongreedy exit test");
  1043. println("if (" + predictExit + ") break " + label + ";");
  1044. }
  1045. JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  1046. genBlockFinish(howToFinish, "break " + label + ";");
  1047. tabs--;
  1048. println("} while (true);");
  1049. println("}");
  1050. // Restore previous AST generation
  1051. currentASTResult = saveCurrentASTResult;
  1052. }
  1053. /** Generate an alternative.
  1054. * @param alt The alternative to generate
  1055. * @param blk The block to which the alternative belongs
  1056. */
  1057. protected void genAlt(Alternative alt, AlternativeBlock blk) {
  1058. // Save the AST generation state, and set it to that of the alt
  1059. boolean savegenAST = genAST;
  1060. genAST = genAST && alt.getAutoGen();
  1061. boolean oldsaveTest = saveText;
  1062. saveText = saveText && alt.getAutoGen();
  1063. // Reset the variable name map for the alternative
  1064. Hashtable saveMap = treeVariableMap;
  1065. treeVariableMap = new Hashtable();
  1066. // Generate try block around the alt for error handling
  1067. if (alt.exceptionSpec != null) {
  1068. println("try { // for error handling");
  1069. tabs++;
  1070. }
  1071. AlternativeElement elem = alt.head;
  1072. while (!(elem instanceof BlockEndElement)) {
  1073. elem.generate(); // alt can begin with anything. Ask target to gen.
  1074. elem = elem.next;
  1075. }
  1076. if (genAST) {
  1077. if (blk instanceof RuleBlock) {
  1078. // Set the AST return value for the rule
  1079. RuleBlock rblk = (RuleBlock)blk;
  1080. if (grammar.hasSyntacticPredicate) {
  1081. // println("if ( inputState.guessing==0 ) {");
  1082. // tabs++;
  1083. }
  1084. println(rblk.getRuleName() + "_AST = (" + labeledElementASTType + ")currentAST.root;");
  1085. if (grammar.hasSyntacticPredicate) {
  1086. // --tabs;
  1087. // println("}");
  1088. }
  1089. }
  1090. else if (blk.getLabel() != null) {
  1091. // ### future: also set AST value for labeled subrules.
  1092. // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
  1093. antlrTool.warning("Labeled subrules not yet supported", grammar.getFilename(), blk.getLine(), blk.getColumn());
  1094. }
  1095. }
  1096. if (alt.exceptionSpec != null) {
  1097. // close try block
  1098. tabs--;
  1099. println("}");
  1100. genErrorHandler(alt.exceptionSpec);
  1101. }
  1102. genAST = savegenAST;
  1103. saveText = oldsaveTest;
  1104. treeVariableMap = saveMap;
  1105. }
  1106. /** Generate all the bitsets to be used in the parser or lexer
  1107. * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
  1108. * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
  1109. * Note that most languages do not support object initialization inside a
  1110. * class definition, so other code-generators may have to separate the
  1111. * bitset declarations from the initializations (e.g., put the initializations
  1112. * in the generated constructor instead).
  1113. * @param bitsetList The list of bitsets to generate.
  1114. * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
  1115. */
  1116. protected void genBitsets(Vector bitsetList,
  1117. int maxVocabulary
  1118. ) {
  1119. println("");
  1120. for (int i = 0; i < bitsetList.size(); i++) {
  1121. BitSet p = (BitSet)bitsetList.elementAt(i);
  1122. // Ensure that generated BitSet is large enough for vocabulary
  1123. p.growToInclude(maxVocabulary);
  1124. genBitSet(p, i);
  1125. }
  1126. }
  1127. /** Do something simple like:
  1128. * private static final long[] mk_tokenSet_0() {
  1129. * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
  1130. * return data;
  1131. * }
  1132. * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
  1133. *
  1134. * Or, for large bitsets, optimize init so ranges are collapsed into loops.
  1135. * This is most useful for lexers using unicode.
  1136. */
  1137. private void genBitSet(BitSet p, int id) {
  1138. // initialization data
  1139. println(
  1140. "private static final long[] mk" + getBitsetName(id) + "() {"
  1141. );
  1142. int n = p.lengthInLongWords();
  1143. if ( n<BITSET_OPTIMIZE_INIT_THRESHOLD ) {
  1144. println("\tlong[] data = { " + p.toStringOfWords() + "};");
  1145. }
  1146. else {
  1147. // will init manually, allocate space then set values
  1148. println("\tlong[] data = new long["+n+"];");
  1149. long[] elems = p.toPackedArray();
  1150. for (int i = 0; i < elems.length;) {
  1151. if ( elems[i]==0 ) {
  1152. // done automatically by Java, don't waste time/code
  1153. i++;
  1154. continue;
  1155. }
  1156. if ( (i+1)==elems.length || elems[i]!=elems[i+1] ) {
  1157. // last number or no run of numbers, just dump assignment
  1158. println("\tdata["+i+"]="+elems[i]+"L;");
  1159. i++;
  1160. }
  1161. else {
  1162. // scan to find end of run
  1163. int j;
  1164. for (j = i + 1;
  1165. j < elems.length && elems[j]==elems[i];
  1166. j++)
  1167. {
  1168. }
  1169. // j-1 is last member of run
  1170. println("\tfor (int i = "+i+"; i<="+(j-1)+"; i++) { data[i]="+
  1171. elems[i]+"L; }");
  1172. i = j;
  1173. }
  1174. }
  1175. }
  1176. println("\treturn data;");
  1177. println("}");
  1178. // BitSet object
  1179. println(
  1180. "public static final BitSet " + getBitsetName(id) + " = new BitSet(" +
  1181. "mk" + getBitsetName(id) + "()" +
  1182. ");"
  1183. );
  1184. }
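/*
 * Sketch of the optimized branch above for a large (e.g. Unicode) bitset; the
 * array size, index bounds, and word values are illustrative:
 *
 *     private static final long[] mk_tokenSet_1() {
 *         long[] data = new long[2048];
 *         data[0]=-16L;
 *         for (int i = 1; i<=1022; i++) { data[i]=-1L; }
 *         data[1023]=9223372036854775807L;
 *         return data;
 *     }
 *     public static final BitSet _tokenSet_1 = new BitSet(mk_tokenSet_1());
 */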
  1185. /** Generate the finish of a block, using a combination of the info
  1186. * returned from genCommonBlock() and the action to perform when
  1187. * no alts were taken
  1188. * @param howToFinish The return of genCommonBlock()
  1189. * @param noViableAction What to generate when no alt is taken
  1190. */
  1191. private void genBlockFinish(JavaBlockFinishingInfo howToFinish, String noViableAction) {
  1192. if (howToFinish.needAnErrorClause &&
  1193. (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
  1194. if (howToFinish.generatedAnIf) {
  1195. println("else {");
  1196. }
  1197. else {
  1198. println("{");
  1199. }
  1200. tabs++;
  1201. println(noViableAction);
  1202. tabs--;
  1203. println("}");
  1204. }
  1205. if (howToFinish.postscript != null) {
  1206. println(howToFinish.postscript);
  1207. }
  1208. }
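/*
 * Sketch: when an error clause is needed after a generated if-chain, the output
 * looks like
 *
 *     else {
 *         ...noViableAction, e.g. the grammar's throwNoViable statement...
 *     }
 *
 * followed by any postscript returned from genCommonBlock(). Without a
 * preceding if, the braces are emitted without the "else".
 */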
  1209. /** Generate the init action for a block, which may be a RuleBlock or a
  1210. * plain AlternativeBlock.
  1211. * @param blk The block for which the init action is to be generated.
  1212. */
  1213. protected void genBlockInitAction(AlternativeBlock blk) {
  1214. // dump out init action
  1215. if (blk.initAction != null) {
  1216. printAction(processActionForSpecialSymbols(blk.initAction, blk.getLine(), currentRule, null));
  1217. }
  1218. }
  1219. /** Generate the header for a block, which may be a RuleBlock or a
  1220. * plain AlternativeBlock. This generates any variable declarations
  1221. * and syntactic-predicate-testing variables.
  1222. * @param blk The block for which the preamble is to be generated.
  1223. */
  1224. protected void genBlockPreamble(AlternativeBlock blk) {
  1225. // define labels for rule blocks.
  1226. if (blk instanceof RuleBlock) {
  1227. RuleBlock rblk = (RuleBlock)blk;
  1228. if (rblk.labeledElements != null) {
  1229. for (int i = 0; i < rblk.labeledElements.size(); i++) {
  1230. AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
  1231. // System.out.println("looking at labeled element: "+a);
  1232. // Variables for labeled rule refs and
  1233. // subrules are different than variables for
  1234. // grammar atoms. This test is a little tricky
  1235. // because we want to get all rule refs and ebnf,
  1236. // but not rule blocks or syntactic predicates
  1237. if (
  1238. a instanceof RuleRefElement ||
  1239. a instanceof AlternativeBlock &&
  1240. !(a instanceof RuleBlock) &&
  1241. !(a instanceof SynPredBlock)
  1242. ) {
  1243. if (
  1244. !(a instanceof RuleRefElement) &&
  1245. ((AlternativeBlock)a).not &&
  1246. analyzer.subruleCanBeInverted(((AlternativeBlock)a), grammar instanceof LexerGrammar)
  1247. ) {
  1248. // Special case for inverted subrules that
  1249. // will be inlined. Treat these like
  1250. // token or char literal references
  1251. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1252. if (grammar.buildAST) {
  1253. genASTDeclaration(a);
  1254. }
  1255. }
  1256. else {
  1257. if (grammar.buildAST) {
  1258. // Always gen AST variables for
  1259. // labeled elements, even if the
  1260. // element itself is marked with !
  1261. genASTDeclaration(a);
  1262. }
  1263. if (grammar instanceof LexerGrammar) {
  1264. println("Token " + a.getLabel() + "=null;");
  1265. }
  1266. if (grammar instanceof TreeWalkerGrammar) {
  1267. // always generate rule-ref variables
  1268. // for tree walker
  1269. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1270. }
  1271. }
  1272. }
  1273. else {
  1274. // It is a token or literal reference. Generate the
  1275. // correct variable type for this grammar
  1276. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1277. // In addition, generate *_AST variables if
  1278. // building ASTs
  1279. if (grammar.buildAST) {
  1280. if (a instanceof GrammarAtom &&
  1281. ((GrammarAtom)a).getASTNodeType() != null) {
  1282. GrammarAtom ga = (GrammarAtom)a;
  1283. genASTDeclaration(a, ga.getASTNodeType());
  1284. }
  1285. else {
  1286. genASTDeclaration(a);
  1287. }
  1288. }
  1289. }
  1290. }
  1291. }
  1292. }
  1293. }
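/*
 * Sketch of the declarations this emits at the top of a parser rule that builds
 * ASTs, for labels t (a token reference) and e (a rule reference). The exact
 * types come from labeledElementType/labeledElementASTType and genASTDeclaration,
 * which are configured per grammar class, so the names below are illustrative:
 *
 *     Token  t = null;
 *     MyAST t_AST = null;
 *     MyAST e_AST = null;
 */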
  1294. /** Generate a series of case statements that implement a BitSet test.
  1295. * @param p The Bitset for which cases are to be generated
  1296. */
  1297. protected void genCases(BitSet p) {
  1298. if (DEBUG_CODE_GENERATOR) System.out.println("genCases(" + p + ")");
  1299. int[] elems;
  1300. elems = p.toArray();
  1301. // Wrap cases four-per-line for lexer, one-per-line for parser
  1302. int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
  1303. int j = 1;
  1304. boolean startOfLine = true;
  1305. for (int i = 0; i < elems.length; i++) {
  1306. if (j == 1) {
  1307. print("");
  1308. }
  1309. else {
  1310. _print(" ");
  1311. }
  1312. _print("case " + getValueString(elems[i]) + ":");
  1313. if (j == wrap) {
  1314. _println("");
  1315. startOfLine = true;
  1316. j = 1;
  1317. }
  1318. else {
  1319. j++;
  1320. startOfLine = false;
  1321. }
  1322. }
  1323. if (!startOfLine) {
  1324. _println("");
  1325. }
  1326. }
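/*
 * Sketch of the emitted case labels: a lexer switch wraps four per line, e.g.
 *
 *     case 'a': case 'b': case 'c': case 'd':
 *     case 'e':
 *
 * while a parser switch puts one "case SOME_TOKEN_TYPE:" per line (the label
 * values are illustrative and come from getValueString()).
 */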
  1327. /**Generate common code for a block of alternatives; return a
  1328. * postscript that needs to be generated at the end of the
  1329. * block. Other routines may append else-clauses and such for
  1330. * error checking before the postfix is generated. If the
  1331. * grammar is a lexer, then generate alternatives in an order
  1332. * where alternatives requiring deeper lookahead are generated
  1333. * first, and EOF in the lookahead set reduces the depth of
  1334. * the lookahead.
  1335. * @param blk The block to generate
  1336. * @param noTestForSingle If true, then it does not generate a test for a single alternative.
  1337. */
  1338. public JavaBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
  1339. boolean noTestForSingle) {
  1340. int nIF = 0;
  1341. boolean createdLL1Switch = false;
  1342. int closingBracesOfIFSequence = 0;
  1343. JavaBlockFinishingInfo finishingInfo = new JavaBlockFinishingInfo();
  1344. if (DEBUG_CODE_GENERATOR) System.out.println("genCommonBlock(" + blk + ")");
  1345. // Save the AST generation state, and set it to that of the block
  1346. boolean savegenAST = genAST;
  1347. genAST = genAST && blk.getAutoGen();
  1348. boolean oldsaveTest = saveText;
  1349. saveText = saveText && blk.getAutoGen();
  1350. // Is this block inverted? If so, generate special-case code
  1351. if (
  1352. blk.not &&
  1353. analyzer.subruleCanBeInverted(blk, grammar instanceof LexerGrammar)
  1354. ) {
  1355. if (DEBUG_CODE_GENERATOR) System.out.println("special case: ~(subrule)");
  1356. Lookahead p = analyzer.look(1, blk);
  1357. // Variable assignment for labeled elements
  1358. if (blk.getLabel() != null && syntacticPredLevel == 0) {
  1359. println(blk.getLabel() + " = " + lt1Value + ";");
  1360. }
  1361. // AST
  1362. genElementAST(blk);
  1363. String astArgs = "";
  1364. if (grammar instanceof TreeWalkerGrammar) {
  1365. astArgs = "_t,";
  1366. }
  1367. // match the bitset for the alternative
  1368. println("match(" + astArgs + getBitsetName(markBitsetForGen(p.fset)) + ");");
  1369. // tack on tree cursor motion if doing a tree walker
  1370. if (grammar instanceof TreeWalkerGrammar) {
  1371. println("_t = _t.getNextSibling();");
  1372. }
  1373. return finishingInfo;
  1374. }
  1375. // Special handling for single alt
  1376. if (blk.getAlternatives().size() == 1) {
  1377. Alternative alt = blk.getAlternativeAt(0);
  1378. // Generate a warning if there is a synPred for single alt.
  1379. if (alt.synPred != null) {
  1380. antlrTool.warning(
  1381. "Syntactic predicate superfluous for single alternative",
  1382. grammar.getFilename(),
  1383. blk.getAlternativeAt(0).synPred.getLine(),
  1384. blk.getAlternativeAt(0).synPred.getColumn()
  1385. );
  1386. }
  1387. if (noTestForSingle) {
  1388. if (alt.semPred != null) {
  1389. // Generate validating predicate
  1390. genSemPred(alt.semPred, blk.line);
  1391. }
  1392. genAlt(alt, blk);
  1393. return finishingInfo;
  1394. }
  1395. }
  1396. // count number of simple LL(1) cases; only do switch for
  1397. // many LL(1) cases (no preds, no end of token refs)
  1398. // We don't care about exit paths for (...)*, (...)+
  1399. // because we don't explicitly have a test for them
  1400. // as an alt in the loop.
  1401. //
  1402. // Also, we now count how many unicode lookahead sets
  1403. // there are--they must be moved to DEFAULT or ELSE
  1404. // clause.
  1405. int nLL1 = 0;
  1406. for (int i = 0; i < blk.getAlternatives().size(); i++) {
  1407. Alternative a = blk.getAlternativeAt(i);
  1408. if (suitableForCaseExpression(a)) {
  1409. nLL1++;
  1410. }
  1411. }
  1412. // do LL(1) cases
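// Illustrative shape of the switch emitted when enough plain LL(1)
// alternatives exist (hypothetical token names; for tree walkers the test
// is _t.getType() instead of LA(1), and the default: arm is completed by
// the non-LL(1) code below plus the block postscript):
//   switch ( LA(1)) {
//   case ID:
//   case LPAREN:
//   {
//       ...code for this alternative...
//       break;
//   }
//   default:
//       ...
//   }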
  1413. if (nLL1 >= makeSwitchThreshold) {
  1414. // Determine the name of the item to be compared
  1415. String testExpr = lookaheadString(1);
  1416. createdLL1Switch = true;
  1417. // when parsing trees, convert null to valid tree node with NULL lookahead
  1418. if (grammar instanceof TreeWalkerGrammar) {
  1419. println("if (_t==null) _t=ASTNULL;");
  1420. }
  1421. println("switch ( " + testExpr + ") {");
  1422. for (int i = 0; i < blk.alternatives.size(); i++) {
  1423. Alternative alt = blk.getAlternativeAt(i);
  1424. // ignore any non-LL(1) alts, predicated alts,
  1425. // or end-of-token alts for case expressions
  1426. if (!suitableForCaseExpression(alt)) {
  1427. continue;
  1428. }
  1429. Lookahead p = alt.cache[1];
  1430. if (p.fset.degree() == 0 && !p.containsEpsilon()) {
  1431. antlrTool.warning("Alternate omitted due to empty prediction set",
  1432. grammar.getFilename(),
  1433. alt.head.getLine(), alt.head.getColumn());
  1434. }
  1435. else {
  1436. genCases(p.fset);
  1437. println("{");
  1438. tabs++;
  1439. genAlt(alt, blk);
  1440. println("break;");
  1441. tabs--;
  1442. println("}");
  1443. }
  1444. }
  1445. println("default:");
  1446. tabs++;
  1447. }
1448. // do non-LL(1) and nondeterministic cases. This is tricky in
1449. // the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
1450. // : "*="; Since nextToken is generated without a loop, the
1451. // STAR rule will have end-of-token as its lookahead set for
1452. // LA(2). So, we must generate the alternatives containing
1453. // trailing end-of-token in their lookahead sets *after* the
1454. // alternatives without end-of-token. This implements the
1455. // usual lexer convention that longer matches come before
1456. // shorter ones, e.g. "*=" matches ASSIGN_STAR, not STAR.
1457. //
1458. // For non-lexer grammars, this does not sort the alternates
1459. // by depth. Note that alts whose lookahead is purely
1460. // end-of-token at k=1 end up as default or else clauses.
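// Example (illustrative): given the lexer rules
//   STAR        : '*'  ;
//   ASSIGN_STAR : "*=" ;
// ASSIGN_STAR has '=' at depth 2 while STAR has only end-of-token there,
// so ASSIGN_STAR's alternative is emitted first and "*=" is matched as one
// token rather than being split into '*' followed by '='.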
  1461. int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk : 0;
  1462. for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
  1463. if (DEBUG_CODE_GENERATOR) System.out.println("checking depth " + altDepth);
  1464. for (int i = 0; i < blk.alternatives.size(); i++) {
  1465. Alternative alt = blk.getAlternativeAt(i);
  1466. if (DEBUG_CODE_GENERATOR) System.out.println("genAlt: " + i);
  1467. // if we made a switch above, ignore what we already took care
  1468. // of. Specifically, LL(1) alts with no preds
  1469. // that do not have end-of-token in their prediction set
  1470. // and that are not giant unicode sets.
  1471. if (createdLL1Switch && suitableForCaseExpression(alt)) {
  1472. if (DEBUG_CODE_GENERATOR) System.out.println("ignoring alt because it was in the switch");
  1473. continue;
  1474. }
  1475. String e;
  1476. boolean unpredicted = false;
  1477. if (grammar instanceof LexerGrammar) {
  1478. // Calculate the "effective depth" of the alt,
  1479. // which is the max depth at which
  1480. // cache[depth]!=end-of-token
  1481. int effectiveDepth = alt.lookaheadDepth;
  1482. if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
  1483. // use maximum lookahead
  1484. effectiveDepth = grammar.maxk;
  1485. }
  1486. while (effectiveDepth >= 1 &&
  1487. alt.cache[effectiveDepth].containsEpsilon()) {
  1488. effectiveDepth--;
  1489. }
  1490. // Ignore alts whose effective depth is other than
  1491. // the ones we are generating for this iteration.
  1492. if (effectiveDepth != altDepth) {
  1493. if (DEBUG_CODE_GENERATOR)
  1494. System.out.println("ignoring alt because effectiveDepth!=altDepth;" + effectiveDepth + "!=" + altDepth);
  1495. continue;
  1496. }
  1497. unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
  1498. e = getLookaheadTestExpression(alt, effectiveDepth);
  1499. }
  1500. else {
  1501. unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
  1502. e = getLookaheadTestExpression(alt, grammar.maxk);
  1503. }
  1504. // Was it a big unicode range that forced unsuitability
  1505. // for a case expression?
  1506. if (alt.cache[1].fset.degree() > caseSizeThreshold &&
  1507. suitableForCaseExpression(alt)) {
  1508. if (nIF == 0) {
  1509. println("if " + e + " {");
  1510. }
  1511. else {
  1512. println("else if " + e + " {");
  1513. }
  1514. }
  1515. else if (unpredicted &&
  1516. alt.semPred == null &&
  1517. alt.synPred == null) {
  1518. // The alt has empty prediction set and no
1519. // predicate to help out. If we have not
  1520. // generated a previous if, just put {...} around
  1521. // the end-of-token clause
  1522. if (nIF == 0) {
  1523. println("{");
  1524. }
  1525. else {
  1526. println("else {");
  1527. }
  1528. finishingInfo.needAnErrorClause = false;
  1529. }
  1530. else { // check for sem and syn preds
  1531. // Add any semantic predicate expression to the
  1532. // lookahead test
  1533. if (alt.semPred != null) {
1534. // if debugging, wrap the evaluation of the
1535. // predicate in a method; translate $ and #
1536. // references
  1537. ActionTransInfo tInfo = new ActionTransInfo();
  1538. String actionStr =
  1539. processActionForSpecialSymbols(alt.semPred,
  1540. blk.line,
  1541. currentRule,
  1542. tInfo);
  1543. // ignore translation info...we don't need to
1544. // do anything with it. The wrapped call will inform
1545. // SemanticPredicateListeners of the result.
  1546. if (((grammar instanceof ParserGrammar) ||
  1547. (grammar instanceof LexerGrammar)) &&
  1548. grammar.debuggingOutput) {
  1549. e = "(" + e + "&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING," +
  1550. addSemPred(charFormatter.escapeString(actionStr)) + "," + actionStr + "))";
  1551. }
  1552. else {
  1553. e = "(" + e + "&&(" + actionStr + "))";
  1554. }
  1555. }
  1556. // Generate any syntactic predicates
  1557. if (nIF > 0) {
  1558. if (alt.synPred != null) {
  1559. println("else {");
  1560. tabs++;
  1561. genSynPred(alt.synPred, e);
  1562. closingBracesOfIFSequence++;
  1563. }
  1564. else {
  1565. println("else if " + e + " {");
  1566. }
  1567. }
  1568. else {
  1569. if (alt.synPred != null) {
  1570. genSynPred(alt.synPred, e);
  1571. }
  1572. else {
  1573. // when parsing trees, convert null to
  1574. // valid tree node with NULL lookahead.
  1575. if (grammar instanceof TreeWalkerGrammar) {
  1576. println("if (_t==null) _t=ASTNULL;");
  1577. }
  1578. println("if " + e + " {");
  1579. }
  1580. }
  1581. }
  1582. nIF++;
  1583. tabs++;
  1584. genAlt(alt, blk);
  1585. tabs--;
  1586. println("}");
  1587. }
  1588. }
  1589. String ps = "";
  1590. for (int i = 1; i <= closingBracesOfIFSequence; i++) {
  1591. ps += "}";
  1592. }
  1593. // Restore the AST generation state
  1594. genAST = savegenAST;
  1595. // restore save text state
  1596. saveText = oldsaveTest;
  1597. // Return the finishing info.
  1598. if (createdLL1Switch) {
  1599. tabs--;
  1600. finishingInfo.postscript = ps + "}";
  1601. finishingInfo.generatedSwitch = true;
  1602. finishingInfo.generatedAnIf = nIF > 0;
  1603. //return new JavaBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
  1604. }
  1605. else {
  1606. finishingInfo.postscript = ps;
  1607. finishingInfo.generatedSwitch = false;
  1608. finishingInfo.generatedAnIf = nIF > 0;
  1609. // return new JavaBlockFinishingInfo(ps, false,nIF>0);
  1610. }
  1611. return finishingInfo;
  1612. }
  1613. private static boolean suitableForCaseExpression(Alternative a) {
  1614. return
  1615. a.lookaheadDepth == 1 &&
  1616. a.semPred == null &&
  1617. !a.cache[1].containsEpsilon() &&
  1618. a.cache[1].fset.degree() <= caseSizeThreshold;
  1619. }
  1620. /** Generate code to link an element reference into the AST */
  1621. private void genElementAST(AlternativeElement el) {
  1622. // handle case where you're not building trees, but are in tree walker.
  1623. // Just need to get labels set up.
  1624. if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
  1625. String elementRef;
  1626. String astName;
  1627. // Generate names and declarations of the AST variable(s)
  1628. if (el.getLabel() == null) {
  1629. elementRef = lt1Value;
  1630. // Generate AST variables for unlabeled stuff
  1631. astName = "tmp" + astVarNumber + "_AST";
  1632. astVarNumber++;
  1633. // Map the generated AST variable in the alternate
  1634. mapTreeVariable(el, astName);
  1635. // Generate an "input" AST variable also
  1636. println(labeledElementASTType + " " + astName + "_in = " + elementRef + ";");
  1637. }
  1638. return;
  1639. }
  1640. if (grammar.buildAST && syntacticPredLevel == 0) {
  1641. boolean needASTDecl =
  1642. (genAST &&
  1643. (el.getLabel() != null ||
  1644. el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
  1645. )
  1646. );
  1647. // RK: if we have a grammar element always generate the decl
  1648. // since some guy can access it from an action and we can't
  1649. // peek ahead (well not without making a mess).
  1650. // I'd prefer taking this out.
  1651. if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG &&
  1652. (el instanceof TokenRefElement))
  1653. {
  1654. needASTDecl = true;
  1655. }
  1656. boolean doNoGuessTest =
  1657. (grammar.hasSyntacticPredicate && needASTDecl);
  1658. String elementRef;
  1659. String astNameBase;
  1660. // Generate names and declarations of the AST variable(s)
  1661. if (el.getLabel() != null) {
  1662. elementRef = el.getLabel();
  1663. astNameBase = el.getLabel();
  1664. }
  1665. else {
  1666. elementRef = lt1Value;
  1667. // Generate AST variables for unlabeled stuff
  1668. astNameBase = "tmp" + astVarNumber;
  1670. astVarNumber++;
  1671. }
  1672. // Generate the declaration if required.
  1673. if (needASTDecl) {
  1674. // Generate the declaration
  1675. if (el instanceof GrammarAtom) {
  1676. GrammarAtom ga = (GrammarAtom)el;
  1677. if (ga.getASTNodeType() != null) {
  1678. genASTDeclaration(el, astNameBase, ga.getASTNodeType());
  1679. // println(ga.getASTNodeType()+" " + astName+" = null;");
  1680. }
  1681. else {
  1682. genASTDeclaration(el, astNameBase, labeledElementASTType);
  1683. // println(labeledElementASTType+" " + astName + " = null;");
  1684. }
  1685. }
  1686. else {
  1687. genASTDeclaration(el, astNameBase, labeledElementASTType);
  1688. // println(labeledElementASTType+" " + astName + " = null;");
  1689. }
  1690. }
  1691. // for convenience..
  1692. String astName = astNameBase + "_AST";
  1693. // Map the generated AST variable in the alternate
  1694. mapTreeVariable(el, astName);
  1695. if (grammar instanceof TreeWalkerGrammar) {
  1696. // Generate an "input" AST variable also
  1697. println(labeledElementASTType + " " + astName + "_in = null;");
  1698. }
  1699. // Enclose actions with !guessing
  1700. if (doNoGuessTest) {
  1701. // println("if (inputState.guessing==0) {");
  1702. // tabs++;
  1703. }
  1704. // if something has a label assume it will be used
  1705. // so we must initialize the RefAST
  1706. if (el.getLabel() != null) {
  1707. if (el instanceof GrammarAtom) {
  1708. println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + ";");
  1709. }
  1710. else {
  1711. println(astName + " = " + getASTCreateString(elementRef) + ";");
  1712. }
  1713. }
  1714. // if it has no label but a declaration exists initialize it.
  1715. if (el.getLabel() == null && needASTDecl) {
  1716. elementRef = lt1Value;
  1717. if (el instanceof GrammarAtom) {
  1718. println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + ";");
  1719. }
  1720. else {
  1721. println(astName + " = " + getASTCreateString(elementRef) + ";");
  1722. }
  1723. // Map the generated AST variable in the alternate
  1724. if (grammar instanceof TreeWalkerGrammar) {
  1725. // set "input" AST variable also
  1726. println(astName + "_in = " + elementRef + ";");
  1727. }
  1728. }
  1729. if (genAST) {
  1730. switch (el.getAutoGenType()) {
  1731. case GrammarElement.AUTO_GEN_NONE:
  1732. println("astFactory.addASTChild(currentAST, " + astName + ");");
  1733. break;
  1734. case GrammarElement.AUTO_GEN_CARET:
  1735. println("astFactory.makeASTRoot(currentAST, " + astName + ");");
  1736. break;
  1737. default:
  1738. break;
  1739. }
  1740. }
  1741. if (doNoGuessTest) {
  1742. // tabs--;
  1743. // println("}");
  1744. }
  1745. }
  1746. }
  1747. /** Close the try block and generate catch phrases
  1748. * if the element has a labeled handler in the rule
  1749. */
  1750. private void genErrorCatchForElement(AlternativeElement el) {
  1751. if (el.getLabel() == null) return;
  1752. String r = el.enclosingRuleName;
  1753. if (grammar instanceof LexerGrammar) {
  1754. r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
  1755. }
  1756. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
  1757. if (rs == null) {
  1758. antlrTool.panic("Enclosing rule not found!");
  1759. }
  1760. ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
  1761. if (ex != null) {
  1762. tabs--;
  1763. println("}");
  1764. genErrorHandler(ex);
  1765. }
  1766. }
  1767. /** Generate the catch phrases for a user-specified error handler */
  1768. private void genErrorHandler(ExceptionSpec ex) {
  1769. // Each ExceptionHandler in the ExceptionSpec is a separate catch
  1770. for (int i = 0; i < ex.handlers.size(); i++) {
  1771. ExceptionHandler handler = (ExceptionHandler)ex.handlers.elementAt(i);
  1772. // Generate catch phrase
  1773. println("catch (" + handler.exceptionTypeAndName.getText() + ") {");
  1774. tabs++;
  1775. if (grammar.hasSyntacticPredicate) {
  1776. println("if (inputState.guessing==0) {");
  1777. tabs++;
  1778. }
  1779. // When not guessing, execute user handler action
  1780. ActionTransInfo tInfo = new ActionTransInfo();
  1781. printAction(
  1782. processActionForSpecialSymbols(handler.action.getText(),
  1783. handler.action.getLine(),
  1784. currentRule, tInfo)
  1785. );
  1786. if (grammar.hasSyntacticPredicate) {
  1787. tabs--;
  1788. println("} else {");
  1789. tabs++;
  1790. // When guessing, rethrow exception
  1791. println(
  1792. "throw " +
  1793. extractIdOfAction(handler.exceptionTypeAndName) +
  1794. ";"
  1795. );
  1796. tabs--;
  1797. println("}");
  1798. }
  1799. // Close catch phrase
  1800. tabs--;
  1801. println("}");
  1802. }
  1803. }
  1804. /** Generate a try { opening if the element has a labeled handler in the rule */
  1805. private void genErrorTryForElement(AlternativeElement el) {
  1806. if (el.getLabel() == null) return;
  1807. String r = el.enclosingRuleName;
  1808. if (grammar instanceof LexerGrammar) {
  1809. r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
  1810. }
  1811. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
  1812. if (rs == null) {
  1813. antlrTool.panic("Enclosing rule not found!");
  1814. }
  1815. ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
  1816. if (ex != null) {
  1817. println("try { // for error handling");
  1818. tabs++;
  1819. }
  1820. }
  1821. protected void genASTDeclaration(AlternativeElement el) {
  1822. genASTDeclaration(el, labeledElementASTType);
  1823. }
  1824. protected void genASTDeclaration(AlternativeElement el, String node_type) {
  1825. genASTDeclaration(el, el.getLabel(), node_type);
  1826. }
  1827. protected void genASTDeclaration(AlternativeElement el, String var_name, String node_type) {
  1828. // already declared?
  1829. if (declaredASTVariables.contains(el))
  1830. return;
  1831. // emit code
  1832. println(node_type + " " + var_name + "_AST = null;");
  1833. // mark as declared
  1834. declaredASTVariables.put(el,el);
  1835. }
  1836. /** Generate a header that is common to all Java files */
  1837. protected void genHeader() {
  1838. println("// $ANTLR " + Tool.version + ": " +
  1839. "\"" + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\"" +
  1840. " -> " +
  1841. "\"" + grammar.getClassName() + ".java\"$");
  1842. }
  1843. private void genLiteralsTest() {
  1844. println("_ttype = testLiteralsTable(_ttype);");
  1845. }
  1846. private void genLiteralsTestForPartialToken() {
  1847. println("_ttype = testLiteralsTable(new String(text.getBuffer(),_begin,text.length()-_begin),_ttype);");
  1848. }
  1849. protected void genMatch(BitSet b) {
  1850. }
  1851. protected void genMatch(GrammarAtom atom) {
  1852. if (atom instanceof StringLiteralElement) {
  1853. if (grammar instanceof LexerGrammar) {
  1854. genMatchUsingAtomText(atom);
  1855. }
  1856. else {
  1857. genMatchUsingAtomTokenType(atom);
  1858. }
  1859. }
  1860. else if (atom instanceof CharLiteralElement) {
  1861. if (grammar instanceof LexerGrammar) {
  1862. genMatchUsingAtomText(atom);
  1863. }
  1864. else {
  1865. antlrTool.error("cannot ref character literals in grammar: " + atom);
  1866. }
  1867. }
  1868. else if (atom instanceof TokenRefElement) {
  1869. genMatchUsingAtomText(atom);
  1870. }
  1871. else if (atom instanceof WildcardElement) {
  1872. gen((WildcardElement)atom);
  1873. }
  1874. }
  1875. protected void genMatchUsingAtomText(GrammarAtom atom) {
  1876. // match() for trees needs the _t cursor
  1877. String astArgs = "";
  1878. if (grammar instanceof TreeWalkerGrammar) {
  1879. astArgs = "_t,";
  1880. }
  1881. // if in lexer and ! on element, save buffer index to kill later
  1882. if (grammar instanceof LexerGrammar && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1883. println("_saveIndex=text.length();");
  1884. }
  1885. print(atom.not ? "matchNot(" : "match(");
  1886. _print(astArgs);
  1887. // print out what to match
  1888. if (atom.atomText.equals("EOF")) {
  1889. // horrible hack to handle EOF case
  1890. _print("Token.EOF_TYPE");
  1891. }
  1892. else {
  1893. _print(atom.atomText);
  1894. }
  1895. _println(");");
  1896. if (grammar instanceof LexerGrammar && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1897. println("text.setLength(_saveIndex);"); // kill text atom put in buffer
  1898. }
  1899. }
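// Illustrative output of genMatchUsingAtomText above for a lexer element
// marked with '!' (its text is matched but then discarded):
//   _saveIndex=text.length();
//   match("if");
//   text.setLength(_saveIndex);
// Without the '!' (and with saveText on), only the match(...) call is emitted.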
  1900. protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
  1901. // match() for trees needs the _t cursor
  1902. String astArgs = "";
  1903. if (grammar instanceof TreeWalkerGrammar) {
  1904. astArgs = "_t,";
  1905. }
  1906. // If the literal can be mangled, generate the symbolic constant instead
  1907. String mangledName = null;
  1908. String s = astArgs + getValueString(atom.getType());
  1909. // matching
  1910. println((atom.not ? "matchNot(" : "match(") + s + ");");
  1911. }
  1912. /** Generate the nextToken() rule. nextToken() is a synthetic
  1913. * lexer rule that is the implicit OR of all user-defined
  1914. * lexer rules.
  1915. */
  1916. public void genNextToken() {
  1917. // Are there any public rules? If not, then just generate a
  1918. // fake nextToken().
  1919. boolean hasPublicRules = false;
  1920. for (int i = 0; i < grammar.rules.size(); i++) {
  1921. RuleSymbol rs = (RuleSymbol)grammar.rules.elementAt(i);
  1922. if (rs.isDefined() && rs.access.equals("public")) {
  1923. hasPublicRules = true;
  1924. break;
  1925. }
  1926. }
  1927. if (!hasPublicRules) {
  1928. println("");
  1929. println("public Token nextToken() throws TokenStreamException {");
  1930. println("\ttry {uponEOF();}");
  1931. println("\tcatch(CharStreamIOException csioe) {");
  1932. println("\t\tthrow new TokenStreamIOException(csioe.io);");
  1933. println("\t}");
  1934. println("\tcatch(CharStreamException cse) {");
  1935. println("\t\tthrow new TokenStreamException(cse.getMessage());");
  1936. println("\t}");
  1937. println("\treturn new CommonToken(Token.EOF_TYPE, \"\");");
  1938. println("}");
  1939. println("");
  1940. return;
  1941. }
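// For grammars that do have public rules, the code emitted below assembles
// a nextToken() of roughly this shape (illustrative, non-filter mode; the
// prediction switch over the lexer rules comes from genCommonBlock):
//   public Token nextToken() throws TokenStreamException {
//       Token theRetToken=null;
//   tryAgain:
//       for (;;) {
//           Token _token = null;
//           int _ttype = Token.INVALID_TYPE;
//           resetText();
//           try {   // for char stream error handling
//               try {   // for lexical error handling
//                   switch ( LA(1)) { ... }
//                   if ( _returnToken==null ) continue tryAgain; // found SKIP token
//                   _ttype = _returnToken.getType();
//                   _returnToken.setType(_ttype);
//                   return _returnToken;
//               }
//               catch (RecognitionException e) { ... }
//           }
//           catch (CharStreamException cse) { ... }
//       }
//   }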
  1942. // Create the synthesized nextToken() rule
  1943. RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");
  1944. // Define the nextToken rule symbol
  1945. RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
  1946. nextTokenRs.setDefined();
  1947. nextTokenRs.setBlock(nextTokenBlk);
  1948. nextTokenRs.access = "private";
  1949. grammar.define(nextTokenRs);
  1950. // Analyze the nextToken rule
  1951. boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);
  1952. // Generate the next token rule
  1953. String filterRule = null;
  1954. if (((LexerGrammar)grammar).filterMode) {
  1955. filterRule = ((LexerGrammar)grammar).filterRule;
  1956. }
  1957. println("");
  1958. println("public Token nextToken() throws TokenStreamException {");
  1959. tabs++;
  1960. println("Token theRetToken=null;");
  1961. _println("tryAgain:");
  1962. println("for (;;) {");
  1963. tabs++;
  1964. println("Token _token = null;");
  1965. println("int _ttype = Token.INVALID_TYPE;");
  1966. if (((LexerGrammar)grammar).filterMode) {
  1967. println("setCommitToPath(false);");
  1968. if (filterRule != null) {
  1969. // Here's a good place to ensure that the filter rule actually exists
  1970. if (!grammar.isDefined(CodeGenerator.encodeLexerRuleName(filterRule))) {
  1971. grammar.antlrTool.error("Filter rule " + filterRule + " does not exist in this lexer");
  1972. }
  1973. else {
  1974. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(CodeGenerator.encodeLexerRuleName(filterRule));
  1975. if (!rs.isDefined()) {
  1976. grammar.antlrTool.error("Filter rule " + filterRule + " does not exist in this lexer");
  1977. }
  1978. else if (rs.access.equals("public")) {
  1979. grammar.antlrTool.error("Filter rule " + filterRule + " must be protected");
  1980. }
  1981. }
  1982. println("int _m;");
  1983. println("_m = mark();");
  1984. }
  1985. }
  1986. println("resetText();");
  1987. println("try { // for char stream error handling");
  1988. tabs++;
  1989. // Generate try around whole thing to trap scanner errors
  1990. println("try { // for lexical error handling");
  1991. tabs++;
  1992. // Test for public lexical rules with empty paths
  1993. for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
  1994. Alternative a = nextTokenBlk.getAlternativeAt(i);
  1995. if (a.cache[1].containsEpsilon()) {
  1996. //String r = a.head.toString();
  1997. RuleRefElement rr = (RuleRefElement)a.head;
  1998. String r = CodeGenerator.decodeLexerRuleName(rr.targetRule);
  1999. antlrTool.warning("public lexical rule "+r+" is optional (can match \"nothing\")");
  2000. }
  2001. }
  2002. // Generate the block
  2003. String newline = System.getProperty("line.separator");
  2004. JavaBlockFinishingInfo howToFinish = genCommonBlock(nextTokenBlk, false);
  2005. String errFinish = "if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(Token.EOF_TYPE);}";
  2006. errFinish += newline + "\t\t\t\t";
  2007. if (((LexerGrammar)grammar).filterMode) {
  2008. if (filterRule == null) {
  2009. errFinish += "else {consume(); continue tryAgain;}";
  2010. }
  2011. else {
  2012. errFinish += "else {" + newline +
  2013. "\t\t\t\t\tcommit();" + newline +
  2014. "\t\t\t\t\ttry {m" + filterRule + "(false);}" + newline +
  2015. "\t\t\t\t\tcatch(RecognitionException e) {" + newline +
  2016. "\t\t\t\t\t // catastrophic failure" + newline +
  2017. "\t\t\t\t\t reportError(e);" + newline +
  2018. "\t\t\t\t\t consume();" + newline +
  2019. "\t\t\t\t\t}" + newline +
  2020. "\t\t\t\t\tcontinue tryAgain;" + newline +
  2021. "\t\t\t\t}";
  2022. }
  2023. }
  2024. else {
  2025. errFinish += "else {" + throwNoViable + "}";
  2026. }
  2027. genBlockFinish(howToFinish, errFinish);
  2028. // at this point a valid token has been matched, undo "mark" that was done
  2029. if (((LexerGrammar)grammar).filterMode && filterRule != null) {
  2030. println("commit();");
  2031. }
  2032. // Generate literals test if desired
  2033. // make sure _ttype is set first; note _returnToken must be
  2034. // non-null as the rule was required to create it.
  2035. println("if ( _returnToken==null ) continue tryAgain; // found SKIP token");
  2036. println("_ttype = _returnToken.getType();");
  2037. if (((LexerGrammar)grammar).getTestLiterals()) {
  2038. genLiteralsTest();
  2039. }
  2040. // return token created by rule reference in switch
  2041. println("_returnToken.setType(_ttype);");
  2042. println("return _returnToken;");
  2043. // Close try block
  2044. tabs--;
  2045. println("}");
  2046. println("catch (RecognitionException e) {");
  2047. tabs++;
  2048. if (((LexerGrammar)grammar).filterMode) {
  2049. if (filterRule == null) {
  2050. println("if ( !getCommitToPath() ) {consume(); continue tryAgain;}");
  2051. }
  2052. else {
  2053. println("if ( !getCommitToPath() ) {");
  2054. tabs++;
  2055. println("rewind(_m);");
  2056. println("resetText();");
  2057. println("try {m" + filterRule + "(false);}");
  2058. println("catch(RecognitionException ee) {");
  2059. println(" // horrendous failure: error in filter rule");
  2060. println(" reportError(ee);");
  2061. println(" consume();");
  2062. println("}");
  2063. println("continue tryAgain;");
  2064. tabs--;
  2065. println("}");
  2066. }
  2067. }
  2068. if (nextTokenBlk.getDefaultErrorHandler()) {
  2069. println("reportError(e);");
  2070. println("consume();");
  2071. }
  2072. else {
  2073. // pass on to invoking routine
  2074. println("throw new TokenStreamRecognitionException(e);");
  2075. }
  2076. tabs--;
  2077. println("}");
  2078. // close CharStreamException try
  2079. tabs--;
  2080. println("}");
  2081. println("catch (CharStreamException cse) {");
  2082. println(" if ( cse instanceof CharStreamIOException ) {");
  2083. println(" throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
  2084. println(" }");
  2085. println(" else {");
  2086. println(" throw new TokenStreamException(cse.getMessage());");
  2087. println(" }");
  2088. println("}");
  2089. // close for-loop
  2090. tabs--;
  2091. println("}");
  2092. // close method nextToken
  2093. tabs--;
  2094. println("}");
  2095. println("");
  2096. }
  2097. /** Gen a named rule block.
  2098. * ASTs are generated for each element of an alternative unless
  2099. * the rule or the alternative have a '!' modifier.
  2100. *
  2101. * If an alternative defeats the default tree construction, it
  2102. * must set <rule>_AST to the root of the returned AST.
  2103. *
  2104. * Each alternative that does automatic tree construction, builds
  2105. * up root and child list pointers in an ASTPair structure.
  2106. *
  2107. * A rule finishes by setting the returnAST variable from the
  2108. * ASTPair.
  2109. *
2110. * @param s The RuleSymbol of the rule to generate
  2111. * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
  2112. */
  2113. public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum) {
  2114. tabs = 1;
  2115. if (DEBUG_CODE_GENERATOR) System.out.println("genRule(" + s.getId() + ")");
  2116. if (!s.isDefined()) {
  2117. antlrTool.error("undefined rule: " + s.getId());
  2118. return;
  2119. }
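// Illustrative sketch of the method this routine emits, assuming a parser
// grammar with buildAST on and a hypothetical rule "expr" that takes no
// arguments and returns nothing (details vary with grammar type and options):
//   public final void expr() throws RecognitionException, TokenStreamException {
//       returnAST = null;
//       ASTPair currentAST = new ASTPair();
//       AST expr_AST = null;
//       ...alternatives, error handling...
//       returnAST = expr_AST;
//   }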
  2120. // Generate rule return type, name, arguments
  2121. RuleBlock rblk = s.getBlock();
  2122. currentRule = rblk;
  2123. currentASTResult = s.getId();
  2124. // clear list of declared ast variables..
  2125. declaredASTVariables.clear();
  2126. // Save the AST generation state, and set it to that of the rule
  2127. boolean savegenAST = genAST;
  2128. genAST = genAST && rblk.getAutoGen();
  2129. // boolean oldsaveTest = saveText;
  2130. saveText = rblk.getAutoGen();
  2131. // print javadoc comment if any
  2132. if (s.comment != null) {
  2133. _println(s.comment);
  2134. }
  2135. // Gen method access and final qualifier
  2136. print(s.access + " final ");
  2137. // Gen method return type (note lexer return action set at rule creation)
  2138. if (rblk.returnAction != null) {
  2139. // Has specified return value
  2140. _print(extractTypeOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + " ");
  2141. }
  2142. else {
  2143. // No specified return value
  2144. _print("void ");
  2145. }
  2146. // Gen method name
  2147. _print(s.getId() + "(");
  2148. // Additional rule parameters common to all rules for this grammar
  2149. _print(commonExtraParams);
  2150. if (commonExtraParams.length() != 0 && rblk.argAction != null) {
  2151. _print(",");
  2152. }
  2153. // Gen arguments
  2154. if (rblk.argAction != null) {
  2155. // Has specified arguments
  2156. _println("");
  2157. tabs++;
  2158. println(rblk.argAction);
  2159. tabs--;
  2160. print(")");
  2161. }
  2162. else {
  2163. // No specified arguments
  2164. _print(")");
  2165. }
  2166. // Gen throws clause and open curly
  2167. _print(" throws " + exceptionThrown);
  2168. if (grammar instanceof ParserGrammar) {
  2169. _print(", TokenStreamException");
  2170. }
  2171. else if (grammar instanceof LexerGrammar) {
  2172. _print(", CharStreamException, TokenStreamException");
  2173. }
  2174. // Add user-defined exceptions unless lexer (for now)
  2175. if (rblk.throwsSpec != null) {
  2176. if (grammar instanceof LexerGrammar) {
  2177. antlrTool.error("user-defined throws spec not allowed (yet) for lexer rule " + rblk.ruleName);
  2178. }
  2179. else {
  2180. _print(", " + rblk.throwsSpec);
  2181. }
  2182. }
  2183. _println(" {");
  2184. tabs++;
  2185. // Convert return action to variable declaration
  2186. if (rblk.returnAction != null)
  2187. println(rblk.returnAction + ";");
  2188. // print out definitions needed by rules for various grammar types
  2189. println(commonLocalVars);
  2190. if (grammar.traceRules) {
  2191. if (grammar instanceof TreeWalkerGrammar) {
  2192. println("traceIn(\"" + s.getId() + "\",_t);");
  2193. }
  2194. else {
  2195. println("traceIn(\"" + s.getId() + "\");");
  2196. }
  2197. }
  2198. if (grammar instanceof LexerGrammar) {
  2199. // lexer rule default return value is the rule's token name
  2200. // This is a horrible hack to support the built-in EOF lexer rule.
  2201. if (s.getId().equals("mEOF"))
  2202. println("_ttype = Token.EOF_TYPE;");
  2203. else
  2204. println("_ttype = " + s.getId().substring(1) + ";");
  2205. println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
  2206. /*
  2207. println("boolean old_saveConsumedInput=saveConsumedInput;");
  2208. if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
  2209. println("saveConsumedInput=false;");
  2210. }
  2211. */
  2212. }
  2213. // if debugging, write code to mark entry to the rule
  2214. if (grammar.debuggingOutput)
  2215. if (grammar instanceof ParserGrammar)
  2216. println("fireEnterRule(" + ruleNum + ",0);");
  2217. else if (grammar instanceof LexerGrammar)
  2218. println("fireEnterRule(" + ruleNum + ",_ttype);");
  2219. // Generate trace code if desired
  2220. if (grammar.debuggingOutput || grammar.traceRules) {
  2221. println("try { // debugging");
  2222. tabs++;
  2223. }
  2224. // Initialize AST variables
  2225. if (grammar instanceof TreeWalkerGrammar) {
  2226. // "Input" value for rule
  2227. println(labeledElementASTType + " " + s.getId() + "_AST_in = (_t == ASTNULL) ? null : (" + labeledElementASTType + ")_t;");
  2228. }
  2229. if (grammar.buildAST) {
  2230. // Parser member used to pass AST returns from rule invocations
  2231. println("returnAST = null;");
  2232. // Tracks AST construction
  2233. // println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
  2234. println("ASTPair currentAST = new ASTPair();");
  2235. // User-settable return value for rule.
  2236. println(labeledElementASTType + " " + s.getId() + "_AST = null;");
  2237. }
  2238. genBlockPreamble(rblk);
  2239. genBlockInitAction(rblk);
  2240. println("");
  2241. // Search for an unlabeled exception specification attached to the rule
  2242. ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");
  2243. // Generate try block around the entire rule for error handling
  2244. if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
  2245. println("try { // for error handling");
  2246. tabs++;
  2247. }
  2248. // Generate the alternatives
  2249. if (rblk.alternatives.size() == 1) {
  2250. // One alternative -- use simple form
  2251. Alternative alt = rblk.getAlternativeAt(0);
  2252. String pred = alt.semPred;
  2253. if (pred != null)
  2254. genSemPred(pred, currentRule.line);
  2255. if (alt.synPred != null) {
  2256. antlrTool.warning(
  2257. "Syntactic predicate ignored for single alternative",
  2258. grammar.getFilename(),
  2259. alt.synPred.getLine(),
  2260. alt.synPred.getColumn()
  2261. );
  2262. }
  2263. genAlt(alt, rblk);
  2264. }
  2265. else {
  2266. // Multiple alternatives -- generate complex form
  2267. boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);
  2268. JavaBlockFinishingInfo howToFinish = genCommonBlock(rblk, false);
  2269. genBlockFinish(howToFinish, throwNoViable);
  2270. }
  2271. // Generate catch phrase for error handling
  2272. if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
  2273. // Close the try block
  2274. tabs--;
  2275. println("}");
  2276. }
  2277. // Generate user-defined or default catch phrases
  2278. if (unlabeledUserSpec != null) {
  2279. genErrorHandler(unlabeledUserSpec);
  2280. }
  2281. else if (rblk.getDefaultErrorHandler()) {
  2282. // Generate default catch phrase
  2283. println("catch (" + exceptionThrown + " ex) {");
  2284. tabs++;
  2285. // Generate code to handle error if not guessing
  2286. if (grammar.hasSyntacticPredicate) {
  2287. println("if (inputState.guessing==0) {");
  2288. tabs++;
  2289. }
  2290. println("reportError(ex);");
  2291. if (!(grammar instanceof TreeWalkerGrammar)) {
  2292. // Generate code to consume until token in k==1 follow set
  2293. Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, rblk.endNode);
  2294. String followSetName = getBitsetName(markBitsetForGen(follow.fset));
  2295. println("recover(ex," + followSetName + ");");
  2296. }
  2297. else {
  2298. // Just consume one token
  2299. println("if (_t!=null) {_t = _t.getNextSibling();}");
  2300. }
  2301. if (grammar.hasSyntacticPredicate) {
  2302. tabs--;
  2303. // When guessing, rethrow exception
  2304. println("} else {");
  2305. println(" throw ex;");
  2306. println("}");
  2307. }
  2308. // Close catch phrase
  2309. tabs--;
  2310. println("}");
  2311. }
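// Assembled, the default handler emitted above looks roughly like this for a
// parser rule (illustrative; _tokenSet_0 stands for the generated FOLLOW set):
//   catch (RecognitionException ex) {
//       reportError(ex);
//       recover(ex,_tokenSet_0);
//   }
// When syntactic predicates are present the body is wrapped in an
// "if (inputState.guessing==0) {...} else { throw ex; }" guard, and tree
// walkers advance the cursor with "_t = _t.getNextSibling();" instead of
// calling recover().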
  2312. // Squirrel away the AST "return" value
  2313. if (grammar.buildAST) {
  2314. println("returnAST = " + s.getId() + "_AST;");
  2315. }
  2316. // Set return tree value for tree walkers
  2317. if (grammar instanceof TreeWalkerGrammar) {
  2318. println("_retTree = _t;");
  2319. }
  2320. // Generate literals test for lexer rules so marked
  2321. if (rblk.getTestLiterals()) {
  2322. if (s.access.equals("protected")) {
  2323. genLiteralsTestForPartialToken();
  2324. }
  2325. else {
  2326. genLiteralsTest();
  2327. }
  2328. }
  2329. // if doing a lexer rule, dump code to create token if necessary
  2330. if (grammar instanceof LexerGrammar) {
  2331. println("if ( _createToken && _token==null && _ttype!=Token.SKIP ) {");
  2332. println(" _token = makeToken(_ttype);");
  2333. println(" _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));");
  2334. println("}");
  2335. println("_returnToken = _token;");
  2336. }
  2337. // Gen the return statement if there is one (lexer has hard-wired return action)
  2338. if (rblk.returnAction != null) {
  2339. println("return " + extractIdOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + ";");
  2340. }
  2341. if (grammar.debuggingOutput || grammar.traceRules) {
  2342. tabs--;
  2343. println("} finally { // debugging");
  2344. tabs++;
  2345. // If debugging, generate calls to mark exit of rule
  2346. if (grammar.debuggingOutput)
  2347. if (grammar instanceof ParserGrammar)
  2348. println("fireExitRule(" + ruleNum + ",0);");
  2349. else if (grammar instanceof LexerGrammar)
  2350. println("fireExitRule(" + ruleNum + ",_ttype);");
  2351. if (grammar.traceRules) {
  2352. if (grammar instanceof TreeWalkerGrammar) {
  2353. println("traceOut(\"" + s.getId() + "\",_t);");
  2354. }
  2355. else {
  2356. println("traceOut(\"" + s.getId() + "\");");
  2357. }
  2358. }
  2359. tabs--;
  2360. println("}");
  2361. }
  2362. tabs--;
  2363. println("}");
  2364. println("");
  2365. // Restore the AST generation state
  2366. genAST = savegenAST;
  2367. // restore char save state
  2368. // saveText = oldsaveTest;
  2369. }
  2370. private void GenRuleInvocation(RuleRefElement rr) {
  2371. // dump rule name
  2372. _print(rr.targetRule + "(");
  2373. // lexers must tell rule if it should set _returnToken
  2374. if (grammar instanceof LexerGrammar) {
  2375. // if labeled, could access Token, so tell rule to create
  2376. if (rr.getLabel() != null) {
  2377. _print("true");
  2378. }
  2379. else {
  2380. _print("false");
  2381. }
  2382. if (commonExtraArgs.length() != 0 || rr.args != null) {
  2383. _print(",");
  2384. }
  2385. }
  2386. // Extra arguments common to all rules for this grammar
  2387. _print(commonExtraArgs);
  2388. if (commonExtraArgs.length() != 0 && rr.args != null) {
  2389. _print(",");
  2390. }
  2391. // Process arguments to method, if any
  2392. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
  2393. if (rr.args != null) {
  2394. // When not guessing, execute user arg action
  2395. ActionTransInfo tInfo = new ActionTransInfo();
  2396. String args = processActionForSpecialSymbols(rr.args, 0, currentRule, tInfo);
  2397. if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
  2398. antlrTool.error("Arguments of rule reference '" + rr.targetRule + "' cannot set or ref #" +
  2399. currentRule.getRuleName(), grammar.getFilename(), rr.getLine(), rr.getColumn());
  2400. }
  2401. _print(args);
  2402. // Warn if the rule accepts no arguments
  2403. if (rs.block.argAction == null) {
  2404. antlrTool.warning("Rule '" + rr.targetRule + "' accepts no arguments", grammar.getFilename(), rr.getLine(), rr.getColumn());
  2405. }
  2406. }
  2407. else {
  2408. // For C++, no warning if rule has parameters, because there may be default
  2409. // values for all of the parameters
  2410. if (rs.block.argAction != null) {
  2411. antlrTool.warning("Missing parameters on reference to rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
  2412. }
  2413. }
  2414. _println(");");
  2415. // move down to the first child while parsing
  2416. if (grammar instanceof TreeWalkerGrammar) {
  2417. println("_t = _retTree;");
  2418. }
  2419. }
  2420. protected void genSemPred(String pred, int line) {
  2421. // translate $ and # references
  2422. ActionTransInfo tInfo = new ActionTransInfo();
  2423. pred = processActionForSpecialSymbols(pred, line, currentRule, tInfo);
  2424. // ignore translation info...we don't need to do anything with it.
  2425. String escapedPred = charFormatter.escapeString(pred);
  2426. // if debugging, wrap the semantic predicate evaluation in a method
  2427. // that can tell SemanticPredicateListeners the result
  2428. if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
  2429. pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING,"
  2430. + addSemPred(escapedPred) + "," + pred + ")";
  2431. println("if (!(" + pred + "))");
  2432. println(" throw new SemanticException(\"" + escapedPred + "\");");
  2433. }
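// Illustrative output of genSemPred above for a validating predicate such
// as {n < 10}? :
//   if (!(n < 10))
//     throw new SemanticException("n < 10");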
  2434. /** Write an array of Strings which are the semantic predicate
  2435. * expressions. The debugger will reference them by number only
  2436. */
  2437. protected void genSemPredMap() {
  2438. Enumeration e = semPreds.elements();
  2439. println("private String _semPredNames[] = {");
  2440. while (e.hasMoreElements())
  2441. println("\"" + e.nextElement() + "\",");
  2442. println("};");
  2443. }
  2444. protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
  2445. if (DEBUG_CODE_GENERATOR) System.out.println("gen=>(" + blk + ")");
  2446. // Dump synpred result variable
  2447. println("boolean synPredMatched" + blk.ID + " = false;");
  2448. // Gen normal lookahead test
  2449. println("if (" + lookaheadExpr + ") {");
  2450. tabs++;
  2451. // Save input state
  2452. if (grammar instanceof TreeWalkerGrammar) {
  2453. println("AST __t" + blk.ID + " = _t;");
  2454. }
  2455. else {
  2456. println("int _m" + blk.ID + " = mark();");
  2457. }
  2458. // Once inside the try, assume synpred works unless exception caught
  2459. println("synPredMatched" + blk.ID + " = true;");
  2460. println("inputState.guessing++;");
  2461. // if debugging, tell listeners that a synpred has started
  2462. if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
  2463. (grammar instanceof LexerGrammar))) {
  2464. println("fireSyntacticPredicateStarted();");
  2465. }
  2466. syntacticPredLevel++;
  2467. println("try {");
  2468. tabs++;
  2469. gen((AlternativeBlock)blk); // gen code to test predicate
  2470. tabs--;
  2471. //println("System.out.println(\"pred "+blk+" succeeded\");");
  2472. println("}");
  2473. println("catch (" + exceptionThrown + " pe) {");
  2474. tabs++;
  2475. println("synPredMatched" + blk.ID + " = false;");
  2476. //println("System.out.println(\"pred "+blk+" failed\");");
  2477. tabs--;
  2478. println("}");
  2479. // Restore input state
  2480. if (grammar instanceof TreeWalkerGrammar) {
  2481. println("_t = __t" + blk.ID + ";");
  2482. }
  2483. else {
  2484. println("rewind(_m" + blk.ID + ");");
  2485. }
  2486. println("inputState.guessing--;");
  2487. // if debugging, tell listeners how the synpred turned out
  2488. if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
  2489. (grammar instanceof LexerGrammar))) {
  2490. println("if (synPredMatched" + blk.ID + ")");
  2491. println(" fireSyntacticPredicateSucceeded();");
  2492. println("else");
  2493. println(" fireSyntacticPredicateFailed();");
  2494. }
  2495. syntacticPredLevel--;
  2496. tabs--;
  2497. // Close lookahead test
  2498. println("}");
2499. // Test synpred result
  2500. println("if ( synPredMatched" + blk.ID + " ) {");
  2501. }
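// Illustrative shape of the guessing code emitted above for a parser
// syntactic predicate whose block ID is 1:
//   boolean synPredMatched1 = false;
//   if (<lookahead test>) {
//       int _m1 = mark();
//       synPredMatched1 = true;
//       inputState.guessing++;
//       try {
//           ...code for the predicate block...
//       }
//       catch (RecognitionException pe) {
//           synPredMatched1 = false;
//       }
//       rewind(_m1);
//       inputState.guessing--;
//   }
//   if ( synPredMatched1 ) {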
  2502. /** Generate a static array containing the names of the tokens,
  2503. * indexed by the token type values. This static array is used
2504. * to format error messages so that the token identifiers or literal
  2505. * strings are displayed instead of the token numbers.
  2506. *
  2507. * If a lexical rule has a paraphrase, use it rather than the
  2508. * token label.
  2509. */
  2510. public void genTokenStrings() {
  2511. // Generate a string for each token. This creates a static
  2512. // array of Strings indexed by token type.
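// Illustrative shape of the emitted table for a hypothetical vocabulary
// (string literals and paraphrases appear in place of plain identifiers):
//   public static final String[] _tokenNames = {
//       "<0>",
//       "EOF",
//       "<2>",
//       "NULL_TREE_LOOKAHEAD",
//       "ID",
//       "\"if\"",
//       "an identifier"
//   };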
  2513. println("");
  2514. println("public static final String[] _tokenNames = {");
  2515. tabs++;
  2516. // Walk the token vocabulary and generate a Vector of strings
  2517. // from the tokens.
  2518. Vector v = grammar.tokenManager.getVocabulary();
  2519. for (int i = 0; i < v.size(); i++) {
  2520. String s = (String)v.elementAt(i);
  2521. if (s == null) {
  2522. s = "<" + String.valueOf(i) + ">";
  2523. }
  2524. if (!s.startsWith("\"") && !s.startsWith("<")) {
  2525. TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
  2526. if (ts != null && ts.getParaphrase() != null) {
  2527. s = StringUtils.stripFrontBack(ts.getParaphrase(), "\"", "\"");
  2528. }
  2529. }
  2530. print(charFormatter.literalString(s));
  2531. if (i != v.size() - 1) {
  2532. _print(",");
  2533. }
  2534. _println("");
  2535. }
2536. // Close the string array initializer
  2537. tabs--;
  2538. println("};");
  2539. }
  2540. /** Create and set Integer token type objects that map
  2541. * to Java Class objects (which AST node to create).
  2542. */
  2543. protected void genTokenASTNodeMap() {
  2544. println("");
  2545. println("protected void buildTokenTypeASTClassMap() {");
  2546. // Generate a map.put("T","TNode") for each token
  2547. // if heterogeneous node known for that token T.
  2548. tabs++;
  2549. boolean generatedNewHashtable = false;
  2550. int n = 0;
  2551. // Walk the token vocabulary and generate puts.
  2552. Vector v = grammar.tokenManager.getVocabulary();
  2553. for (int i = 0; i < v.size(); i++) {
  2554. String s = (String)v.elementAt(i);
  2555. if (s != null) {
  2556. TokenSymbol ts = grammar.tokenManager.getTokenSymbol(s);
  2557. if (ts != null && ts.getASTNodeType() != null) {
  2558. n++;
  2559. if ( !generatedNewHashtable ) {
  2560. // only generate if we are going to add a mapping
  2561. println("tokenTypeToASTClassMap = new Hashtable();");
  2562. generatedNewHashtable = true;
  2563. }
  2564. println("tokenTypeToASTClassMap.put(new Integer("+ts.getTokenType()+"), "+
  2565. ts.getASTNodeType()+".class);");
  2566. }
  2567. }
  2568. }
  2569. if ( n==0 ) {
  2570. println("tokenTypeToASTClassMap=null;");
  2571. }
  2572. tabs--;
  2573. println("};");
  2574. }
  2575. /** Generate the token types Java file */
  2576. protected void genTokenTypes(TokenManager tm) throws IOException {
  2577. // Open the token output Java file and set the currentOutput stream
  2578. // SAS: file open was moved to a method so a subclass can override
  2579. // This was mainly for the VAJ interface
  2580. setupOutput(tm.getName() + TokenTypesFileSuffix);
  2581. tabs = 0;
  2582. // Generate the header common to all Java files
  2583. genHeader();
  2584. // Do not use printAction because we assume tabs==0
  2585. println(behavior.getHeaderAction(""));
  2586. // Encapsulate the definitions in an interface. This can be done
  2587. // because they are all constants.
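// Illustrative shape of the emitted interface for a hypothetical grammar "P"
// (EOF and NULL_TREE_LOOKAHEAD take the values of Token.EOF_TYPE and
// Token.NULL_TREE_LOOKAHEAD; user-defined tokens start at Token.MIN_USER_TYPE):
//   public interface PTokenTypes {
//       int EOF = 1;
//       int NULL_TREE_LOOKAHEAD = 3;
//       int ID = 4;
//       int LITERAL_if = 5;   // mangled from the string literal "if"
//   }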
  2588. println("public interface " + tm.getName() + TokenTypesFileSuffix + " {");
  2589. tabs++;
  2590. // Generate a definition for each token type
  2591. Vector v = tm.getVocabulary();
  2592. // Do special tokens manually
  2593. println("int EOF = " + Token.EOF_TYPE + ";");
  2594. println("int NULL_TREE_LOOKAHEAD = " + Token.NULL_TREE_LOOKAHEAD + ";");
  2595. for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
  2596. String s = (String)v.elementAt(i);
  2597. if (s != null) {
  2598. if (s.startsWith("\"")) {
  2599. // a string literal
  2600. StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
  2601. if (sl == null) {
  2602. antlrTool.panic("String literal " + s + " not in symbol table");
  2603. }
  2604. else if (sl.label != null) {
  2605. println("int " + sl.label + " = " + i + ";");
  2606. }
  2607. else {
  2608. String mangledName = mangleLiteral(s);
  2609. if (mangledName != null) {
  2610. // We were able to create a meaningful mangled token name
  2611. println("int " + mangledName + " = " + i + ";");
  2612. // if no label specified, make the label equal to the mangled name
  2613. sl.label = mangledName;
  2614. }
  2615. else {
  2616. println("// " + s + " = " + i);
  2617. }
  2618. }
  2619. }
  2620. else if (!s.startsWith("<")) {
  2621. println("int " + s + " = " + i + ";");
  2622. }
  2623. }
  2624. }
  2625. // Close the interface
  2626. tabs--;
  2627. println("}");
  2628. // Close the tokens output file
  2629. currentOutput.close();
  2630. currentOutput = null;
  2631. exitIfError();
  2632. }
  2633. /** Get a string for an expression to generate creation of an AST subtree.
  2634. * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
  2635. */
  2636. public String getASTCreateString(Vector v) {
  2637. if (v.size() == 0) {
  2638. return "";
  2639. }
  2640. StringBuffer buf = new StringBuffer();
  2641. buf.append("(" + labeledElementASTType +
  2642. ")astFactory.make( (new ASTArray(" + v.size() +
  2643. "))");
  2644. for (int i = 0; i < v.size(); i++) {
  2645. buf.append(".add(" + v.elementAt(i) + ")");
  2646. }
  2647. buf.append(")");
  2648. return buf.toString();
  2649. }
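// For example (illustrative), a Vector holding the two expressions "a_AST"
// and "b_AST", with the default "AST" node type, yields:
//   (AST)astFactory.make( (new ASTArray(2)).add(a_AST).add(b_AST))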
2650. /** Get a string for an expression to generate creation of an AST node
2651. * @param atom The grammar node for which you are creating the node
2652. * @param astCtorArgs The arguments to the AST constructor
  2653. */
  2654. public String getASTCreateString(GrammarAtom atom, String astCtorArgs) {
  2655. //System.out.println("getASTCreateString("+atom+","+astCtorArgs+")");
  2656. if (atom != null && atom.getASTNodeType() != null) {
  2657. // they specified a type either on the reference or in tokens{} section
  2658. return "("+atom.getASTNodeType()+")"+
  2659. "astFactory.create("+astCtorArgs+",\""+atom.getASTNodeType()+"\")";
  2660. }
  2661. else {
  2662. // must be an action or something since not referencing an atom
  2663. return getASTCreateString(astCtorArgs);
  2664. }
  2665. }
2666. /** Get a string for an expression to generate creation of an AST node.
2667. * Parse the first (possibly only) argument looking for the token type.
2668. * If the token type is a valid token symbol, ask for its AST node type
2669. * and add it to the end if there are only 2 arguments. The forms are #[T], #[T,"t"],
  2670. * and as of 2.7.2 #[T,"t",ASTclassname].
  2671. *
2672. * @param astCtorArgs The arguments to the AST constructor
  2673. */
  2674. public String getASTCreateString(String astCtorArgs) {
  2675. //System.out.println("AST CTOR: "+astCtorArgs);
  2676. if ( astCtorArgs==null ) {
  2677. astCtorArgs = "";
  2678. }
  2679. int nCommas = 0;
  2680. for (int i=0; i<astCtorArgs.length(); i++) {
  2681. if ( astCtorArgs.charAt(i)==',' ) {
  2682. nCommas++;
  2683. }
  2684. }
  2685. //System.out.println("num commas="+nCommas);
  2686. if ( nCommas<2 ) { // if 1 or 2 args
  2687. int firstComma = astCtorArgs.indexOf(',');
  2688. int lastComma = astCtorArgs.lastIndexOf(',');
  2689. String tokenName = astCtorArgs;
  2690. if ( nCommas>0 ) {
  2691. tokenName = astCtorArgs.substring(0,firstComma);
  2692. }
  2693. //System.out.println("Checking for ast node type of "+tokenName);
  2694. TokenSymbol ts = grammar.tokenManager.getTokenSymbol(tokenName);
  2695. if ( ts!=null ) {
  2696. String astNodeType = ts.getASTNodeType();
  2697. //System.out.println("node type of "+tokenName+" is "+astNodeType);
  2698. String emptyText = "";
  2699. if ( nCommas==0 ) {
  2700. // need to add 2nd arg of blank text for token text
  2701. emptyText = ",\"\"";
  2702. }
  2703. if ( astNodeType!=null ) {
  2704. return "("+astNodeType+")"+
  2705. "astFactory.create("+astCtorArgs+emptyText+",\""+astNodeType+"\")";
  2706. }
  2707. // fall through and just do a regular create with cast on front
  2708. // if necessary (it differs from default "AST").
  2709. }
  2710. if ( labeledElementASTType.equals("AST") ) {
  2711. return "astFactory.create("+astCtorArgs+")";
  2712. }
  2713. return "("+labeledElementASTType+")"+
  2714. "astFactory.create("+astCtorArgs+")";
  2715. }
  2716. // create default type or (since 2.7.2) 3rd arg is classname
  2717. return "(" + labeledElementASTType + ")astFactory.create(" + astCtorArgs + ")";
  2718. }
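// Illustrative results, assuming the default "AST" node type and a
// hypothetical token PLUS:
//   #[PLUS]       -> astFactory.create(PLUS)
//   #[PLUS,"+"]   -> astFactory.create(PLUS,"+")
//   #[PLUS] where PLUS has an AST node type PlusNode in the tokens{} section
//                 -> (PlusNode)astFactory.create(PLUS,"","PlusNode")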
  2719. protected String getLookaheadTestExpression(Lookahead[] look, int k) {
  2720. StringBuffer e = new StringBuffer(100);
  2721. boolean first = true;
  2722. e.append("(");
  2723. for (int i = 1; i <= k; i++) {
  2724. BitSet p = look[i].fset;
  2725. if (!first) {
  2726. e.append(") && (");
  2727. }
  2728. first = false;
  2729. // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
  2730. // There is no way to predict what that token would be. Just
  2731. // allow anything instead.
  2732. if (look[i].containsEpsilon()) {
  2733. e.append("true");
  2734. }
  2735. else {
  2736. e.append(getLookaheadTestTerm(i, p));
  2737. }
  2738. }
  2739. e.append(")");
  2740. return e.toString();
  2741. }
  2742. /**Generate a lookahead test expression for an alternate. This
  2743. * will be a series of tests joined by '&&' and enclosed by '()',
  2744. * the number of such tests being determined by the depth of the lookahead.
  2745. */
  2746. protected String getLookaheadTestExpression(Alternative alt, int maxDepth) {
  2747. int depth = alt.lookaheadDepth;
  2748. if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
  2749. // if the decision is nondeterministic, do the best we can: LL(k)
  2750. // any predicates that are around will be generated later.
  2751. depth = grammar.maxk;
  2752. }
  2753. if (maxDepth == 0) {
  2754. // empty lookahead can result from alt with sem pred
  2755. // that can see end of token. E.g., A : {pred}? ('a')? ;
  2756. return "( true )";
  2757. }
  2758. return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
  2759. }
  2760. /**Generate a depth==1 lookahead test expression given the BitSet.
  2761. * This may be one of:
  2762. * 1) a series of 'x==X||' tests
  2763. * 2) a range test using >= && <= where possible,
  2764. * 3) a bitset membership test for complex comparisons
  2765. * @param k The lookahead level
  2766. * @param p The lookahead set for level k
  2767. */
  2768. protected String getLookaheadTestTerm(int k, BitSet p) {
  2769. // Determine the name of the item to be compared
  2770. String ts = lookaheadString(k);
  2771. // Generate a range expression if possible
  2772. int[] elems = p.toArray();
  2773. if (elementsAreRange(elems)) {
  2774. return getRangeExpression(k, elems);
  2775. }
  2776. // Generate a bitset membership test if possible
  2777. StringBuffer e;
  2778. int degree = p.degree();
  2779. if (degree == 0) {
  2780. return "true";
  2781. }
  2782. if (degree >= bitsetTestThreshold) {
  2783. int bitsetIdx = markBitsetForGen(p);
  2784. return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
  2785. }
  2786. // Otherwise, generate the long-winded series of "x==X||" tests
  2787. e = new StringBuffer();
  2788. for (int i = 0; i < elems.length; i++) {
  2789. // Get the compared-to item (token or character value)
  2790. String cs = getValueString(elems[i]);
  2791. // Generate the element comparison
  2792. if (i > 0) e.append("||");
  2793. e.append(ts);
  2794. e.append("==");
  2795. e.append(cs);
  2796. }
  2797. return e.toString();
  2798. }
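// Illustrative results for k==1 in a parser grammar (hypothetical tokens):
//   small set {ID, COMMA}                  -> LA(1)==ID||LA(1)==COMMA
//   contiguous range ('a'..'z' in a lexer) -> (LA(1) >= 'a' && LA(1) <= 'z')
//   large set (degree >= bitsetTestThreshold)
//                                          -> _tokenSet_0.member(LA(1))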
2799. /** Return an expression for testing a contiguous range of elements
  2800. * @param k The lookahead level
  2801. * @param elems The elements representing the set, usually from BitSet.toArray().
  2802. * @return String containing test expression.
  2803. */
  2804. public String getRangeExpression(int k, int[] elems) {
  2805. if (!elementsAreRange(elems)) {
  2806. antlrTool.panic("getRangeExpression called with non-range");
  2807. }
  2808. int begin = elems[0];
  2809. int end = elems[elems.length - 1];
  2810. return
  2811. "(" + lookaheadString(k) + " >= " + getValueString(begin) + " && " +
  2812. lookaheadString(k) + " <= " + getValueString(end) + ")";
  2813. }
  2814. /** getValueString: get a string representation of a token or char value
  2815. * @param value The token or char value
  2816. */
  2817. private String getValueString(int value) {
  2818. String cs;
  2819. if (grammar instanceof LexerGrammar) {
  2820. cs = charFormatter.literalChar(value);
  2821. }
  2822. else {
  2823. TokenSymbol ts = grammar.tokenManager.getTokenSymbolAt(value);
  2824. if (ts == null) {
  2825. return "" + value; // return token type as string
  2826. // tool.panic("vocabulary for token type " + value + " is null");
  2827. }
  2828. String tId = ts.getId();
  2829. if (ts instanceof StringLiteralSymbol) {
  2830. // if string literal, use predefined label if any
  2831. // if no predefined, try to mangle into LITERAL_xxx.
  2832. // if can't mangle, use int value as last resort
  2833. StringLiteralSymbol sl = (StringLiteralSymbol)ts;
  2834. String label = sl.getLabel();
  2835. if (label != null) {
  2836. cs = label;
  2837. }
  2838. else {
  2839. cs = mangleLiteral(tId);
  2840. if (cs == null) {
  2841. cs = String.valueOf(value);
  2842. }
  2843. }
  2844. }
  2845. else {
  2846. cs = tId;
  2847. }
  2848. }
  2849. return cs;
  2850. }
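// Examples of the mapping above (token names are hypothetical):
//   lexer grammar, value 97             -> 'a'            (charFormatter.literalChar)
//   parser token PLUS                   -> PLUS           (plain token id)
//   string literal "while", no label    -> LITERAL_while  (mangleLiteral, default prefix)
//   value with no token symbol          -> the integer value as a string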
  2851. /**Is the lookahead for this alt empty? */
  2852. protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
  2853. int depth = alt.lookaheadDepth;
  2854. if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
  2855. depth = grammar.maxk;
  2856. }
  2857. for (int i = 1; i <= depth && i <= maxDepth; i++) {
  2858. BitSet p = alt.cache[i].fset;
  2859. if (p.degree() != 0) {
  2860. return false;
  2861. }
  2862. }
  2863. return true;
  2864. }
  2865. private String lookaheadString(int k) {
  2866. if (grammar instanceof TreeWalkerGrammar) {
  2867. return "_t.getType()";
  2868. }
  2869. return "LA(" + k + ")";
  2870. }
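// Example: in a parser or lexer, lookaheadString(2) yields "LA(2)"; in a tree
// parser the depth is ignored and the test is always on "_t.getType()".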
  2871. /** Mangle a string literal into a meaningful token name. This is
  2872. * only possible for literals whose characters are all letters or underscores.
  2873. * The resulting mangled literal name is literalsPrefix with the text of the
  2874. * literal (minus its surrounding quotes) appended.
  2875. * @return A string representing the mangled literal, or null if not possible.
  2876. */
  2877. private String mangleLiteral(String s) {
  2878. String mangled = antlrTool.literalsPrefix;
  2879. for (int i = 1; i < s.length() - 1; i++) {
  2880. if (!Character.isLetter(s.charAt(i)) &&
  2881. s.charAt(i) != '_') {
  2882. return null;
  2883. }
  2884. mangled += s.charAt(i);
  2885. }
  2886. if (antlrTool.upperCaseMangledLiterals) {
  2887. mangled = mangled.toUpperCase();
  2888. }
  2889. return mangled;
  2890. }
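// Example (assuming the default literalsPrefix "LITERAL_"):
//   the literal "while"  ->  LITERAL_while  (LITERAL_WHILE if upperCaseMangledLiterals)
//   the literal "+="     ->  null, since '+' and '=' are neither letters nor '_'
// The loop skips the first and last characters of the id, i.e. the quotes that
// surround the literal text.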
  2891. /** Map an identifier to its corresponding tree-node variable.
  2892. * This is context-sensitive, depending on the rule and alternative
  2893. * being generated.
  2894. * @param idParam The identifier name to map
  2895. * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
  2896. */
  2897. public String mapTreeId(String idParam, ActionTransInfo transInfo) {
  2898. // if not in an action of a rule, nothing to map.
  2899. if (currentRule == null) return idParam;
  2900. boolean in_var = false;
  2901. String id = idParam;
  2902. if (grammar instanceof TreeWalkerGrammar) {
  2903. if (!grammar.buildAST) {
  2904. in_var = true;
  2905. }
  2906. // If the id ends with "_in", then map it to the input variable
  2907. else if (id.length() > 3 && id.lastIndexOf("_in") == id.length() - 3) {
  2908. // Strip off the "_in"
  2909. id = id.substring(0, id.length() - 3);
  2910. in_var = true;
  2911. }
  2912. }
  2913. // Check the rule labels. If id is a label, then the output
  2914. // variable is label_AST, and the input variable is plain label.
  2915. for (int i = 0; i < currentRule.labeledElements.size(); i++) {
  2916. AlternativeElement elt = (AlternativeElement)currentRule.labeledElements.elementAt(i);
  2917. if (elt.getLabel().equals(id)) {
  2918. return in_var ? id : id + "_AST";
  2919. }
  2920. }
  2921. // Failing that, check the id-to-variable map for the alternative.
  2922. // If the id is in the map, then output variable is the name in the
  2923. // map, and input variable is name_in
  2924. String s = (String)treeVariableMap.get(id);
  2925. if (s != null) {
  2926. if (s == NONUNIQUE) {
  2927. // There is more than one element with this id
  2928. antlrTool.error("Ambiguous reference to AST element "+id+
  2929. " in rule "+currentRule.getRuleName());
  2930. return null;
  2931. }
  2932. else if (s.equals(currentRule.getRuleName())) {
  2933. // a recursive call to the enclosing rule is
  2934. // ambiguous with the rule itself.
  2935. antlrTool.error("Ambiguous reference to AST element "+id+
  2936. " in rule "+currentRule.getRuleName());
  2937. return null;
  2938. }
  2939. else {
  2940. return in_var ? s + "_in" : s;
  2941. }
  2942. }
  2943. // Failing that, check the rule name itself. Output variable
  2944. // is rule_AST; input variable is rule_AST_in (treeparsers).
  2945. if (id.equals(currentRule.getRuleName())) {
  2946. String r = in_var ? id + "_AST_in" : id + "_AST";
  2947. if (transInfo != null) {
  2948. if (!in_var) {
  2949. transInfo.refRuleRoot = r;
  2950. }
  2951. }
  2952. return r;
  2953. }
  2954. else {
  2955. // id does not map to anything -- return itself.
  2956. return id;
  2957. }
  2958. }
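// Examples for a rule named "expr" (names are hypothetical):
//   label x on an element   -> #x    becomes x_AST   (plain x for the tree-parser input form)
//   unlabeled element ID    -> #ID   becomes the variable recorded by mapTreeVariable,
//                              or that name + "_in" for the input form
//   the rule name itself    -> #expr becomes expr_AST, or expr_AST_in for the input form
//   anything else           -> returned unchanged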
  2959. /** Given an element and the name of an associated AST variable,
  2960. * create a mapping between the element "name" and the variable name.
  2961. */
  2962. private void mapTreeVariable(AlternativeElement e, String name) {
  2963. // For tree elements, defer to the root
  2964. if (e instanceof TreeElement) {
  2965. mapTreeVariable(((TreeElement)e).root, name);
  2966. return;
  2967. }
  2968. // Determine the name of the element, if any, for mapping purposes
  2969. String elName = null;
  2970. // Don't map labeled items
  2971. if (e.getLabel() == null) {
  2972. if (e instanceof TokenRefElement) {
  2973. // use the token id
  2974. elName = ((TokenRefElement)e).atomText;
  2975. }
  2976. else if (e instanceof RuleRefElement) {
  2977. // use the rule name
  2978. elName = ((RuleRefElement)e).targetRule;
  2979. }
  2980. }
  2981. // Add the element to the tree variable map if it has a name
  2982. if (elName != null) {
  2983. if (treeVariableMap.get(elName) != null) {
  2984. // Name is already in the map -- mark it as duplicate
  2985. treeVariableMap.remove(elName);
  2986. treeVariableMap.put(elName, NONUNIQUE);
  2987. }
  2988. else {
  2989. treeVariableMap.put(elName, name);
  2990. }
  2991. }
  2992. }
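// Example: given the alternative   ID expr ID   (all unlabeled, names hypothetical),
// the first ID maps "ID" to its generated AST variable, the rule ref maps "expr"
// to its result variable, and the second ID marks "ID" as NONUNIQUE so a later
// #ID reference is reported as ambiguous by mapTreeId.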
  2993. /** Lexically process $var and tree-specifiers in the action.
  2994. * This will replace #id and #(...) with the appropriate
  2995. * function calls and/or variables etc...
  2996. */
  2997. protected String processActionForSpecialSymbols(String actionStr,
  2998. int line,
  2999. RuleBlock currentRule,
  3000. ActionTransInfo tInfo) {
  3001. if (actionStr == null || actionStr.length() == 0) return null;
  3002. // The action trans info tells us (at the moment) whether an
  3003. // assignment was done to the rule's tree root.
  3004. if (grammar == null)
  3005. return actionStr;
  3006. // see if we have anything to do...
  3007. if ((grammar.buildAST && actionStr.indexOf('#') != -1) ||
  3008. grammar instanceof TreeWalkerGrammar ||
  3009. ((grammar instanceof LexerGrammar ||
  3010. grammar instanceof ParserGrammar)
  3011. && actionStr.indexOf('$') != -1)) {
  3012. // Create a lexer to read an action and return the translated version
  3013. antlr.actions.java.ActionLexer lexer =
  3014. new antlr.actions.java.ActionLexer(actionStr,
  3015. currentRule,
  3016. this,
  3017. tInfo);
  3018. lexer.setLineOffset(line);
  3019. lexer.setFilename(grammar.getFilename());
  3020. lexer.setTool(antlrTool);
  3021. try {
  3022. lexer.mACTION(true);
  3023. actionStr = lexer.getTokenObject().getText();
  3024. // System.out.println("action translated: "+actionStr);
  3025. // System.out.println("trans info is "+tInfo);
  3026. }
  3027. catch (RecognitionException ex) {
  3028. lexer.reportError(ex);
  3029. return actionStr;
  3030. }
  3031. catch (TokenStreamException tex) {
  3032. antlrTool.panic("Error reading action:" + actionStr);
  3033. return actionStr;
  3034. }
  3035. catch (CharStreamException io) {
  3036. antlrTool.panic("Error reading action:" + actionStr);
  3037. return actionStr;
  3038. }
  3039. }
  3040. return actionStr;
  3041. }
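// For example, when buildAST is on, an action containing #id or #(...) tree
// specifiers is run through the generated ActionLexer above, which calls back
// into mapTreeId here so each #id is rewritten to its AST variable (e.g. id_AST);
// in lexers and parsers, $-references are translated in the same pass.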
  3042. private void setupGrammarParameters(Grammar g) {
  3043. if (g instanceof ParserGrammar) {
  3044. labeledElementASTType = "AST";
  3045. if (g.hasOption("ASTLabelType")) {
  3046. Token tsuffix = g.getOption("ASTLabelType");
  3047. if (tsuffix != null) {
  3048. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  3049. if (suffix != null) {
  3050. labeledElementASTType = suffix;
  3051. }
  3052. }
  3053. }
  3054. labeledElementType = "Token ";
  3055. labeledElementInit = "null";
  3056. commonExtraArgs = "";
  3057. commonExtraParams = "";
  3058. commonLocalVars = "";
  3059. lt1Value = "LT(1)";
  3060. exceptionThrown = "RecognitionException";
  3061. throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
  3062. }
  3063. else if (g instanceof LexerGrammar) {
  3064. labeledElementType = "char ";
  3065. labeledElementInit = "'\\0'";
  3066. commonExtraArgs = "";
  3067. commonExtraParams = "boolean _createToken";
  3068. commonLocalVars = "int _ttype; Token _token=null; int _begin=text.length();";
  3069. lt1Value = "LA(1)";
  3070. exceptionThrown = "RecognitionException";
  3071. throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
  3072. }
  3073. else if (g instanceof TreeWalkerGrammar) {
  3074. labeledElementASTType = "AST";
  3075. labeledElementType = "AST";
  3076. if (g.hasOption("ASTLabelType")) {
  3077. Token tsuffix = g.getOption("ASTLabelType");
  3078. if (tsuffix != null) {
  3079. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  3080. if (suffix != null) {
  3081. labeledElementASTType = suffix;
  3082. labeledElementType = suffix;
  3083. }
  3084. }
  3085. }
  3086. if (!g.hasOption("ASTLabelType")) {
  3087. g.setOption("ASTLabelType", new Token(ANTLRTokenTypes.STRING_LITERAL, "AST"));
  3088. }
  3089. labeledElementInit = "null";
  3090. commonExtraArgs = "_t";
  3091. commonExtraParams = "AST _t";
  3092. commonLocalVars = "";
  3093. lt1Value = "(" + labeledElementASTType + ")_t";
  3094. exceptionThrown = "RecognitionException";
  3095. throwNoViable = "throw new NoViableAltException(_t);";
  3096. }
  3097. else {
  3098. antlrTool.panic("Unknown grammar type");
  3099. }
  3100. }
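// Example: a parser grammar with   options { ASTLabelType = "MyNode"; }   gets
// labeledElementASTType = "MyNode" (a hypothetical user class) while
// labeledElementType stays "Token "; a tree-walker grammar sets both to "MyNode".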
  3101. /** This method exists so a subclass, namely VAJCodeGenerator,
  3102. * can open the file in its own evil way. JavaCodeGenerator
  3103. * simply opens a text file...
  3104. */
  3105. public void setupOutput(String className) throws IOException {
  3106. currentOutput = antlrTool.openOutputFile(className + ".java");
  3107. }
  3108. }