PageRenderTime 76ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/antlr-2.7.5/antlr/CppCodeGenerator.java

https://github.com/boo/boo-lang
Java | 4828 lines | 3515 code | 406 blank | 907 comment | 731 complexity | 6958d383017cebc6d256380b2df05dcf MD5 | raw file
Possible License(s): GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. package antlr;
  2. /* ANTLR Translator Generator
  3. * Project led by Terence Parr at http://www.jGuru.com
  4. * Software rights: http://www.antlr.org/license.html
  5. *
  6. * $Id: //depot/code/org.antlr/release/antlr-2.7.5/antlr/CppCodeGenerator.java#1 $
  7. */
  8. // C++ code generator by Pete Wells: pete@yamuna.demon.co.uk
  9. // #line generation contributed by: Ric Klaren <klaren@cs.utwente.nl>
  10. import java.util.Enumeration;
  11. import java.util.Hashtable;
  12. import antlr.collections.impl.BitSet;
  13. import antlr.collections.impl.Vector;
  14. import java.io.PrintWriter; //SAS: changed for proper text file io
  15. import java.io.IOException;
  16. import java.io.FileWriter;
  17. /** Generate MyParser.cpp, MyParser.hpp, MyLexer.cpp, MyLexer.hpp
  18. * and MyParserTokenTypes.hpp
  19. */
  20. public class CppCodeGenerator extends CodeGenerator {
// When true, the gen(...) methods print a trace line to System.out
// (checked together with the inherited DEBUG_CODE_GENERATOR flag).
boolean DEBUG_CPP_CODE_GENERATOR = false;
// non-zero if inside syntactic predicate generation
protected int syntacticPredLevel = 0;
// Are we generating ASTs (for parsers and tree parsers) right now?
protected boolean genAST = false;
// Are we saving the text consumed (for lexers) right now?
protected boolean saveText = false;
// Generate #line's (consulted by genLineNo(int) and genLineNo2())
protected boolean genHashLines = true;
// Generate constructors or not
protected boolean noConstructors = false;
// Used to keep track of lineno in output; bumped by the print helpers
// for every newline they write and read back by genLineNo2().
protected int outputLine;
// File name placed in the #line directive emitted by genLineNo2().
protected String outputFile;
// Grammar parameters set up to handle different grammar classes.
// These are used to get instanceof tests out of code generation
boolean usingCustomAST = false;
String labeledElementType;
String labeledElementASTType; // mostly the same as labeledElementType except in parsers
String labeledElementASTInit;
String labeledElementInit;
String commonExtraArgs;
String commonExtraParams;
String commonLocalVars;
String lt1Value;
String exceptionThrown;
String throwNoViable;
// Tracks the rule being generated. Used for mapTreeId
RuleBlock currentRule;
// Tracks the rule or labeled subrule being generated. Used for AST generation.
String currentASTResult;
// Mapping between the ids used in the current alt, and the
// names of variables used to represent their AST values.
Hashtable treeVariableMap = new Hashtable();
/** Used to keep track of which AST variables have been defined in a rule
 * (except for the #rule_name and #rule_name_in var's).
 */
Hashtable declaredASTVariables = new Hashtable();
// Count of unnamed generated variables
int astVarNumber = 1;
// Special value used to mark duplicate in treeVariableMap.
// NOTE(review): created with new String(), presumably so it can be
// recognised by identity -- confirm before replacing it with a literal.
protected static final String NONUNIQUE = new String();
public static final int caseSizeThreshold = 127; // ascii is max
// Sem pred strings collected by addSemPred(); only allocated by
// gen(LexerGrammar)/gen(ParserGrammar) when the grammar has debuggingOutput set.
private Vector semPreds;
// Used to keep track of which (heterogeneous AST types are used)
// which need to be set in the ASTFactory of the generated parser
private Vector astTypes;
// Prefixes prepended to support-library names in the generated C++.
private static String namespaceStd = "ANTLR_USE_NAMESPACE(std)";
private static String namespaceAntlr = "ANTLR_USE_NAMESPACE(antlr)";
private static NameSpace nameSpace = null;
// Names of header sections.
// NOTE(review): presumably keys into behavior.headerActions as used by
// printHeaderAction(String) -- confirm against the callers.
private static final String preIncludeCpp = "pre_include_cpp";
private static final String preIncludeHpp = "pre_include_hpp";
private static final String postIncludeCpp = "post_include_cpp";
private static final String postIncludeHpp = "post_include_hpp";
  75. /** Create a C++ code-generator using the given Grammar.
  76. * The caller must still call setTool, setBehavior, and setAnalyzer
  77. * before generating code.
  78. */
  79. public CppCodeGenerator() {
  80. super();
  81. charFormatter = new CppCharFormatter();
  82. }
  83. /** Adds a semantic predicate string to the sem pred vector
  84. These strings will be used to build an array of sem pred names
  85. when building a debugging parser. This method should only be
  86. called when the debug option is specified
  87. */
  88. protected int addSemPred(String predicate) {
  89. semPreds.appendElement(predicate);
  90. return semPreds.size()-1;
  91. }
  92. public void exitIfError()
  93. {
  94. if (antlrTool.hasError())
  95. {
  96. antlrTool.fatalError("Exiting due to errors.");
  97. }
  98. }
  99. protected int countLines( String s )
  100. {
  101. int lines = 0;
  102. for( int i = 0; i < s.length(); i++ )
  103. {
  104. if( s.charAt(i) == '\n' )
  105. lines++;
  106. }
  107. return lines;
  108. }
  109. /** Output a String to the currentOutput stream.
  110. * Ignored if string is null.
  111. * @param s The string to output
  112. */
  113. protected void _print(String s)
  114. {
  115. if (s != null)
  116. {
  117. outputLine += countLines(s);
  118. currentOutput.print(s);
  119. }
  120. }
  121. /** Print an action without leading tabs, attempting to
  122. * preserve the current indentation level for multi-line actions
  123. * Ignored if string is null.
  124. * @param s The action string to output
  125. */
  126. protected void _printAction(String s)
  127. {
  128. if (s != null)
  129. {
  130. outputLine += countLines(s)+1;
  131. super._printAction(s);
  132. }
  133. }
  134. /** Print an action stored in a token surrounded by #line stuff */
  135. public void printAction(Token t)
  136. {
  137. if (t != null)
  138. {
  139. genLineNo(t.getLine());
  140. printTabs();
  141. _printAction(processActionForSpecialSymbols(t.getText(), t.getLine(),
  142. null, null) );
  143. genLineNo2();
  144. }
  145. }
  146. /** Print a header action by #line stuff also process any tree construction
  147. * @param name The name of the header part
  148. */
  149. public void printHeaderAction(String name)
  150. {
  151. Token a = (antlr.Token)behavior.headerActions.get(name);
  152. if (a != null)
  153. {
  154. genLineNo(a.getLine());
  155. println(processActionForSpecialSymbols(a.getText(), a.getLine(),
  156. null, null) );
  157. genLineNo2();
  158. }
  159. }
  160. /** Output a String followed by newline, to the currentOutput stream.
  161. * Ignored if string is null.
  162. * @param s The string to output
  163. */
  164. protected void _println(String s) {
  165. if (s != null) {
  166. outputLine += countLines(s)+1;
  167. currentOutput.println(s);
  168. }
  169. }
  170. /** Output tab indent followed by a String followed by newline,
  171. * to the currentOutput stream. Ignored if string is null.
  172. * @param s The string to output
  173. */
  174. protected void println(String s) {
  175. if (s != null) {
  176. printTabs();
  177. outputLine += countLines(s)+1;
  178. currentOutput.println(s);
  179. }
  180. }
  181. /** Generate a #line or // line depending on options */
  182. public void genLineNo(int line) {
  183. if ( line == 0 ) {
  184. line++;
  185. }
  186. if( genHashLines )
  187. _println("#line "+line+" \""+antlrTool.fileMinusPath(antlrTool.grammarFile)+"\"");
  188. }
  189. /** Generate a #line or // line depending on options */
  190. public void genLineNo(GrammarElement el)
  191. {
  192. if( el != null )
  193. genLineNo(el.getLine());
  194. }
  195. /** Generate a #line or // line depending on options */
  196. public void genLineNo(Token t)
  197. {
  198. if (t != null)
  199. genLineNo(t.getLine());
  200. }
  201. /** Generate a #line or // line depending on options */
  202. public void genLineNo2()
  203. {
  204. if( genHashLines )
  205. {
  206. _println("#line "+(outputLine+1)+" \""+outputFile+"\"");
  207. }
  208. }
  209. /// Bound safe isDigit
  210. private boolean charIsDigit( String s, int i )
  211. {
  212. return (i < s.length()) && Character.isDigit(s.charAt(i));
  213. }
  214. /** Normalize a string coming from antlr's lexer. E.g. translate java
  215. * escapes to values. Check their size (multibyte) bomb out if they are
  216. * multibyte (bit crude). Then reescape to C++ style things.
  217. * Used to generate strings for match() and matchRange()
  218. * @param lit the literal string
  219. * @param isCharLiteral if it's for a character literal
  220. * (enforced to be one length) and enclosed in '
  221. * FIXME: bombing out on mb chars. Should be done in Lexer.
  222. * FIXME: this is another horrible hack.
  223. * FIXME: life would be soooo much easier if the stuff from the lexer was
  224. * normalized in some way.
  225. */
  226. private String convertJavaToCppString( String lit, boolean isCharLiteral )
  227. {
  228. // System.out.println("convertJavaToCppLiteral: "+lit);
  229. String ret = new String();
  230. String s = lit;
  231. int i = 0;
  232. int val = 0;
  233. if( isCharLiteral ) // verify & strip off quotes
  234. {
  235. if( ! lit.startsWith("'") || ! lit.endsWith("'") )
  236. antlrTool.error("Invalid character literal: '"+lit+"'");
  237. }
  238. else
  239. {
  240. if( ! lit.startsWith("\"") || ! lit.endsWith("\"") )
  241. antlrTool.error("Invalid character string: '"+lit+"'");
  242. }
  243. s = lit.substring(1,lit.length()-1);
  244. String prefix="";
  245. int maxsize = 255;
  246. if( grammar instanceof LexerGrammar )
  247. {
  248. // vocab size seems to be 1 bigger than it actually is
  249. maxsize = ((LexerGrammar)grammar).charVocabulary.size() - 1;
  250. if( maxsize > 255 )
  251. prefix= "L";
  252. }
  253. // System.out.println("maxsize "+maxsize+" prefix "+prefix);
  254. while ( i < s.length() )
  255. {
  256. if( s.charAt(i) == '\\' )
  257. {
  258. if( s.length() == i+1 )
  259. antlrTool.error("Invalid escape in char literal: '"+lit+"' looking at '"+s.substring(i)+"'");
  260. // deal with escaped junk
  261. switch ( s.charAt(i+1) ) {
  262. case 'a' :
  263. val = 7;
  264. i += 2;
  265. break;
  266. case 'b' :
  267. val = 8;
  268. i += 2;
  269. break;
  270. case 't' :
  271. val = 9;
  272. i += 2;
  273. break;
  274. case 'n' :
  275. val = 10;
  276. i += 2;
  277. break;
  278. case 'f' :
  279. val = 12;
  280. i += 2;
  281. break;
  282. case 'r' :
  283. val = 13;
  284. i += 2;
  285. break;
  286. case '"' :
  287. case '\'' :
  288. case '\\' :
  289. val = s.charAt(i+1);
  290. i += 2;
  291. break;
  292. case 'u' :
  293. // Unicode char \u1234
  294. if( i+5 < s.length() )
  295. {
  296. val = Character.digit(s.charAt(i+2), 16) * 16 * 16 * 16 +
  297. Character.digit(s.charAt(i+3), 16) * 16 * 16 +
  298. Character.digit(s.charAt(i+4), 16) * 16 +
  299. Character.digit(s.charAt(i+5), 16);
  300. i += 6;
  301. }
  302. else
  303. antlrTool.error("Invalid escape in char literal: '"+lit+"' looking at '"+s.substring(i)+"'");
  304. break;
  305. case '0' : // \123
  306. case '1' :
  307. case '2' :
  308. case '3' :
  309. if( charIsDigit(s, i+2) )
  310. {
  311. if( charIsDigit(s, i+3) )
  312. {
  313. val = (s.charAt(i+1)-'0')*8*8 + (s.charAt(i+2)-'0')*8 +
  314. (s.charAt(i+3)-'0');
  315. i += 4;
  316. }
  317. else
  318. {
  319. val = (s.charAt(i+1)-'0')*8 + (s.charAt(i+2)-'0');
  320. i += 3;
  321. }
  322. }
  323. else
  324. {
  325. val = s.charAt(i+1)-'0';
  326. i += 2;
  327. }
  328. break;
  329. case '4' :
  330. case '5' :
  331. case '6' :
  332. case '7' :
  333. if ( charIsDigit(s, i+2) )
  334. {
  335. val = (s.charAt(i+1)-'0')*8 + (s.charAt(i+2)-'0');
  336. i += 3;
  337. }
  338. else
  339. {
  340. val = s.charAt(i+1)-'0';
  341. i += 2;
  342. }
  343. default:
  344. antlrTool.error("Unhandled escape in char literal: '"+lit+"' looking at '"+s.substring(i)+"'");
  345. val = 0;
  346. }
  347. }
  348. else
  349. val = s.charAt(i++);
  350. if( grammar instanceof LexerGrammar )
  351. {
  352. if( val > maxsize ) // abort if too big
  353. {
  354. String offender;
  355. if( ( 0x20 <= val ) && ( val < 0x7F ) )
  356. offender = charFormatter.escapeChar(val,true);
  357. else
  358. offender = "0x"+Integer.toString(val,16);
  359. antlrTool.error("Character out of range in "+(isCharLiteral?"char literal":"string constant")+": '"+s+"'");
  360. antlrTool.error("Vocabulary size: "+maxsize+" Character "+offender);
  361. }
  362. }
  363. if( isCharLiteral )
  364. {
  365. // we should be at end of char literal here..
  366. if( i != s.length() )
  367. antlrTool.error("Invalid char literal: '"+lit+"'");
  368. if( maxsize <= 255 )
  369. {
  370. if ( (val <= 255) && (val & 0x80) != 0 )
  371. // the joys of sign extension in the support lib *cough*
  372. // actually the support lib needs to be fixed but that's a bit
  373. // hairy too.
  374. ret = "static_cast<unsigned char>('"+charFormatter.escapeChar(val,true)+"')";
  375. else
  376. ret = "'"+charFormatter.escapeChar(val,true)+"'";
  377. }
  378. else
  379. {
  380. // so wchar_t is some implementation defined int like thing
  381. // so this may even lead to having 16 bit or 32 bit cases...
  382. // I smell some extra grammar options in the future :(
  383. ret = "L'"+charFormatter.escapeChar(val,true)+"'";
  384. }
  385. }
  386. else
  387. ret += charFormatter.escapeChar(val,true);
  388. }
  389. if( !isCharLiteral )
  390. ret = prefix+"\""+ret+"\"";
  391. return ret;
  392. }
  393. /** Generate the parser, lexer, treeparser, and token types in C++
  394. */
  395. public void gen() {
  396. // Do the code generation
  397. try {
  398. // Loop over all grammars
  399. Enumeration grammarIter = behavior.grammars.elements();
  400. while (grammarIter.hasMoreElements()) {
  401. Grammar g = (Grammar)grammarIter.nextElement();
  402. if ( g.debuggingOutput ) {
  403. antlrTool.error(g.getFilename()+": C++ mode does not support -debug");
  404. }
  405. // Connect all the components to each other
  406. g.setGrammarAnalyzer(analyzer);
  407. g.setCodeGenerator(this);
  408. analyzer.setGrammar(g);
  409. // To get right overloading behavior across hetrogeneous grammars
  410. setupGrammarParameters(g);
  411. g.generate();
  412. exitIfError();
  413. }
  414. // Loop over all token managers (some of which are lexers)
  415. Enumeration tmIter = behavior.tokenManagers.elements();
  416. while (tmIter.hasMoreElements()) {
  417. TokenManager tm = (TokenManager)tmIter.nextElement();
  418. if (!tm.isReadOnly()) {
  419. // Write the token manager tokens as C++
  420. // this must appear before genTokenInterchange so that
  421. // labels are set on string literals
  422. genTokenTypes(tm);
  423. // Write the token manager tokens as plain text
  424. genTokenInterchange(tm);
  425. }
  426. exitIfError();
  427. }
  428. }
  429. catch (IOException e) {
  430. antlrTool.reportException(e, null);
  431. }
  432. }
  433. /** Generate code for the given grammar element.
  434. * @param blk The {...} action to generate
  435. */
  436. public void gen(ActionElement action) {
  437. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("genAction("+action+")");
  438. if ( action.isSemPred ) {
  439. genSemPred(action.actionText, action.line);
  440. }
  441. else {
  442. if ( grammar.hasSyntacticPredicate ) {
  443. println("if ( inputState->guessing==0 ) {");
  444. tabs++;
  445. }
  446. ActionTransInfo tInfo = new ActionTransInfo();
  447. String actionStr = processActionForSpecialSymbols(action.actionText,
  448. action.getLine(),
  449. currentRule, tInfo);
  450. if ( tInfo.refRuleRoot!=null ) {
  451. // Somebody referenced "#rule", make sure translated var is valid
  452. // assignment to #rule is left as a ref also, meaning that assignments
  453. // with no other refs like "#rule = foo();" still forces this code to be
  454. // generated (unnecessarily).
  455. println(tInfo.refRuleRoot + " = "+labeledElementASTType+"(currentAST.root);");
  456. }
  457. // dump the translated action
  458. genLineNo(action);
  459. printAction(actionStr);
  460. genLineNo2();
  461. if ( tInfo.assignToRoot ) {
  462. // Somebody did a "#rule=", reset internal currentAST.root
  463. println("currentAST.root = "+tInfo.refRuleRoot+";");
  464. // reset the child pointer too to be last sibling in sibling list
  465. // now use if else in stead of x ? y : z to shut CC 4.2 up.
  466. println("if ( "+tInfo.refRuleRoot+"!="+labeledElementASTInit+" &&");
  467. tabs++;
  468. println(tInfo.refRuleRoot+"->getFirstChild() != "+labeledElementASTInit+" )");
  469. println(" currentAST.child = "+tInfo.refRuleRoot+"->getFirstChild();");
  470. tabs--;
  471. println("else");
  472. tabs++;
  473. println("currentAST.child = "+tInfo.refRuleRoot+";");
  474. tabs--;
  475. println("currentAST.advanceChildToEnd();");
  476. }
  477. if ( grammar.hasSyntacticPredicate ) {
  478. tabs--;
  479. println("}");
  480. }
  481. }
  482. }
  483. /** Generate code for the given grammar element.
  484. * @param blk The "x|y|z|..." block to generate
  485. */
  486. public void gen(AlternativeBlock blk) {
  487. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("gen("+blk+")");
  488. println("{");
  489. genBlockPreamble(blk);
  490. genBlockInitAction(blk);
  491. // Tell AST generation to build subrule result
  492. String saveCurrentASTResult = currentASTResult;
  493. if (blk.getLabel() != null) {
  494. currentASTResult = blk.getLabel();
  495. }
  496. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  497. CppBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
  498. genBlockFinish(howToFinish, throwNoViable);
  499. println("}");
  500. // Restore previous AST generation
  501. currentASTResult = saveCurrentASTResult;
  502. }
  503. /** Generate code for the given grammar element.
  504. * @param blk The block-end element to generate. Block-end
  505. * elements are synthesized by the grammar parser to represent
  506. * the end of a block.
  507. */
  508. public void gen(BlockEndElement end) {
  509. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("genRuleEnd("+end+")");
  510. }
  511. /** Generate code for the given grammar element.
  512. * Only called from lexer grammars.
  513. * @param blk The character literal reference to generate
  514. */
  515. public void gen(CharLiteralElement atom) {
  516. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
  517. System.out.println("genChar("+atom+")");
  518. if ( ! (grammar instanceof LexerGrammar) )
  519. antlrTool.error("cannot ref character literals in grammar: "+atom);
  520. if ( atom.getLabel() != null ) {
  521. println(atom.getLabel() + " = " + lt1Value + ";");
  522. }
  523. boolean oldsaveText = saveText;
  524. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  525. // if in lexer and ! on element, save buffer index to kill later
  526. if ( !saveText||atom.getAutoGenType()==GrammarElement.AUTO_GEN_BANG )
  527. println("_saveIndex = text.length();");
  528. print(atom.not ? "matchNot(" : "match(");
  529. _print(convertJavaToCppString( atom.atomText, true ));
  530. _println(" /* charlit */ );");
  531. if ( !saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG )
  532. println("text.erase(_saveIndex);"); // kill text atom put in buffer
  533. saveText = oldsaveText;
  534. }
  535. /** Generate code for the given grammar element.
  536. * Only called from lexer grammars.
  537. * @param blk The character-range reference to generate
  538. */
  539. public void gen(CharRangeElement r) {
  540. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
  541. System.out.println("genCharRangeElement("+r.beginText+".."+r.endText+")");
  542. if ( ! (grammar instanceof LexerGrammar) )
  543. antlrTool.error("cannot ref character range in grammar: "+r);
  544. if ( r.getLabel() != null && syntacticPredLevel == 0) {
  545. println(r.getLabel() + " = " + lt1Value + ";");
  546. }
  547. // Correctly take care of saveIndex stuff...
  548. boolean save = ( grammar instanceof LexerGrammar &&
  549. ( !saveText ||
  550. r.getAutoGenType() == GrammarElement.AUTO_GEN_BANG )
  551. );
  552. if (save)
  553. println("_saveIndex=text.length();");
  554. println("matchRange("+convertJavaToCppString(r.beginText,true)+
  555. ","+convertJavaToCppString(r.endText,true)+");");
  556. if (save)
  557. println("text.erase(_saveIndex);");
  558. }
  559. /** Generate the lexer C++ files */
  560. public void gen(LexerGrammar g) throws IOException {
  561. // If debugging, create a new sempred vector for this grammar
  562. if (g.debuggingOutput)
  563. semPreds = new Vector();
  564. if( g.charVocabulary.size() > 256 )
  565. antlrTool.warning(g.getFilename()+": Vocabularies of this size still experimental in C++ mode (vocabulary size now: "+g.charVocabulary.size()+")");
  566. setGrammar(g);
  567. if (!(grammar instanceof LexerGrammar)) {
  568. antlrTool.panic("Internal error generating lexer");
  569. }
  570. genBody(g);
  571. genInclude(g);
  572. }
  573. /** Generate code for the given grammar element.
  574. * @param blk The (...)+ block to generate
  575. */
  576. public void gen(OneOrMoreBlock blk) {
  577. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("gen+("+blk+")");
  578. String label;
  579. String cnt;
  580. println("{ // ( ... )+");
  581. genBlockPreamble(blk);
  582. if ( blk.getLabel() != null ) {
  583. cnt = "_cnt_"+blk.getLabel();
  584. }
  585. else {
  586. cnt = "_cnt" + blk.ID;
  587. }
  588. println("int "+cnt+"=0;");
  589. if ( blk.getLabel() != null ) {
  590. label = blk.getLabel();
  591. }
  592. else {
  593. label = "_loop" + blk.ID;
  594. }
  595. println("for (;;) {");
  596. tabs++;
  597. // generate the init action for ()+ ()* inside the loop
  598. // this allows us to do usefull EOF checking...
  599. genBlockInitAction(blk);
  600. // Tell AST generation to build subrule result
  601. String saveCurrentASTResult = currentASTResult;
  602. if (blk.getLabel() != null) {
  603. currentASTResult = blk.getLabel();
  604. }
  605. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  606. // generate exit test if greedy set to false
  607. // and an alt is ambiguous with exit branch
  608. // or when lookahead derived purely from end-of-file
  609. // Lookahead analysis stops when end-of-file is hit,
  610. // returning set {epsilon}. Since {epsilon} is not
  611. // ambig with any real tokens, no error is reported
  612. // by deterministic() routines and we have to check
  613. // for the case where the lookahead depth didn't get
  614. // set to NONDETERMINISTIC (this only happens when the
  615. // FOLLOW contains real atoms + epsilon).
  616. boolean generateNonGreedyExitPath = false;
  617. int nonGreedyExitDepth = grammar.maxk;
  618. if ( !blk.greedy &&
  619. blk.exitLookaheadDepth<=grammar.maxk &&
  620. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon() )
  621. {
  622. generateNonGreedyExitPath = true;
  623. nonGreedyExitDepth = blk.exitLookaheadDepth;
  624. }
  625. else if ( !blk.greedy &&
  626. blk.exitLookaheadDepth==LLkGrammarAnalyzer.NONDETERMINISTIC )
  627. {
  628. generateNonGreedyExitPath = true;
  629. }
  630. // generate exit test if greedy set to false
  631. // and an alt is ambiguous with exit branch
  632. if ( generateNonGreedyExitPath ) {
  633. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) {
  634. System.out.println("nongreedy (...)+ loop; exit depth is "+
  635. blk.exitLookaheadDepth);
  636. }
  637. String predictExit =
  638. getLookaheadTestExpression(blk.exitCache,
  639. nonGreedyExitDepth);
  640. println("// nongreedy exit test");
  641. println("if ( "+cnt+">=1 && "+predictExit+") goto "+label+";");
  642. }
  643. CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  644. genBlockFinish(
  645. howToFinish,
  646. "if ( "+cnt+">=1 ) { goto "+label+"; } else {" + throwNoViable + "}"
  647. );
  648. println(cnt+"++;");
  649. tabs--;
  650. println("}");
  651. println(label+":;");
  652. println("} // ( ... )+");
  653. // Restore previous AST generation
  654. currentASTResult = saveCurrentASTResult;
  655. }
  656. /** Generate the parser C++ file */
  657. public void gen(ParserGrammar g) throws IOException {
  658. // if debugging, set up a new vector to keep track of sempred
  659. // strings for this grammar
  660. if (g.debuggingOutput)
  661. semPreds = new Vector();
  662. setGrammar(g);
  663. if (!(grammar instanceof ParserGrammar)) {
  664. antlrTool.panic("Internal error generating parser");
  665. }
  666. genBody(g);
  667. genInclude(g);
  668. }
  669. /** Generate code for the given grammar element.
  670. * @param blk The rule-reference to generate
  671. */
  672. public void gen(RuleRefElement rr)
  673. {
  674. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("genRR("+rr+")");
  675. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
  676. if (rs == null || !rs.isDefined())
  677. {
  678. // Is this redundant???
  679. antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
  680. return;
  681. }
  682. if (!(rs instanceof RuleSymbol))
  683. {
  684. // Is this redundant???
  685. antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
  686. return;
  687. }
  688. genErrorTryForElement(rr);
  689. // AST value for labeled rule refs in tree walker.
  690. // This is not AST construction; it is just the input tree node value.
  691. if ( grammar instanceof TreeWalkerGrammar &&
  692. rr.getLabel() != null &&
  693. syntacticPredLevel == 0 )
  694. {
  695. println(rr.getLabel() + " = (_t == ASTNULL) ? "+labeledElementASTInit+" : "+lt1Value+";");
  696. }
  697. // if in lexer and ! on rule ref or alt or rule, save buffer index to
  698. // kill later
  699. if ( grammar instanceof LexerGrammar && (!saveText||rr.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) )
  700. {
  701. println("_saveIndex = text.length();");
  702. }
  703. // Process return value assignment if any
  704. printTabs();
  705. if (rr.idAssign != null)
  706. {
  707. // Warn if the rule has no return type
  708. if (rs.block.returnAction == null)
  709. {
  710. antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
  711. }
  712. _print(rr.idAssign + "=");
  713. } else {
  714. // Warn about return value if any, but not inside syntactic predicate
  715. if ( !(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null)
  716. {
  717. antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
  718. }
  719. }
  720. // Call the rule
  721. GenRuleInvocation(rr);
  722. // if in lexer and ! on element or alt or rule, save buffer index to kill later
  723. if ( grammar instanceof LexerGrammar && (!saveText||rr.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) ) {
  724. println("text.erase(_saveIndex);");
  725. }
  726. // if not in a syntactic predicate
  727. if (syntacticPredLevel == 0)
  728. {
  729. boolean doNoGuessTest = (
  730. grammar.hasSyntacticPredicate &&
  731. (
  732. grammar.buildAST && rr.getLabel() != null ||
  733. (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
  734. )
  735. );
  736. if (doNoGuessTest) {
  737. println("if (inputState->guessing==0) {");
  738. tabs++;
  739. }
  740. if (grammar.buildAST && rr.getLabel() != null)
  741. {
  742. // always gen variable for rule return on labeled rules
  743. // RK: hmm do I know here if the returnAST needs a cast ?
  744. println(rr.getLabel() + "_AST = returnAST;");
  745. }
  746. if (genAST)
  747. {
  748. switch (rr.getAutoGenType())
  749. {
  750. case GrammarElement.AUTO_GEN_NONE:
  751. if( usingCustomAST )
  752. println("astFactory->addASTChild(currentAST, "+namespaceAntlr+"RefAST(returnAST));");
  753. else
  754. println("astFactory->addASTChild( currentAST, returnAST );");
  755. break;
  756. case GrammarElement.AUTO_GEN_CARET:
  757. // FIXME: RK: I'm not so sure this should be an error..
  758. // I think it might actually work and be usefull at times.
  759. antlrTool.error("Internal: encountered ^ after rule reference");
  760. break;
  761. default:
  762. break;
  763. }
  764. }
  765. // if a lexer and labeled, Token label defined at rule level, just set it here
  766. if ( grammar instanceof LexerGrammar && rr.getLabel() != null )
  767. {
  768. println(rr.getLabel()+"=_returnToken;");
  769. }
  770. if (doNoGuessTest)
  771. {
  772. tabs--;
  773. println("}");
  774. }
  775. }
  776. genErrorCatchForElement(rr);
  777. }
  778. /** Generate code for the given grammar element.
  779. * @param blk The string-literal reference to generate
  780. */
  781. public void gen(StringLiteralElement atom) {
  782. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("genString("+atom+")");
  783. // Variable declarations for labeled elements
  784. if (atom.getLabel()!=null && syntacticPredLevel == 0) {
  785. println(atom.getLabel() + " = " + lt1Value + ";");
  786. }
  787. // AST
  788. genElementAST(atom);
  789. // is there a bang on the literal?
  790. boolean oldsaveText = saveText;
  791. saveText = saveText && atom.getAutoGenType()==GrammarElement.AUTO_GEN_NONE;
  792. // matching
  793. genMatch(atom);
  794. saveText = oldsaveText;
  795. // tack on tree cursor motion if doing a tree walker
  796. if (grammar instanceof TreeWalkerGrammar) {
  797. println("_t = _t->getNextSibling();");
  798. }
  799. }
  800. /** Generate code for the given grammar element.
  801. * @param blk The token-range reference to generate
  802. */
  803. public void gen(TokenRangeElement r) {
  804. genErrorTryForElement(r);
  805. if ( r.getLabel()!=null && syntacticPredLevel == 0) {
  806. println(r.getLabel() + " = " + lt1Value + ";");
  807. }
  808. // AST
  809. genElementAST(r);
  810. // match
  811. println("matchRange("+r.beginText+","+r.endText+");");
  812. genErrorCatchForElement(r);
  813. }
  814. /** Generate code for the given grammar element.
  815. * @param blk The token-reference to generate
  816. */
  817. public void gen(TokenRefElement atom) {
  818. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("genTokenRef("+atom+")");
  819. if ( grammar instanceof LexerGrammar ) {
  820. antlrTool.panic("Token reference found in lexer");
  821. }
  822. genErrorTryForElement(atom);
  823. // Assign Token value to token label variable
  824. if ( atom.getLabel()!=null && syntacticPredLevel == 0) {
  825. println(atom.getLabel() + " = " + lt1Value + ";");
  826. }
  827. // AST
  828. genElementAST(atom);
  829. // matching
  830. genMatch(atom);
  831. genErrorCatchForElement(atom);
  832. // tack on tree cursor motion if doing a tree walker
  833. if (grammar instanceof TreeWalkerGrammar) {
  834. println("_t = _t->getNextSibling();");
  835. }
  836. }
  837. public void gen(TreeElement t) {
  838. // save AST cursor
  839. println(labeledElementType+" __t" + t.ID + " = _t;");
  840. // If there is a label on the root, then assign that to the variable
  841. if (t.root.getLabel() != null) {
  842. println(t.root.getLabel() + " = (_t == ASTNULL) ? "+labeledElementASTInit+" : _t;");
  843. }
  844. // check for invalid modifiers ! and ^ on tree element roots
  845. if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
  846. antlrTool.error("Suffixing a root node with '!' is not implemented",
  847. grammar.getFilename(), t.getLine(), t.getColumn());
  848. t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
  849. }
  850. if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
  851. antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
  852. grammar.getFilename(), t.getLine(), t.getColumn());
  853. t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
  854. }
  855. // Generate AST variables
  856. genElementAST(t.root);
  857. if (grammar.buildAST) {
  858. // Save the AST construction state
  859. println(namespaceAntlr+"ASTPair __currentAST" + t.ID + " = currentAST;");
  860. // Make the next item added a child of the TreeElement root
  861. println("currentAST.root = currentAST.child;");
  862. println("currentAST.child = "+labeledElementASTInit+";");
  863. }
  864. // match root
  865. if ( t.root instanceof WildcardElement ) {
  866. println("if ( _t == ASTNULL ) throw "+namespaceAntlr+"MismatchedTokenException();");
  867. }
  868. else {
  869. genMatch(t.root);
  870. }
  871. // move to list of children
  872. println("_t = _t->getFirstChild();");
  873. // walk list of children, generating code for each
  874. for (int i=0; i<t.getAlternatives().size(); i++) {
  875. Alternative a = t.getAlternativeAt(i);
  876. AlternativeElement e = a.head;
  877. while ( e != null ) {
  878. e.generate();
  879. e = e.next;
  880. }
  881. }
  882. if (grammar.buildAST) {
  883. // restore the AST construction state to that just after the
  884. // tree root was added
  885. println("currentAST = __currentAST" + t.ID + ";");
  886. }
  887. // restore AST cursor
  888. println("_t = __t" + t.ID + ";");
  889. // move cursor to sibling of tree just parsed
  890. println("_t = _t->getNextSibling();");
  891. }
  892. /** Generate the tree-parser C++ files */
  893. public void gen(TreeWalkerGrammar g) throws IOException {
  894. setGrammar(g);
  895. if (!(grammar instanceof TreeWalkerGrammar)) {
  896. antlrTool.panic("Internal error generating tree-walker");
  897. }
  898. genBody(g);
  899. genInclude(g);
  900. }
  901. /** Generate code for the given grammar element.
  902. * @param wc The wildcard element to generate
  903. */
  904. public void gen(WildcardElement wc) {
  905. // Variable assignment for labeled elements
  906. if (wc.getLabel()!=null && syntacticPredLevel == 0) {
  907. println(wc.getLabel() + " = " + lt1Value + ";");
  908. }
  909. // AST
  910. genElementAST(wc);
  911. // Match anything but EOF
  912. if (grammar instanceof TreeWalkerGrammar) {
  913. println("if ( _t == "+labeledElementASTInit+" ) throw "+namespaceAntlr+"MismatchedTokenException();");
  914. }
  915. else if (grammar instanceof LexerGrammar) {
  916. if ( grammar instanceof LexerGrammar &&
  917. (!saveText||wc.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) ) {
  918. println("_saveIndex = text.length();");
  919. }
  920. println("matchNot(EOF/*_CHAR*/);");
  921. if ( grammar instanceof LexerGrammar &&
  922. (!saveText||wc.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) ) {
  923. println("text.erase(_saveIndex);"); // kill text atom put in buffer
  924. }
  925. }
  926. else {
  927. println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
  928. }
  929. // tack on tree cursor motion if doing a tree walker
  930. if (grammar instanceof TreeWalkerGrammar) {
  931. println("_t = _t->getNextSibling();");
  932. }
  933. }
  934. /** Generate code for the given grammar element.
  935. * @param blk The (...)* block to generate
  936. */
  937. public void gen(ZeroOrMoreBlock blk) {
  938. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) System.out.println("gen*("+blk+")");
  939. println("{ // ( ... )*");
  940. genBlockPreamble(blk);
  941. String label;
  942. if ( blk.getLabel() != null ) {
  943. label = blk.getLabel();
  944. }
  945. else {
  946. label = "_loop" + blk.ID;
  947. }
  948. println("for (;;) {");
  949. tabs++;
  950. // generate the init action for ()+ ()* inside the loop
  951. // this allows us to do usefull EOF checking...
  952. genBlockInitAction(blk);
  953. // Tell AST generation to build subrule result
  954. String saveCurrentASTResult = currentASTResult;
  955. if (blk.getLabel() != null) {
  956. currentASTResult = blk.getLabel();
  957. }
  958. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  959. // generate exit test if greedy set to false
  960. // and an alt is ambiguous with exit branch
  961. // or when lookahead derived purely from end-of-file
  962. // Lookahead analysis stops when end-of-file is hit,
  963. // returning set {epsilon}. Since {epsilon} is not
  964. // ambig with any real tokens, no error is reported
  965. // by deterministic() routines and we have to check
  966. // for the case where the lookahead depth didn't get
  967. // set to NONDETERMINISTIC (this only happens when the
  968. // FOLLOW contains real atoms + epsilon).
  969. boolean generateNonGreedyExitPath = false;
  970. int nonGreedyExitDepth = grammar.maxk;
  971. if ( !blk.greedy &&
  972. blk.exitLookaheadDepth<=grammar.maxk &&
  973. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon() )
  974. {
  975. generateNonGreedyExitPath = true;
  976. nonGreedyExitDepth = blk.exitLookaheadDepth;
  977. }
  978. else if ( !blk.greedy &&
  979. blk.exitLookaheadDepth==LLkGrammarAnalyzer.NONDETERMINISTIC )
  980. {
  981. generateNonGreedyExitPath = true;
  982. }
  983. if ( generateNonGreedyExitPath ) {
  984. if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) {
  985. System.out.println("nongreedy (...)* loop; exit depth is "+
  986. blk.exitLookaheadDepth);
  987. }
  988. String predictExit =
  989. getLookaheadTestExpression(blk.exitCache,
  990. nonGreedyExitDepth);
  991. println("// nongreedy exit test");
  992. println("if ("+predictExit+") goto "+label+";");
  993. }
  994. CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  995. genBlockFinish(howToFinish, "goto " + label + ";");
  996. tabs--;
  997. println("}");
  998. println(label+":;");
  999. println("} // ( ... )*");
  1000. // Restore previous AST generation
  1001. currentASTResult = saveCurrentASTResult;
  1002. }
  1003. /** Generate an alternative.
  1004. * @param alt The alternative to generate
  1005. * @param blk The block to which the alternative belongs
  1006. */
  1007. protected void genAlt(Alternative alt, AlternativeBlock blk)
  1008. {
  1009. // Save the AST generation state, and set it to that of the alt
  1010. boolean savegenAST = genAST;
  1011. genAST = genAST && alt.getAutoGen();
  1012. boolean oldsaveTest = saveText;
  1013. saveText = saveText && alt.getAutoGen();
  1014. // Reset the variable name map for the alternative
  1015. Hashtable saveMap = treeVariableMap;
  1016. treeVariableMap = new Hashtable();
  1017. // Generate try block around the alt for error handling
  1018. if (alt.exceptionSpec != null) {
  1019. println("try { // for error handling");
  1020. tabs++;
  1021. }
  1022. AlternativeElement elem = alt.head;
  1023. while ( !(elem instanceof BlockEndElement) ) {
  1024. elem.generate(); // alt can begin with anything. Ask target to gen.
  1025. elem = elem.next;
  1026. }
  1027. if ( genAST)
  1028. {
  1029. if (blk instanceof RuleBlock)
  1030. {
  1031. // Set the AST return value for the rule
  1032. RuleBlock rblk = (RuleBlock)blk;
  1033. if( usingCustomAST )
  1034. println(rblk.getRuleName() + "_AST = "+labeledElementASTType+"(currentAST.root);");
  1035. else
  1036. println(rblk.getRuleName() + "_AST = currentAST.root;");
  1037. }
  1038. else if (blk.getLabel() != null) {
  1039. // ### future: also set AST value for labeled subrules.
  1040. // println(blk.getLabel() + "_AST = "+labeledElementASTType+"(currentAST.root);");
  1041. antlrTool.warning("Labeled subrules are not implemented", grammar.getFilename(), blk.getLine(), blk.getColumn());
  1042. }
  1043. }
  1044. if (alt.exceptionSpec != null)
  1045. {
  1046. // close try block
  1047. tabs--;
  1048. println("}");
  1049. genErrorHandler(alt.exceptionSpec);
  1050. }
  1051. genAST = savegenAST;
  1052. saveText = oldsaveTest;
  1053. treeVariableMap = saveMap;
  1054. }
  1055. /** Generate all the bitsets to be used in the parser or lexer
  1056. * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
  1057. * and the BitSet object declarations like
  1058. * "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
  1059. * Note that most languages do not support object initialization inside a
  1060. * class definition, so other code-generators may have to separate the
  1061. * bitset declarations from the initializations (e.g., put the
  1062. * initializations in the generated constructor instead).
  1063. * @param bitsetList The list of bitsets to generate.
  1064. * @param maxVocabulary Ensure that each generated bitset can contain at
  1065. * least this value.
  1066. * @param prefix string glued in from of bitset names used for namespace
  1067. * qualifications.
  1068. */
  1069. protected void genBitsets(
  1070. Vector bitsetList,
  1071. int maxVocabulary,
  1072. String prefix
  1073. )
  1074. {
  1075. TokenManager tm = grammar.tokenManager;
  1076. println("");
  1077. for (int i = 0; i < bitsetList.size(); i++)
  1078. {
  1079. BitSet p = (BitSet)bitsetList.elementAt(i);
  1080. // Ensure that generated BitSet is large enough for vocabulary
  1081. p.growToInclude(maxVocabulary);
  1082. // initialization data
  1083. println(
  1084. "const unsigned long " + prefix + getBitsetName(i) + "_data_" + "[] = { " +
  1085. p.toStringOfHalfWords() +
  1086. " };"
  1087. );
  1088. // Dump the contents of the bitset in readable format...
  1089. String t = "// ";
  1090. for( int j = 0; j < tm.getVocabulary().size(); j++ )
  1091. {
  1092. if ( p.member( j ) )
  1093. {
  1094. if ( (grammar instanceof LexerGrammar) )
  1095. {
  1096. // only dump out for pure printable ascii.
  1097. if( ( 0x20 <= j ) && ( j < 0x7F ) )
  1098. t += charFormatter.escapeChar(j,true)+" ";
  1099. else
  1100. t += "0x"+Integer.toString(j,16)+" ";
  1101. }
  1102. else
  1103. t += tm.getTokenStringAt(j)+" ";
  1104. if( t.length() > 70 )
  1105. {
  1106. println(t);
  1107. t = "// ";
  1108. }
  1109. }
  1110. }
  1111. if ( t != "// " )
  1112. println(t);
  1113. // BitSet object
  1114. println(
  1115. "const "+namespaceAntlr+"BitSet " + prefix + getBitsetName(i) + "(" +
  1116. getBitsetName(i) + "_data_," + p.size()/32 +
  1117. ");"
  1118. );
  1119. }
  1120. }
  1121. protected void genBitsetsHeader(
  1122. Vector bitsetList,
  1123. int maxVocabulary
  1124. ) {
  1125. println("");
  1126. for (int i = 0; i < bitsetList.size(); i++)
  1127. {
  1128. BitSet p = (BitSet)bitsetList.elementAt(i);
  1129. // Ensure that generated BitSet is large enough for vocabulary
  1130. p.growToInclude(maxVocabulary);
  1131. // initialization data
  1132. println("static const unsigned long " + getBitsetName(i) + "_data_" + "[];");
  1133. // BitSet object
  1134. println("static const "+namespaceAntlr+"BitSet " + getBitsetName(i) + ";");
  1135. }
  1136. }
  1137. /** Generate the finish of a block, using a combination of the info
  1138. * returned from genCommonBlock() and the action to perform when
  1139. * no alts were taken
  1140. * @param howToFinish The return of genCommonBlock()
  1141. * @param noViableAction What to generate when no alt is taken
  1142. */
  1143. private void genBlockFinish(CppBlockFinishingInfo howToFinish, String noViableAction)
  1144. {
  1145. if (howToFinish.needAnErrorClause &&
  1146. (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
  1147. if ( howToFinish.generatedAnIf ) {
  1148. println("else {");
  1149. }
  1150. else {
  1151. println("{");
  1152. }
  1153. tabs++;
  1154. println(noViableAction);
  1155. tabs--;
  1156. println("}");
  1157. }
  1158. if ( howToFinish.postscript!=null ) {
  1159. println(howToFinish.postscript);
  1160. }
  1161. }
  1162. /** Generate the initaction for a block, which may be a RuleBlock or a
  1163. * plain AlternativeBLock.
  1164. * @blk The block for which the preamble is to be generated.
  1165. */
  1166. protected void genBlockInitAction( AlternativeBlock blk )
  1167. {
  1168. // dump out init action
  1169. if ( blk.initAction!=null ) {
  1170. genLineNo(blk);
  1171. printAction(processActionForSpecialSymbols(blk.initAction, blk.line,
  1172. currentRule, null) );
  1173. genLineNo2();
  1174. }
  1175. }
  1176. /** Generate the header for a block, which may be a RuleBlock or a
  1177. * plain AlternativeBlock. This generates any variable declarations
  1178. * and syntactic-predicate-testing variables.
  1179. * @blk The block for which the preamble is to be generated.
  1180. */
  1181. protected void genBlockPreamble(AlternativeBlock blk) {
  1182. // define labels for rule blocks.
  1183. if ( blk instanceof RuleBlock ) {
  1184. RuleBlock rblk = (RuleBlock)blk;
  1185. if ( rblk.labeledElements!=null ) {
  1186. for (int i=0; i<rblk.labeledElements.size(); i++) {
  1187. AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
  1188. //System.out.println("looking at labeled element: "+a);
  1189. // Variables for labeled rule refs and subrules are different than
  1190. // variables for grammar atoms. This test is a little tricky because
  1191. // we want to get all rule refs and ebnf, but not rule blocks or
  1192. // syntactic predicates
  1193. if (
  1194. a instanceof RuleRefElement ||
  1195. a instanceof AlternativeBlock &&
  1196. !(a instanceof RuleBlock) &&
  1197. !(a instanceof SynPredBlock) )
  1198. {
  1199. if ( !(a instanceof RuleRefElement) &&
  1200. ((AlternativeBlock)a).not &&
  1201. analyzer.subruleCanBeInverted(((AlternativeBlock)a), grammar instanceof LexerGrammar)
  1202. ) {
  1203. // Special case for inverted subrules that will be
  1204. // inlined. Treat these like token or char literal
  1205. // references
  1206. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1207. if (grammar.buildAST) {
  1208. genASTDeclaration( a );
  1209. }
  1210. }
  1211. else
  1212. {
  1213. if (grammar.buildAST)
  1214. {
  1215. // Always gen AST variables for labeled elements,
  1216. // even if the element itself is marked with !
  1217. genASTDeclaration( a );
  1218. }
  1219. if ( grammar instanceof LexerGrammar )
  1220. println(namespaceAntlr+"RefToken "+a.getLabel()+";");
  1221. if (grammar instanceof TreeWalkerGrammar) {
  1222. // always generate rule-ref variables for tree walker
  1223. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1224. }
  1225. }
  1226. }
  1227. else
  1228. {
  1229. // It is a token or literal reference. Generate the
  1230. // correct variable type for this grammar
  1231. println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
  1232. // In addition, generate *_AST variables if building ASTs
  1233. if (grammar.buildAST)
  1234. {
  1235. if (a instanceof GrammarAtom &&
  1236. ((GrammarAtom)a).getASTNodeType() != null )
  1237. {
  1238. GrammarAtom ga = (GrammarAtom)a;
  1239. genASTDeclaration( a, "Ref"+ga.getASTNodeType() );
  1240. }
  1241. else
  1242. {
  1243. genASTDeclaration( a );
  1244. }
  1245. }
  1246. }
  1247. }
  1248. }
  1249. }
  1250. }
  1251. public void genBody(LexerGrammar g) throws IOException
  1252. {
  1253. outputFile = grammar.getClassName() + ".cpp";
  1254. outputLine = 1;
  1255. currentOutput = antlrTool.openOutputFile(outputFile);
  1256. //SAS: changed for proper text file io
  1257. genAST = false; // no way to gen trees.
  1258. saveText = true; // save consumed characters.
  1259. tabs=0;
  1260. // Generate header common to all C++ output files
  1261. genHeader(outputFile);
  1262. printHeaderAction(preIncludeCpp);
  1263. // Generate header specific to lexer C++ file
  1264. println("#include \"" + grammar.getClassName() + ".hpp\"");
  1265. println("#include <antlr/CharBuffer.hpp>");
  1266. println("#include <antlr/TokenStreamException.hpp>");
  1267. println("#include <antlr/TokenStreamIOException.hpp>");
  1268. println("#include <antlr/TokenStreamRecognitionException.hpp>");
  1269. println("#include <antlr/CharStreamException.hpp>");
  1270. println("#include <antlr/CharStreamIOException.hpp>");
  1271. println("#include <antlr/NoViableAltForCharException.hpp>");
  1272. if (grammar.debuggingOutput)
  1273. println("#include <antlr/DebuggingInputBuffer.hpp>");
  1274. println("");
  1275. printHeaderAction(postIncludeCpp);
  1276. if (nameSpace != null)
  1277. nameSpace.emitDeclarations(currentOutput);
  1278. // Generate user-defined lexer file preamble
  1279. printAction(grammar.preambleAction);
  1280. // Generate lexer class definition
  1281. String sup=null;
  1282. if ( grammar.superClass!=null ) {
  1283. sup = grammar.superClass;
  1284. }
  1285. else {
  1286. sup = grammar.getSuperClass();
  1287. if (sup.lastIndexOf('.') != -1)
  1288. sup = sup.substring(sup.lastIndexOf('.')+1);
  1289. sup = namespaceAntlr + sup;
  1290. }
  1291. if( noConstructors )
  1292. {
  1293. println("#if 0");
  1294. println("// constructor creation turned of with 'noConstructor' option");
  1295. }
  1296. //
  1297. // Generate the constructor from InputStream
  1298. //
  1299. println(grammar.getClassName() + "::" + grammar.getClassName() + "(" + namespaceStd + "istream& in)");
  1300. tabs++;
  1301. // if debugging, wrap the input buffer in a debugger
  1302. if (grammar.debuggingOutput)
  1303. println(": " + sup + "(new "+namespaceAntlr+"DebuggingInputBuffer(new "+namespaceAntlr+"CharBuffer(in)),"+g.caseSensitive+")");
  1304. else
  1305. println(": " + sup + "(new "+namespaceAntlr+"CharBuffer(in),"+g.caseSensitive+")");
  1306. tabs--;
  1307. println("{");
  1308. tabs++;
  1309. // if debugging, set up array variables and call user-overridable
  1310. // debugging setup method
  1311. if ( grammar.debuggingOutput ) {
  1312. println("setRuleNames(_ruleNames);");
  1313. println("setSemPredNames(_semPredNames);");
  1314. println("setupDebugging();");
  1315. }
  1316. // println("setCaseSensitive("+g.caseSensitive+");");
  1317. println("initLiterals();");
  1318. tabs--;
  1319. println("}");
  1320. println("");
  1321. // Generate the constructor from InputBuffer
  1322. println(grammar.getClassName() + "::" + grammar.getClassName() + "("+namespaceAntlr+"InputBuffer& ib)");
  1323. tabs++;
  1324. // if debugging, wrap the input buffer in a debugger
  1325. if (grammar.debuggingOutput)
  1326. println(": " + sup + "(new "+namespaceAntlr+"DebuggingInputBuffer(ib),"+g.caseSensitive+")");
  1327. else
  1328. println(": " + sup + "(ib,"+g.caseSensitive+")");
  1329. tabs--;
  1330. println("{");
  1331. tabs++;
  1332. // if debugging, set up array variables and call user-overridable
  1333. // debugging setup method
  1334. if ( grammar.debuggingOutput ) {
  1335. println("setRuleNames(_ruleNames);");
  1336. println("setSemPredNames(_semPredNames);");
  1337. println("setupDebugging();");
  1338. }
  1339. // println("setCaseSensitive("+g.caseSensitive+");");
  1340. println("initLiterals();");
  1341. tabs--;
  1342. println("}");
  1343. println("");
  1344. // Generate the constructor from LexerSharedInputState
  1345. println(grammar.getClassName() + "::" + grammar.getClassName() + "(const "+namespaceAntlr+"LexerSharedInputState& state)");
  1346. tabs++;
  1347. println(": " + sup + "(state,"+g.caseSensitive+")");
  1348. tabs--;
  1349. println("{");
  1350. tabs++;
  1351. // if debugging, set up array variables and call user-overridable
  1352. // debugging setup method
  1353. if ( grammar.debuggingOutput ) {
  1354. println("setRuleNames(_ruleNames);");
  1355. println("setSemPredNames(_semPredNames);");
  1356. println("setupDebugging();");
  1357. }
  1358. // println("setCaseSensitive("+g.caseSensitive+");");
  1359. println("initLiterals();");
  1360. tabs--;
  1361. println("}");
  1362. println("");
  1363. if( noConstructors )
  1364. {
  1365. println("// constructor creation turned of with 'noConstructor' option");
  1366. println("#endif");
  1367. }
  1368. println("void " + grammar.getClassName() + "::initLiterals()");
  1369. println("{");
  1370. tabs++;
  1371. // Generate the initialization of the map
  1372. // containing the string literals used in the lexer
  1373. // The literals variable itself is in CharScanner
  1374. Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
  1375. while ( keys.hasMoreElements() ) {
  1376. String key = (String)keys.nextElement();
  1377. if ( key.charAt(0) != '"' ) {
  1378. continue;
  1379. }
  1380. TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
  1381. if ( sym instanceof StringLiteralSymbol ) {
  1382. StringLiteralSymbol s = (StringLiteralSymbol)sym;
  1383. println("literals["+s.getId()+"] = "+s.getTokenType()+";");
  1384. }
  1385. }
  1386. // Generate the setting of various generated options.
  1387. tabs--;
  1388. println("}");
  1389. Enumeration ids;
  1390. // generate the rule name array for debugging
  1391. if (grammar.debuggingOutput) {
  1392. println("const char* "+grammar.getClassName()+"::_ruleNames[] = {");
  1393. tabs++;
  1394. ids = grammar.rules.elements();
  1395. int ruleNum=0;
  1396. while ( ids.hasMoreElements() ) {
  1397. GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
  1398. if ( sym instanceof RuleSymbol)
  1399. println("\""+((RuleSymbol)sym).getId()+"\",");
  1400. }
  1401. println("0");
  1402. tabs--;
  1403. println("};");
  1404. }
  1405. // Generate nextToken() rule.
  1406. // nextToken() is a synthetic lexer rule that is the implicit OR of all
  1407. // user-defined lexer rules.
  1408. genNextToken();
  1409. // Generate code for each rule in the lexer
  1410. ids = grammar.rules.elements();
  1411. int ruleNum=0;
  1412. while ( ids.hasMoreElements() ) {
  1413. RuleSymbol sym = (RuleSymbol) ids.nextElement();
  1414. // Don't generate the synthetic rules
  1415. if (!sym.getId().equals("mnextToken")) {
  1416. genRule(sym, false, ruleNum++, grammar.getClassName() + "::");
  1417. }
  1418. exitIfError();
  1419. }
  1420. // Generate the semantic predicate map for debugging
  1421. if (grammar.debuggingOutput)
  1422. genSemPredMap(grammar.getClassName() + "::");
  1423. // Generate the bitsets used throughout the lexer
  1424. genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size(), grammar.getClassName() + "::" );
  1425. println("");
  1426. if (nameSpace != null)
  1427. nameSpace.emitClosures(currentOutput);
  1428. // Close the lexer output stream
  1429. currentOutput.close();
  1430. currentOutput = null;
  1431. }
  1432. public void genInitFactory( Grammar g )
  1433. {
  1434. // Generate the method to initialize an ASTFactory when we're
  1435. // building AST's
  1436. String param_name = "factory ";
  1437. if( ! g.buildAST )
  1438. param_name = "";
  1439. println("void "+ g.getClassName() + "::initializeASTFactory( "+namespaceAntlr+"ASTFactory& "+param_name+")");
  1440. println("{");
  1441. tabs++;
  1442. if( g.buildAST )
  1443. {
  1444. // sort out custom AST types... synchronize token manager with token
  1445. // specs on rules (and other stuff we were able to see from
  1446. // action.g) (imperfect of course)
  1447. TokenManager tm = grammar.tokenManager;
  1448. Enumeration tokens = tm.getTokenSymbolKeys();
  1449. while( tokens.hasMoreElements() )
  1450. {
  1451. String tok = (String)tokens.nextElement();
  1452. TokenSymbol ts = tm.getTokenSymbol(tok);
  1453. // if we have a custom type and there's not a more local override
  1454. // of the tokentype then mark this as the type for the tokentype
  1455. if( ts.getASTNodeType() != null )
  1456. {
  1457. // ensure capacity with this pseudo vector...
  1458. astTypes.ensureCapacity(ts.getTokenType());
  1459. String type = (String)astTypes.elementAt(ts.getTokenType());
  1460. if( type == null )
  1461. astTypes.setElementAt(ts.getASTNodeType(),ts.getTokenType());
  1462. else
  1463. {
  1464. // give a warning over action taken if the types are unequal
  1465. if( ! ts.getASTNodeType().equals(type) )
  1466. {
  1467. antlrTool.warning("Token "+tok+" taking most specific AST type",grammar.getFilename(),1,1);
  1468. antlrTool.warning(" using "+type+" ignoring "+ts.getASTNod

Large files files are truncated, but you can click here to view the full file