PageRenderTime 76ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/antlr-2.7.5/antlr/PythonCodeGenerator.java

https://github.com/boo/boo-lang
Java | 4107 lines | 3929 code | 78 blank | 100 comment | 55 complexity | 82d84ee163e77c6b587e424dcb8da8da MD5 | raw file
Possible License(s): GPL-2.0
  1. // This file is part of PyANTLR. See LICENSE.txt for license
  2. // details..........Copyright (C) Wolfgang Haefelinger, 2004.
  3. //
  4. // $Id:$
  5. package antlr;
  6. import java.util.Enumeration;
  7. import java.util.Hashtable;
  8. import antlr.collections.impl.BitSet;
  9. import antlr.collections.impl.Vector;
  10. import java.io.PrintWriter; //SAS: changed for proper text file io
  11. import java.io.IOException;
  12. import java.io.FileWriter;
/** Generate the Python lexer/parser modules (MyLexer.py, MyParser.py) and the
 *  token-type tables for a grammar. (The original comment said ".java" — a
 *  copy-paste from the Java generator; this class targets Python.) */
public class PythonCodeGenerator extends CodeGenerator {

    // non-zero if inside syntactic predicate generation
    protected int syntacticPredLevel = 0;

    // Are we generating ASTs (for parsers and tree parsers) right now?
    protected boolean genAST = false;

    // Are we saving the text consumed (for lexers) right now?
    protected boolean saveText = false;

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation.
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    // Expression emitted as the RHS when assigning a labeled element
    // (see e.g. gen(CharLiteralElement)/gen(TokenRefElement)).
    String lt1Value;
    String exceptionThrown;
    // Statement emitted when no alternative is viable
    // (passed to genBlockFinish and printed in (...)+ underflow checks).
    String throwNoViable;

    // Names of the special header actions a grammar author may define;
    // looked up via behavior.getHeaderAction in genHeaderInit/genHeaderMain.
    public static final String initHeaderAction = "__init__";
    public static final String mainHeaderAction = "__main__";

    String lexerClassName;
    String parserClassName;
    String treeWalkerClassName;

    /** Tracks the rule being generated. Used for mapTreeId */
    RuleBlock currentRule;

    /** Tracks the rule or labeled subrule being generated. Used for
     *  AST generation. */
    String currentASTResult;

    /** Mapping between the ids used in the current alt, and the
     *  names of variables used to represent their AST values.
     */
    Hashtable treeVariableMap = new Hashtable();

    /** Used to keep track of which AST variables have been defined in a rule
     *  (except for the #rule_name and #rule_name_in var's)
     */
    Hashtable declaredASTVariables = new Hashtable();

    /* Count of unnamed generated variables */
    int astVarNumber = 1;

    /** Special value used to mark duplicate in treeVariableMap
     *  (a fresh String instance so identity comparison is unambiguous). */
    protected static final String NONUNIQUE = new String();

    // ascii is max
    public static final int caseSizeThreshold = 127;

    // Semantic predicate strings collected while generating a debugging
    // parser; populated by addSemPred, reset in gen(LexerGrammar)/gen(ParserGrammar).
    private Vector semPreds;
    /** Create a Python code-generator using the given Grammar.
     *  The caller must still call setTool, setBehavior, and setAnalyzer
     *  before generating code.
     *  (NOTE: this javadoc describes the PythonCodeGenerator constructor
     *  further below, not the printTabs method that follows it.)
     */
  60. protected void printTabs() {
  61. for (int i = 0; i < tabs; i++) {
  62. // don't print tabs ever - replace a tab by ' '
  63. currentOutput.print(" ");
  64. }
  65. }
    /** Construct a Python code-generator.
     *  The caller must still call setTool, setBehavior, and setAnalyzer
     *  before generating code (per the javadoc preceding printTabs above).
     */
    public PythonCodeGenerator() {
        super();
        // Use the Python-specific character formatter for literals/escapes.
        charFormatter = new antlr.PythonCharFormatter();
        // NOTE(review): debug tracing is enabled unconditionally here —
        // confirm this is intentional and not a leftover development flag.
        DEBUG_CODE_GENERATOR = true;
    }
  71. /** Adds a semantic predicate string to the sem pred vector
  72. These strings will be used to build an array of sem pred names
  73. when building a debugging parser. This method should only be
  74. called when the debug option is specified
  75. */
  76. protected int addSemPred(String predicate) {
  77. semPreds.appendElement(predicate);
  78. return semPreds.size() - 1;
  79. }
  80. public void exitIfError() {
  81. if (antlrTool.hasError()) {
  82. antlrTool.fatalError("Exiting due to errors.");
  83. }
  84. }
  85. protected void checkCurrentOutputStream() {
  86. try
  87. {
  88. if(currentOutput == null)
  89. throw new NullPointerException();
  90. }
  91. catch(Exception e)
  92. {
  93. System.err.println("error: current output is not set");
  94. e.printStackTrace(System.err);
  95. System.exit(1);
  96. }
  97. }
  98. /** Get the identifier portion of an argument-action.
  99. * For Python the ID of an action is assumed to be everything before
  100. * the assignment, as Python does not support a type.
  101. * @param s The action text
  102. * @param line Line used for error reporting.
  103. * @param column Line used for error reporting.
  104. * @return A string containing the text of the identifier
  105. */
  106. protected String extractIdOfAction(String s, int line, int column) {
  107. s = removeAssignmentFromDeclaration(s);
  108. //wh: removeAssignmentFromDeclaration returns an indentifier that
  109. //wh: may start with whitespace.
  110. s = s.trim();
  111. // println("###ZZZZZ \""+s+"\"");
  112. return s;
  113. }
  114. /** Get the type portion of an argument-action.
  115. * Python does not have a type declaration before an identifier, so we
  116. * just return the empty string.
  117. * @param s The action text
  118. * @param line Line used for error reporting.
  119. * @return A string containing the text of the type
  120. */
  121. protected String extractTypeOfAction(String s, int line, int column) {
  122. return "";
  123. }
  124. protected void flushTokens() {
  125. try
  126. {
  127. boolean generated = false;
  128. checkCurrentOutputStream();
  129. println("");
  130. println("### import antlr.Token ");
  131. println("from antlr import Token");
  132. println("### >>>The Known Token Types <<<");
  133. /* save current stream */
  134. PrintWriter cout = currentOutput;
  135. // Loop over all token managers (some of which are lexers)
  136. Enumeration tmIter =
  137. behavior.tokenManagers.elements();
  138. while (tmIter.hasMoreElements())
  139. {
  140. TokenManager tm =
  141. (TokenManager)tmIter.nextElement();
  142. if (!tm.isReadOnly())
  143. {
  144. // Write the token manager tokens as Java
  145. // this must appear before genTokenInterchange so that
  146. // labels are set on string literals
  147. if(! generated) {
  148. genTokenTypes(tm);
  149. generated = true;
  150. }
  151. /* restore stream */
  152. currentOutput = cout;
  153. // Write the token manager tokens as plain text
  154. genTokenInterchange(tm);
  155. currentOutput = cout;
  156. }
  157. exitIfError();
  158. }
  159. }
  160. catch(Exception e) {
  161. exitIfError();
  162. }
  163. checkCurrentOutputStream();
  164. println("");
  165. }
  166. /**Generate the parser, lexer, treeparser, and token types in Java */
  167. public void gen() {
  168. // Do the code generation
  169. try {
  170. // Loop over all grammars
  171. Enumeration grammarIter = behavior.grammars.elements();
  172. while (grammarIter.hasMoreElements()) {
  173. Grammar g = (Grammar)grammarIter.nextElement();
  174. // Connect all the components to each other
  175. g.setGrammarAnalyzer(analyzer);
  176. g.setCodeGenerator(this);
  177. analyzer.setGrammar(g);
  178. // To get right overloading behavior across hetrogeneous grammars
  179. setupGrammarParameters(g);
  180. g.generate();
  181. // print out the grammar with lookahead sets (and FOLLOWs)
  182. // System.out.print(g.toString());
  183. exitIfError();
  184. }
  185. }
  186. catch (IOException e) {
  187. antlrTool.reportException(e, null);
  188. }
  189. }
    /** Generate code for the given grammar element.
     *  @param action The {...} action element to generate (semantic
     *                predicates are routed to genSemPred; plain actions
     *                are translated and printed).
     */
    public void gen(ActionElement action) {
        if (action.isSemPred) {
            genSemPred(action.actionText, action.line);
        }
        else {
            // Plain actions must not run while a syntactic predicate is
            // being evaluated, hence the generated guessing guard.
            if (grammar.hasSyntacticPredicate) {
                println("if not self.inputState.guessing:");
                tabs++;
            }
            // get the name of the followSet for the current rule so that we
            // can replace $FOLLOW in the .g file.
            ActionTransInfo tInfo = new ActionTransInfo();
            String actionStr = processActionForSpecialSymbols(action.actionText,
                                                              action.getLine(),
                                                              currentRule,
                                                              tInfo);
            if (tInfo.refRuleRoot != null) {
                // Somebody referenced "#rule", make sure translated var is valid
                // assignment to #rule is left as a ref also, meaning that assignments
                // with no other refs like "#rule = foo();" still forces this code to be
                // generated (unnecessarily).
                println(tInfo.refRuleRoot + " = currentAST.root");
            }
            // dump the translated action
            printAction(actionStr);
            if (tInfo.assignToRoot) {
                // Somebody did a "#rule=", reset internal currentAST.root.
                // The generated code points currentAST.child at the root's
                // first child (or the root itself when it has none), then
                // seeks to the end of the sibling list.
                println("currentAST.root = " + tInfo.refRuleRoot + "");
                println("if (" + tInfo.refRuleRoot + " != None) and (" + tInfo.refRuleRoot + ".getFirstChild() != None):");
                tabs++;
                println("currentAST.child = " + tInfo.refRuleRoot + ".getFirstChild()");
                tabs--;
                println("else:");
                tabs++;
                println("currentAST.child = " + tInfo.refRuleRoot);
                tabs--;
                println("currentAST.advanceChildToEnd()");
            }
            // Close the guessing guard opened above.
            if (grammar.hasSyntacticPredicate) {
                tabs--;
            }
        }
    }
  237. /** Generate code for the given grammar element.
  238. * @param blk The "x|y|z|..." block to generate
  239. */
  240. public void gen(AlternativeBlock blk) {
  241. if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
  242. genBlockPreamble(blk);
  243. genBlockInitAction(blk);
  244. // Tell AST generation to build subrule result
  245. String saveCurrentASTResult = currentASTResult;
  246. if (blk.getLabel() != null) {
  247. currentASTResult = blk.getLabel();
  248. }
  249. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  250. {
  251. int _tabs_ = tabs;
  252. PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
  253. genBlockFinish(howToFinish, throwNoViable);
  254. tabs = _tabs_;
  255. }
  256. // Restore previous AST generation
  257. currentASTResult = saveCurrentASTResult;
  258. }
  259. /** Generate code for the given grammar element.
  260. * @param blk The block-end element to generate. Block-end
  261. * elements are synthesized by the grammar parser to represent
  262. * the end of a block.
  263. */
  264. public void gen(BlockEndElement end) {
  265. if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
  266. }
  267. /** Generate code for the given grammar element.
  268. * @param blk The character literal reference to generate
  269. */
  270. public void gen(CharLiteralElement atom) {
  271. if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
  272. if (atom.getLabel() != null) {
  273. println(atom.getLabel() + " = " + lt1Value );
  274. }
  275. boolean oldsaveText = saveText;
  276. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  277. genMatch(atom);
  278. saveText = oldsaveText;
  279. }
  280. String toString(boolean v) {
  281. String s;
  282. if(v)
  283. s = "True";
  284. else
  285. s = "False";
  286. return s;
  287. }
  288. /** Generate code for the given grammar element.
  289. * @param blk The character-range reference to generate
  290. */
  291. public void gen(CharRangeElement r) {
  292. if (r.getLabel() != null && syntacticPredLevel == 0) {
  293. println(r.getLabel() + " = " + lt1Value);
  294. }
  295. boolean flag = ( grammar instanceof LexerGrammar &&
  296. ( !saveText ||
  297. r.getAutoGenType() ==
  298. GrammarElement.AUTO_GEN_BANG ) );
  299. if (flag) {
  300. println("_saveIndex = self.text.length()");
  301. }
  302. println("self.matchRange(u" + r.beginText + ", u" + r.endText + ")");
  303. if (flag) {
  304. println("self.text.setLength(_saveIndex)");
  305. }
  306. }
  307. /** Generate the lexer Java file */
  308. public void gen(LexerGrammar g) throws IOException
  309. {
  310. // If debugging, create a new sempred vector for this grammar
  311. if (g.debuggingOutput)
  312. semPreds = new Vector();
  313. setGrammar(g);
  314. if (!(grammar instanceof LexerGrammar)) {
  315. antlrTool.panic("Internal error generating lexer");
  316. }
  317. // SAS: moved output creation to method so a subclass can change
  318. // how the output is generated (for VAJ interface)
  319. setupOutput(grammar.getClassName());
  320. genAST = false; // no way to gen trees.
  321. saveText = true; // save consumed characters.
  322. tabs = 0;
  323. // Generate header common to all Python output files
  324. genHeader();
  325. // Generate header specific to lexer Python file
  326. println("### import antlr and other modules ..");
  327. println("import sys");
  328. println("import antlr");
  329. println("");
  330. println("version = sys.version.split()[0]");
  331. println("if version < '2.2.1':");
  332. tabs++;
  333. println("False = 0");
  334. tabs--;
  335. println("if version < '2.3':");
  336. tabs++;
  337. println("True = not False");
  338. tabs--;
  339. println("### header action >>> ");
  340. printActionCode(behavior.getHeaderAction(""),0);
  341. println("### header action <<< ");
  342. // Generate user-defined lexer file preamble
  343. println("### preamble action >>> ");
  344. printActionCode(grammar.preambleAction.getText(),0);
  345. println("### preamble action <<< ");
  346. // Generate lexer class definition
  347. String sup = null;
  348. if (grammar.superClass != null) {
  349. sup = grammar.superClass;
  350. }
  351. else {
  352. sup = "antlr." + grammar.getSuperClass();
  353. }
  354. // get prefix (replaces "public" and lets user specify)
  355. String prefix = "";
  356. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  357. if (tprefix != null) {
  358. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  359. if (p != null) {
  360. prefix = p;
  361. }
  362. }
  363. // print my literals
  364. println("### >>>The Literals<<<");
  365. println("literals = {}");
  366. Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
  367. while (keys.hasMoreElements()) {
  368. String key = (String)keys.nextElement();
  369. if (key.charAt(0) != '"') {
  370. continue;
  371. }
  372. TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
  373. if (sym instanceof StringLiteralSymbol) {
  374. StringLiteralSymbol s = (StringLiteralSymbol)sym;
  375. println("literals[u" + s.getId() + "] = " + s.getTokenType());
  376. }
  377. }
  378. println("");
  379. flushTokens();
  380. // print javadoc comment if any
  381. genJavadocComment(grammar);
  382. // class name remains the same, it's the module that changes in python.
  383. println("class " + lexerClassName + "(" + sup + ") :");
  384. tabs++;
  385. printGrammarAction(grammar);
  386. // Generate the constructor from InputStream, which in turn
  387. // calls the ByteBuffer constructor
  388. //
  389. println("def __init__(self, *argv, **kwargs) :");
  390. tabs++;
  391. println(sup + ".__init__(self, *argv, **kwargs)");
  392. // Generate the setting of various generated options.
  393. // These need to be before the literals since ANTLRHashString depends on
  394. // the casesensitive stuff.
  395. println("self.caseSensitiveLiterals = " + toString(g.caseSensitiveLiterals));
  396. println("self.setCaseSensitive(" + toString(g.caseSensitive) + ")" );
  397. println("self.literals = literals");
  398. Enumeration ids;
  399. // generate the rule name array for debugging
  400. if (grammar.debuggingOutput) {
  401. println("ruleNames[] = [");
  402. ids = grammar.rules.elements();
  403. int ruleNum = 0;
  404. tabs++;
  405. while (ids.hasMoreElements()) {
  406. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  407. if (sym instanceof RuleSymbol)
  408. println("\"" + ((RuleSymbol)sym).getId() + "\",");
  409. }
  410. tabs--;
  411. println("]");
  412. }
  413. genHeaderInit(grammar);
  414. tabs--;
  415. // wh: iterator moved to base class as proposed by mk.
  416. // println("");
  417. // Generate the __iter__ method for Python CharScanner (sub)classes.
  418. // genIterator();
  419. // Generate nextToken() rule.
  420. // nextToken() is a synthetic lexer rule that is the implicit OR of all
  421. // user-defined lexer rules.
  422. genNextToken();
  423. println("");
  424. // Generate code for each rule in the lexer
  425. ids = grammar.rules.elements();
  426. int ruleNum = 0;
  427. while (ids.hasMoreElements()) {
  428. RuleSymbol sym = (RuleSymbol)ids.nextElement();
  429. // Don't generate the synthetic rules
  430. if (!sym.getId().equals("mnextToken")) {
  431. genRule(sym, false, ruleNum++);
  432. }
  433. exitIfError();
  434. }
  435. // Generate the semantic predicate map for debugging
  436. if (grammar.debuggingOutput)
  437. genSemPredMap();
  438. // Generate the bitsets used throughout the lexer
  439. genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
  440. println("");
  441. genHeaderMain(grammar);
  442. // Close the lexer output stream
  443. currentOutput.close();
  444. currentOutput = null;
  445. }
  446. protected void genHeaderMain(Grammar grammar)
  447. {
  448. String h = grammar.getClassName() + "." + mainHeaderAction;
  449. String s = behavior.getHeaderAction(h);
  450. if (isEmpty(s)) {
  451. s = behavior.getHeaderAction(mainHeaderAction);
  452. }
  453. if(isEmpty(s)) {
  454. if(grammar instanceof LexerGrammar) {
  455. int _tabs = tabs;
  456. tabs = 0;
  457. println("### __main__ header action >>> ");
  458. genLexerTest();
  459. tabs = 0;
  460. println("### __main__ header action <<< ");
  461. tabs = _tabs;
  462. }
  463. } else {
  464. int _tabs = tabs;
  465. tabs = 0;
  466. println("");
  467. println("### __main__ header action >>> ");
  468. printMainFunc(s);
  469. tabs = 0;
  470. println("### __main__ header action <<< ");
  471. tabs = _tabs;
  472. }
  473. }
  474. protected void genHeaderInit(Grammar grammar)
  475. {
  476. String h = grammar.getClassName() + "." + initHeaderAction;
  477. String s = behavior.getHeaderAction(h);
  478. if (isEmpty(s)) {
  479. s = behavior.getHeaderAction(initHeaderAction);
  480. }
  481. if(isEmpty(s)) {
  482. /* nothing gets generated by default */
  483. } else {
  484. int _tabs = tabs;
  485. println("### __init__ header action >>> ");
  486. printActionCode(s,0);
  487. tabs = _tabs;
  488. println("### __init__ header action <<< ");
  489. }
  490. }
  491. protected void printMainFunc(String s) {
  492. int _tabs = tabs;
  493. tabs = 0;
  494. println("if __name__ == '__main__':");
  495. tabs++;
  496. printActionCode(s,0);
  497. tabs--;
  498. tabs = _tabs;
  499. }
    /** Generate code for the given grammar element.
     *  @param blk The (...)+ block to generate: emits a counted
     *             "while True:" loop plus an underflow check that raises
     *             throwNoViable when the body never matched.
     */
    public void gen(OneOrMoreBlock blk) {
        String label;   // NOTE(review): declared but never assigned or read.
        String cnt;
        /* save current tabs */
        int _tabs_ = tabs;
        genBlockPreamble(blk);
        // Loop-counter name: derived from the label when present,
        // otherwise from the block's ID.
        if (blk.getLabel() != null) {
            cnt = "_cnt_" + blk.getLabel();
        }
        else {
            cnt = "_cnt" + blk.ID;
        }
        println("" + cnt + "= 0");
        println("while True:");
        tabs++;
        // Remember the indentation of the loop body for later resets.
        _tabs_ = tabs;
        // generate the init action for ()+ ()* inside the loop
        // this allows us to do useful EOF checking...
        genBlockInitAction(blk);
        // Tell AST generation to build subrule result
        String saveCurrentASTResult = currentASTResult;
        if (blk.getLabel() != null) {
            currentASTResult = blk.getLabel();
        }
        boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
        // generate exit test if greedy set to false
        // and an alt is ambiguous with exit branch
        // or when lookahead derived purely from end-of-file
        // Lookahead analysis stops when end-of-file is hit,
        // returning set {epsilon}. Since {epsilon} is not
        // ambig with any real tokens, no error is reported
        // by deterministic() routines and we have to check
        // for the case where the lookahead depth didn't get
        // set to NONDETERMINISTIC (this only happens when the
        // FOLLOW contains real atoms + epsilon).
        boolean generateNonGreedyExitPath = false;
        int nonGreedyExitDepth = grammar.maxk;
        if (!blk.greedy &&
            blk.exitLookaheadDepth <= grammar.maxk &&
            blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
            generateNonGreedyExitPath = true;
            nonGreedyExitDepth = blk.exitLookaheadDepth;
        }
        else {
            if (!blk.greedy &&
                blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
                generateNonGreedyExitPath = true;
            }
        }
        // generate exit test if greedy set to false
        // and an alt is ambiguous with exit branch
        if (generateNonGreedyExitPath) {
            println("### nongreedy (...)+ loop; exit depth is " + blk.exitLookaheadDepth);
            String predictExit =
                getLookaheadTestExpression(
                    blk.exitCache,
                    nonGreedyExitDepth);
            println("### nongreedy exit test");
            println("if " + cnt + " >= 1 and " + predictExit + ":");
            tabs++;
            println("break");
            tabs--;
        }
        {
            // Emit the alternatives; a failed match exits via "break".
            int _tabs = tabs;
            PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
            genBlockFinish(howToFinish, "break");
            tabs = _tabs;
        }
        /* no matter what the previous block did, here we have to continue
        ** on the 'while block' level. Resetting tabs .. */
        tabs = _tabs_;
        println(cnt + " += 1");
        tabs = _tabs_;
        tabs--;
        // Underflow check: a (...)+ block must have matched at least once.
        println("if " + cnt + " < 1:");
        tabs++;
        println(throwNoViable);
        tabs--;
        // Restore previous AST generation
        currentASTResult = saveCurrentASTResult;
    }
    /** Generate the parser Python module for the given parser grammar:
     *  common header, compatibility shims, token types, the parser class
     *  with its __init__, one method per rule, token names, bitsets, and
     *  the __main__ section.
     *  @param g the parser grammar to emit
     *  @throws IOException if the output file cannot be written
     */
    public void gen(ParserGrammar g)
        throws IOException {
        // if debugging, set up a new vector to keep track of sempred
        // strings for this grammar
        if (g.debuggingOutput)
            semPreds = new Vector();
        setGrammar(g);
        if (!(grammar instanceof ParserGrammar)) {
            antlrTool.panic("Internal error generating parser");
        }
        // Open the output stream for the parser and set the currentOutput
        // SAS: moved file setup so subclass could do it (for VAJ interface)
        setupOutput(grammar.getClassName());
        genAST = grammar.buildAST;
        tabs = 0;
        // Generate the header common to all output files.
        genHeader();
        // Generate header specific to the parser Python file
        println("### import antlr and other modules ..");
        println("import sys");
        println("import antlr");
        println("");
        // Compatibility shims for pre-2.2.1 / pre-2.3 Pythons lacking
        // the True/False builtins.
        println("version = sys.version.split()[0]");
        println("if version < '2.2.1':");
        tabs++;
        println("False = 0");
        tabs--;
        println("if version < '2.3':");
        tabs++;
        println("True = not False");
        tabs--;
        println("### header action >>> ");
        printActionCode(behavior.getHeaderAction(""), 0);
        println("### header action <<< ");
        println("### preamble action>>>");
        // Output the user-defined parser preamble
        printActionCode(grammar.preambleAction.getText(), 0);
        println("### preamble action <<<");
        flushTokens();
        // Generate parser class definition
        String sup = null;
        if (grammar.superClass != null)
            sup = grammar.superClass;
        else
            sup = "antlr." + grammar.getSuperClass();
        // print javadoc comment if any
        genJavadocComment(grammar);
        // get prefix (replaces "public" and lets user specify)
        // NOTE(review): 'prefix' is computed but never used below.
        String prefix = "";
        Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
        if (tprefix != null) {
            String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
            if (p != null) {
                prefix = p;
            }
        }
        print("class " + parserClassName + "(" + sup);
        println("):");
        tabs++;
        // set up an array of all the rule names so the debugger can
        // keep track of them only by number -- less to store in tree...
        if (grammar.debuggingOutput) {
            println("_ruleNames = [");
            Enumeration ids = grammar.rules.elements();
            int ruleNum = 0;   // NOTE(review): unused in this loop.
            tabs++;
            while (ids.hasMoreElements()) {
                GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
                if (sym instanceof RuleSymbol)
                    println("\"" + ((RuleSymbol)sym).getId() + "\",");
            }
            tabs--;
            println("]");
        }
        // Generate user-defined parser class members
        printGrammarAction(grammar);
        // Generate parser class constructor from TokenBuffer
        println("");
        println("def __init__(self, *args, **kwargs):");
        tabs++;
        println(sup + ".__init__(self, *args, **kwargs)");
        println("self.tokenNames = _tokenNames");
        // if debugging, set up arrays and call the user-overridable
        // debugging setup method
        if (grammar.debuggingOutput) {
            println("self.ruleNames = _ruleNames");
            println("self.semPredNames = _semPredNames");
            println("self.setupDebugging(self.tokenBuf)");
        }
        // When building ASTs, install the AST factory and node class.
        if (grammar.buildAST) {
            println("self.buildTokenTypeASTClassMap()");
            println("self.astFactory = antlr.ASTFactory(self.getTokenTypeToASTClassMap())");
            if (labeledElementASTType != null) {
                println("self.astFactory.setASTNodeClass(" +
                        labeledElementASTType + ")");
            }
        }
        genHeaderInit(grammar);
        println("");
        // Generate code for each rule in the grammar
        Enumeration ids = grammar.rules.elements();
        int ruleNum = 0;
        while (ids.hasMoreElements()) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol) {
                RuleSymbol rs = (RuleSymbol)sym;
                // Second argument flags rules referenced by no other rule.
                genRule(rs, rs.references.size() == 0, ruleNum++);
            }
            exitIfError();
        }
        if (grammar.buildAST) {
            genTokenASTNodeMap();
        }
        // Generate the token names
        genTokenStrings();
        // Generate the bitsets used throughout the grammar
        genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
        // Generate the semantic predicate map for debugging
        if (grammar.debuggingOutput)
            genSemPredMap();
        // Close class definition
        println("");
        tabs = 0;
        genHeaderMain(grammar);
        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }
    /** Generate code for the given grammar element.
     *  @param rr The rule-reference to generate: emits the invocation,
     *            optional return-value assignment, and AST/label handling.
     */
    public void gen(RuleRefElement rr) {
        if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
        if (rs == null || !rs.isDefined()) {
            // Is this redundant???
            antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
            return;
        }
        if (!(rs instanceof RuleSymbol)) {
            // Is this redundant???  (NOTE(review): rs was already cast to
            // RuleSymbol above, so this branch appears unreachable.)
            antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
            return;
        }
        genErrorTryForElement(rr);
        // AST value for labeled rule refs in tree walker.
        // This is not AST construction; it is just the input tree node value.
        if (grammar instanceof TreeWalkerGrammar &&
            rr.getLabel() != null &&
            syntacticPredLevel == 0) {
            println(rr.getLabel() + " = antlr.ifelse(_t == antlr.ASTNULL, None, " + lt1Value + ")");
        }
        // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
        if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
            println("_saveIndex = self.text.length()");
        }
        // Process return value assignment if any
        printTabs();
        if (rr.idAssign != null) {
            // Warn if the rule has no return type
            if (rs.block.returnAction == null) {
                antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
            _print(rr.idAssign + "=");
        }
        else {
            // Warn about return value if any, but not inside syntactic predicate
            if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
                antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
        }
        // Call the rule
        GenRuleInvocation(rr);
        // if in lexer and ! on element or alt or rule, restore buffer index
        // saved above, discarding the matched text
        if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
            println("self.text.setLength(_saveIndex)");
        }
        // if not in a syntactic predicate
        if (syntacticPredLevel == 0) {
            boolean doNoGuessTest = (
                grammar.hasSyntacticPredicate && (
                    grammar.buildAST && rr.getLabel() != null ||
                    (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
                )
            );
            if (doNoGuessTest) {
                // NOTE(review): guard emission is disabled; the commented
                // lines are the Java target's guessing guard.
                // println("if (inputState.guessing==0) {");
                // tabs++;
            }
            if (grammar.buildAST && rr.getLabel() != null) {
                // always gen variable for rule return on labeled rules
                println(rr.getLabel() + "_AST = self.returnAST");
            }
            if (genAST) {
                switch (rr.getAutoGenType()) {
                case GrammarElement.AUTO_GEN_NONE:
                    println("self.addASTChild(currentAST, self.returnAST)");
                    break;
                case GrammarElement.AUTO_GEN_CARET:
                    antlrTool.error("Internal: encountered ^ after rule reference");
                    break;
                default:
                    break;
                }
            }
            // if a lexer and labeled, Token label defined at rule level, just set it here
            if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
                println(rr.getLabel() + " = self._returnToken");
            }
            if (doNoGuessTest) {
                // NOTE(review): intentionally empty — matching close of the
                // disabled guessing guard above.
            }
        }
        genErrorCatchForElement(rr);
    }
  805. /** Generate code for the given grammar element.
  806. * @param blk The string-literal reference to generate
  807. */
  808. public void gen(StringLiteralElement atom) {
  809. if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
  810. // Variable declarations for labeled elements
  811. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  812. println(atom.getLabel() + " = " + lt1Value + "");
  813. }
  814. // AST
  815. genElementAST(atom);
  816. // is there a bang on the literal?
  817. boolean oldsaveText = saveText;
  818. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  819. // matching
  820. genMatch(atom);
  821. saveText = oldsaveText;
  822. // tack on tree cursor motion if doing a tree walker
  823. if (grammar instanceof TreeWalkerGrammar) {
  824. println("_t = _t.getNextSibling()");
  825. }
  826. }
  827. /** Generate code for the given grammar element.
  828. * @param blk The token-range reference to generate
  829. */
  830. public void gen(TokenRangeElement r) {
  831. genErrorTryForElement(r);
  832. if (r.getLabel() != null && syntacticPredLevel == 0) {
  833. println(r.getLabel() + " = " + lt1Value);
  834. }
  835. // AST
  836. genElementAST(r);
  837. // match
  838. println("self.matchRange(u" + r.beginText + ", u" + r.endText + ")");
  839. genErrorCatchForElement(r);
  840. }
  841. /** Generate code for the given grammar element.
  842. * @param blk The token-reference to generate
  843. */
  844. public void gen(TokenRefElement atom) {
  845. if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
  846. if (grammar instanceof LexerGrammar) {
  847. antlrTool.panic("Token reference found in lexer");
  848. }
  849. genErrorTryForElement(atom);
  850. // Assign Token value to token label variable
  851. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  852. println(atom.getLabel() + " = " + lt1Value + "");
  853. }
  854. // AST
  855. genElementAST(atom);
  856. // matching
  857. genMatch(atom);
  858. genErrorCatchForElement(atom);
  859. // tack on tree cursor motion if doing a tree walker
  860. if (grammar instanceof TreeWalkerGrammar) {
  861. println("_t = _t.getNextSibling()");
  862. }
  863. }
/** Generate code that matches a tree pattern #( root child1 ... childN ):
 * saves the tree cursor, matches the root, descends into the children,
 * then restores the cursor and steps to the next sibling.
 * @param t The tree element to generate
 */
public void gen(TreeElement t) {
    // save AST cursor so it can be restored after walking the subtree
    println("_t" + t.ID + " = _t");
    // If there is a label on the root, then assign that to the variable
    if (t.root.getLabel() != null) {
        println(t.root.getLabel() + " = antlr.ifelse(_t == antlr.ASTNULL, None, _t)");
    }
    // check for invalid modifiers ! and ^ on tree element roots
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
        antlrTool.error("Suffixing a root node with '!' is not implemented",
                        grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
        antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
                          grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    // Generate AST variables
    genElementAST(t.root);
    if (grammar.buildAST) {
        // Save the AST construction state
        println("_currentAST" + t.ID + " = currentAST.copy()");
        // Make the next item added a child of the TreeElement root
        println("currentAST.root = currentAST.child");
        println("currentAST.child = None");
    }
    // match root: a wildcard root only requires a non-null node
    if (t.root instanceof WildcardElement) {
        println("if not _t: raise antlr.MismatchedTokenException()");
    }
    else {
        genMatch(t.root);
    }
    // move to list of children
    println("_t = _t.getFirstChild()");
    // walk list of children, generating code for each alternative's elements
    for (int i = 0; i < t.getAlternatives().size(); i++) {
        Alternative a = t.getAlternativeAt(i);
        AlternativeElement e = a.head;
        while (e != null) {
            e.generate(); // each element emits its own matching code
            e = e.next;
        }
    }
    if (grammar.buildAST) {
        // restore the AST construction state to that just after the
        // tree root was added
        println("currentAST = _currentAST" + t.ID + "");
    }
    // restore AST cursor
    println("_t = _t" + t.ID + "");
    // move cursor to sibling of tree just parsed
    println("_t = _t.getNextSibling()");
}
  919. /** Generate the tree-parser Java file */
  920. public void gen(TreeWalkerGrammar g) throws IOException {
  921. // SAS: debugging stuff removed for now...
  922. setGrammar(g);
  923. if (!(grammar instanceof TreeWalkerGrammar)) {
  924. antlrTool.panic("Internal error generating tree-walker");
  925. }
  926. // Open the output stream for the parser and set the currentOutput
  927. // SAS: move file open to method so subclass can override it
  928. // (mainly for VAJ interface)
  929. setupOutput(grammar.getClassName());
  930. genAST = grammar.buildAST;
  931. tabs = 0;
  932. // Generate the header common to all output files.
  933. genHeader();
  934. // Generate header specific to lexer Java file
  935. println("### import antlr and other modules ..");
  936. println("import sys");
  937. println("import antlr");
  938. println("");
  939. println("version = sys.version.split()[0]");
  940. println("if version < '2.2.1':");
  941. tabs++;
  942. println("False = 0");
  943. tabs--;
  944. println("if version < '2.3':");
  945. tabs++;
  946. println("True = not False");
  947. tabs--;
  948. println("### header action >>> ");
  949. printActionCode(behavior.getHeaderAction(""),0);
  950. println("### header action <<< ");
  951. flushTokens();
  952. println("### user code>>>");
  953. // Output the user-defined parser preamble
  954. printActionCode(grammar.preambleAction.getText(),0);
  955. println("### user code<<<");
  956. // Generate parser class definition
  957. String sup = null;
  958. if (grammar.superClass != null) {
  959. sup = grammar.superClass;
  960. }
  961. else {
  962. sup = "antlr." + grammar.getSuperClass();
  963. }
  964. println("");
  965. // get prefix (replaces "public" and lets user specify)
  966. String prefix = "";
  967. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  968. if (tprefix != null) {
  969. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  970. if (p != null) {
  971. prefix = p;
  972. }
  973. }
  974. // print javadoc comment if any
  975. genJavadocComment(grammar);
  976. println("class " + treeWalkerClassName + "(" + sup + "):");
  977. tabs++;
  978. // Generate default parser class constructor
  979. println("");
  980. println("# ctor ..");
  981. println("def __init__(self, *args, **kwargs):");
  982. tabs++;
  983. println(sup + ".__init__(self, *args, **kwargs)");
  984. println("self.tokenNames = _tokenNames");
  985. genHeaderInit(grammar);
  986. tabs--;
  987. println("");
  988. // print grammar specific action
  989. printGrammarAction(grammar);
  990. // Generate code for each rule in the grammar
  991. Enumeration ids = grammar.rules.elements();
  992. int ruleNum = 0;
  993. String ruleNameInits = "";
  994. while (ids.hasMoreElements()) {
  995. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  996. if (sym instanceof RuleSymbol) {
  997. RuleSymbol rs = (RuleSymbol)sym;
  998. genRule(rs, rs.references.size() == 0, ruleNum++);
  999. }
  1000. exitIfError();
  1001. }
  1002. // Generate the token names
  1003. genTokenStrings();
  1004. // Generate the bitsets used throughout the grammar
  1005. genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
  1006. tabs = 0;
  1007. genHeaderMain(grammar);
  1008. // Close the parser output stream
  1009. currentOutput.close();
  1010. currentOutput = null;
  1011. }
  1012. /** Generate code for the given grammar element.
  1013. * @param wc The wildcard element to generate
  1014. */
  1015. public void gen(WildcardElement wc) {
  1016. // Variable assignment for labeled elements
  1017. if (wc.getLabel() != null && syntacticPredLevel == 0) {
  1018. println(wc.getLabel() + " = " + lt1Value + "");
  1019. }
  1020. // AST
  1021. genElementAST(wc);
  1022. // Match anything but EOF
  1023. if (grammar instanceof TreeWalkerGrammar) {
  1024. println("if not _t:");
  1025. tabs++;
  1026. println("raise MismatchedTokenException()");
  1027. tabs--;
  1028. }
  1029. else if (grammar instanceof LexerGrammar) {
  1030. if (grammar instanceof LexerGrammar &&
  1031. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1032. println("_saveIndex = self.text.length()");
  1033. }
  1034. println("self.matchNot(antlr.EOF_CHAR)");
  1035. if (grammar instanceof LexerGrammar &&
  1036. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1037. println("self.text.setLength(_saveIndex)"); // kill text atom put in buffer
  1038. }
  1039. }
  1040. else {
  1041. println("self.matchNot(" + getValueString(Token.EOF_TYPE,false) + ")");
  1042. }
  1043. // tack on tree cursor motion if doing a tree walker
  1044. if (grammar instanceof TreeWalkerGrammar) {
  1045. println("_t = _t.getNextSibling()");
  1046. }
  1047. }
  1048. /** Generate code for the given grammar element.
  1049. * @param blk The (...)* block to generate
  1050. */
  1051. public void gen(ZeroOrMoreBlock blk) {
  1052. int _tabs_ = tabs;
  1053. genBlockPreamble(blk);
  1054. String label;
  1055. println("while True:");
  1056. tabs++;
  1057. _tabs_ = tabs;
  1058. // generate the init action for ()* inside the loop
  1059. // this allows us to do usefull EOF checking...
  1060. genBlockInitAction(blk);
  1061. // Tell AST generation to build subrule result
  1062. String saveCurrentASTResult = currentASTResult;
  1063. if (blk.getLabel() != null) {
  1064. currentASTResult = blk.getLabel();
  1065. }
  1066. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  1067. // generate exit test if greedy set to false
  1068. // and an alt is ambiguous with exit branch
  1069. // or when lookahead derived purely from end-of-file
  1070. // Lookahead analysis stops when end-of-file is hit,
  1071. // returning set {epsilon}. Since {epsilon} is not
  1072. // ambig with any real tokens, no error is reported
  1073. // by deterministic() routines and we have to check
  1074. // for the case where the lookahead depth didn't get
  1075. // set to NONDETERMINISTIC (this only happens when the
  1076. // FOLLOW contains real atoms + epsilon).
  1077. boolean generateNonGreedyExitPath = false;
  1078. int nonGreedyExitDepth = grammar.maxk;
  1079. if (!blk.greedy &&
  1080. blk.exitLookaheadDepth <= grammar.maxk &&
  1081. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
  1082. generateNonGreedyExitPath = true;
  1083. nonGreedyExitDepth = blk.exitLookaheadDepth;
  1084. }
  1085. else if (!blk.greedy &&
  1086. blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
  1087. generateNonGreedyExitPath = true;
  1088. }
  1089. if (generateNonGreedyExitPath) {
  1090. if (DEBUG_CODE_GENERATOR) {
  1091. System.out.println("nongreedy (...)* loop; exit depth is " +
  1092. blk.exitLookaheadDepth);
  1093. }
  1094. String predictExit =
  1095. getLookaheadTestExpression(blk.exitCache,
  1096. nonGreedyExitDepth);
  1097. println("### nongreedy exit test");
  1098. println("if (" + predictExit + "):");
  1099. tabs++;
  1100. println("break");
  1101. tabs--;
  1102. }
  1103. {
  1104. int _tabs = tabs;
  1105. PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  1106. genBlockFinish(howToFinish, "break");
  1107. tabs = _tabs;
  1108. }
  1109. tabs = _tabs_; /* no matter where we are */
  1110. tabs--;
  1111. // Restore previous AST generation
  1112. currentASTResult = saveCurrentASTResult;
  1113. }
  1114. /** Generate an alternative.
  1115. * @param alt The alternative to generate
  1116. * @param blk The block to which the alternative belongs
  1117. */
  1118. protected void genAlt(Alternative alt, AlternativeBlock blk) {
  1119. // Save the AST generation state, and set it to that of the alt
  1120. boolean savegenAST = genAST;
  1121. genAST = genAST && alt.getAutoGen();
  1122. boolean oldsaveTest = saveText;
  1123. saveText = saveText && alt.getAutoGen();
  1124. // Reset the variable name map for the alternative
  1125. Hashtable saveMap = treeVariableMap;
  1126. treeVariableMap = new Hashtable();
  1127. // Generate try block around the alt for error handling
  1128. if (alt.exceptionSpec != null) {
  1129. println("try:");
  1130. tabs++;
  1131. }
  1132. println("pass"); // make sure that always something gets generated ..
  1133. AlternativeElement elem = alt.head;
  1134. while (!(elem instanceof BlockEndElement)) {
  1135. elem.generate(); // alt can begin with anything. Ask target to gen.
  1136. elem = elem.next;
  1137. }
  1138. if (genAST) {
  1139. if (blk instanceof RuleBlock) {
  1140. // Set the AST return value for the rule
  1141. RuleBlock rblk = (RuleBlock)blk;
  1142. if (grammar.hasSyntacticPredicate) {
  1143. }
  1144. println(rblk.getRuleName() + "_AST = currentAST.root");
  1145. if (grammar.hasSyntacticPredicate) {
  1146. }
  1147. }
  1148. else if (blk.getLabel() != null) {
  1149. antlrTool.warning(
  1150. "Labeled subrules not yet supported",
  1151. grammar.getFilename(), blk.getLine(), blk.getColumn());
  1152. }
  1153. }
  1154. if (alt.exceptionSpec != null) {
  1155. tabs--;
  1156. genErrorHandler(alt.exceptionSpec);
  1157. }
  1158. genAST = savegenAST;
  1159. saveText = oldsaveTest;
  1160. treeVariableMap = saveMap;
  1161. }
  1162. /** Generate all the bitsets to be used in the parser or lexer
  1163. * Generate the raw bitset data like "long _tokenSet1_data[] = {...}"
  1164. * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data)"
  1165. * Note that most languages do not support object initialization inside a
  1166. * class definition, so other code-generators may have to separate the
  1167. * bitset declarations from the initializations (e.g., put the initializations
  1168. * in the generated constructor instead).
  1169. * @param bitsetList The list of bitsets to generate.
  1170. * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
  1171. */
  1172. protected void genBitsets(Vector bitsetList,
  1173. int maxVocabulary
  1174. ) {
  1175. println("");
  1176. for (int i = 0; i < bitsetList.size(); i++) {
  1177. BitSet p = (BitSet)bitsetList.elementAt(i);
  1178. // Ensure that generated BitSet is large enough for vocabulary
  1179. p.growToInclude(maxVocabulary);
  1180. genBitSet(p, i);
  1181. }
  1182. }
/** Emit one bitset as a module-level Python object:
 *     def mk_tokenSet_N(): ...  returns the long-word list
 *     _tokenSet_N = antlr.BitSet(mk_tokenSet_N())
 * Small sets are dumped as a literal list; large ones (common for
 * unicode lexers) are built by assignments and run-length loops.
 * @param p  the bitset to emit
 * @param id index used to derive the generated name
 */
private void genBitSet(BitSet p, int id) {
    int _tabs_ = tabs;
    // wanna have bitsets on module scope, so they are available
    // when module gets loaded.
    tabs = 0;
    println("");
    println("### generate bit set");
    println(
        "def mk" + getBitsetName(id) + "(): ");
    tabs++;
    int n = p.lengthInLongWords();
    if (n < BITSET_OPTIMIZE_INIT_THRESHOLD) {
        // small set: emit the packed words as one literal list
        println("### var1");
        println("data = [ " + p.toStringOfWords() + "]");
    }
    else {
        // will init manually, allocate space then set values
        println("data = [0L] * " + n + " ### init list");
        long[] elems = p.toPackedArray();
        for (int i = 0; i < elems.length;) {
            if (elems[i] == 0) {
                // list is pre-zeroed, don't waste time/code
                i++;
                continue;
            }
            if ((i + 1) == elems.length || elems[i] != elems[i + 1]) {
                // last number or no run of numbers, just dump assignment
                println("data[" + i + "] =" + elems[i] + "L");
                i++;
                continue;
            }
            // scan to find end of run of identical words
            int j;
            for (j = i + 1; j < elems.length && elems[j] == elems[i]; j++) {
            }
            long e = elems[i];
            // E0007: fixed — collapse the run into a Python for-loop
            println("for x in xrange(" + i + ", " + j + "):");
            tabs++;
            println("data[x] = " + e + "L");
            tabs--;
            i = j;
        }
    }
    println("return data");
    tabs--;
    // BitSet object wrapping the word list
    println(
        getBitsetName(id) + " = antlr.BitSet(mk" + getBitsetName(id) + "())");
    // restore tabs
    tabs = _tabs_;
}
  1250. private void genBlockFinish(PythonBlockFinishingInfo howToFinish,
  1251. String noViableAction) {
  1252. if (howToFinish.needAnErrorClause &&
  1253. (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
  1254. if (howToFinish.generatedAnIf)
  1255. {
  1256. println("else:" );
  1257. }
  1258. tabs++;
  1259. println(noViableAction);
  1260. tabs--;
  1261. }
  1262. if (howToFinish.postscript != null) {
  1263. println(howToFinish.postscript);
  1264. }
  1265. }
  1266. /* just to be called by nextToken */
  1267. private void genBlockFinish1(PythonBlockFinishingInfo howToFinish,
  1268. String noViableAction) {
  1269. if (howToFinish.needAnErrorClause &&
  1270. (howToFinish.generatedAnIf || howToFinish.generatedSwitch))
  1271. {
  1272. if (howToFinish.generatedAnIf)
  1273. {
  1274. // tabs++;
  1275. println("else:" );
  1276. }
  1277. tabs++;
  1278. println(noViableAction);
  1279. tabs--;
  1280. if (howToFinish.generatedAnIf)
  1281. {
  1282. // tabs--;
  1283. // println("### tabs--");
  1284. }
  1285. }
  1286. if (howToFinish.postscript != null) {
  1287. println(howToFinish.postscript);
  1288. }
  1289. }
  1290. /** Generate the init action for a block, which may be a RuleBlock or a
  1291. * plain AlternativeBLock.
  1292. * @blk The block for which the preamble is to be generated.
  1293. */
  1294. protected void genBlockInitAction(AlternativeBlock blk) {
  1295. // dump out init action
  1296. if (blk.initAction != null) {
  1297. printAction(processActionForSpecialSymbols(blk.initAction, blk.getLine(), currentRule, null));
  1298. }
  1299. }
/** Generate the header for a block, which may be a RuleBlock or a
 * plain AlternativeBlock. This generates any variable declarations
 * for labeled elements (one per label, initialized per grammar kind)
 * plus matching *_AST variables when building ASTs.
 * @blk The block for which the preamble is to be generated.
 */
protected void genBlockPreamble(AlternativeBlock blk) {
    // define labels for rule blocks.
    if (blk instanceof RuleBlock) {
        RuleBlock rblk = (RuleBlock)blk;
        if (rblk.labeledElements != null) {
            for (int i = 0; i < rblk.labeledElements.size(); i++) {
                AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
                // System.out.println("looking at labeled element: "+a);
                // Variables for labeled rule refs and
                // subrules are different than variables for
                // grammar atoms. This test is a little tricky
                // because we want to get all rule refs and ebnf,
                // but not rule blocks or syntactic predicates
                if (
                    a instanceof RuleRefElement ||
                    a instanceof AlternativeBlock &&
                    !(a instanceof RuleBlock) &&
                    !(a instanceof SynPredBlock)
                ) {
                    if (
                        !(a instanceof RuleRefElement) &&
                        ((AlternativeBlock)a).not &&
                        analyzer.subruleCanBeInverted(((AlternativeBlock)a), grammar instanceof LexerGrammar)
                    ) {
                        // Special case for inverted subrules that
                        // will be inlined. Treat these like
                        // token or char literal references
                        println(a.getLabel() + " = " + labeledElementInit);
                        if (grammar.buildAST) {
                            genASTDeclaration(a);
                        }
                    }
                    else {
                        if (grammar.buildAST) {
                            // Always gen AST variables for
                            // labeled elements, even if the
                            // element itself is marked with !
                            genASTDeclaration(a);
                        }
                        if (grammar instanceof LexerGrammar) {
                            // lexer rule-ref labels start out unbound
                            println(a.getLabel() + " = None");
                        }
                        if (grammar instanceof TreeWalkerGrammar) {
                            // always generate rule-ref variables
                            // for tree walker
                            println(a.getLabel() + " = " + labeledElementInit);
                        }
                    }
                }
                else {
                    // It is a token or literal reference. Generate the
                    // correct variable type for this grammar
                    println(a.getLabel() + " = " + labeledElementInit);
                    // In addition, generate *_AST variables if
                    // building ASTs
                    if (grammar.buildAST) {
                        // use the heterogeneous AST node type when the
                        // atom declares one
                        if (a instanceof GrammarAtom &&
                            ((GrammarAtom)a).getASTNodeType() != null) {
                            GrammarAtom ga = (GrammarAtom)a;
                            genASTDeclaration(a, ga.getASTNodeType());
                        }
                        else {
                            genASTDeclaration(a);
                        }
                    }
                }
            }
        }
    }
}
  1375. /** Generate a series of case statements that implement a BitSet test.
  1376. * @param p The Bitset for which cases are to be generated
  1377. */
  1378. protected void genCases(BitSet p) {
  1379. if (DEBUG_CODE_GENERATOR) System.out.println("genCases(" + p + ")");
  1380. int[] elems;
  1381. elems = p.toArray();
  1382. // Wrap cases four-per-line for lexer, one-per-line for parser
  1383. int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
  1384. int j = 1;
  1385. boolean startOfLine = true;
  1386. print("elif la1 and la1 in ");
  1387. if (grammar instanceof LexerGrammar)
  1388. {
  1389. _print("u'");
  1390. for (int i = 0; i < elems.length; i++) {
  1391. _print(getValueString(elems[i],false));
  1392. }
  1393. _print("':\n");
  1394. return;
  1395. }
  1396. // Parser or TreeParser ..
  1397. _print("[");
  1398. for (int i = 0; i < elems.length; i++) {
  1399. _print(getValueString(elems[i],false));
  1400. if(i+1<elems.length)
  1401. _print(",");
  1402. }
  1403. _print("]:\n");
  1404. }
/**Generate common code for a block of alternatives; return a
 * postscript that needs to be generated at the end of the
 * block. Other routines may append else-clauses and such for
 * error checking before the postfix is generated. If the
 * grammar is a lexer, then generate alternatives in an order
 * where alternatives requiring deeper lookahead are generated
 * first, and EOF in the lookahead set reduces the depth of
 * the lookahead. @param blk The block to generate @param
 * noTestForSingle If true, then it does not generate a test
 * for a single alternative.
 */
public PythonBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
                                               boolean noTestForSingle) {
    int _tabs_ = tabs; // remember where we are ..
    int nIF = 0;
    boolean createdLL1Switch = false;
    int closingBracesOfIFSequence = 0;
    PythonBlockFinishingInfo finishingInfo =
        new PythonBlockFinishingInfo();
    // Save the AST generation state, and set it to that of the block
    boolean savegenAST = genAST;
    genAST = genAST && blk.getAutoGen();
    boolean oldsaveTest = saveText;
    saveText = saveText && blk.getAutoGen();
    // Is this block inverted? If so, generate special-case code:
    // a single bitset match replaces the whole alternative chain.
    if (
        blk.not &&
        analyzer.subruleCanBeInverted(blk, grammar instanceof LexerGrammar)
    ) {
        if (DEBUG_CODE_GENERATOR) System.out.println("special case: ~(subrule)");
        Lookahead p = analyzer.look(1, blk);
        // Variable assignment for labeled elements
        if (blk.getLabel() != null && syntacticPredLevel == 0) {
            println(blk.getLabel() + " = " + lt1Value);
        }
        // AST
        genElementAST(blk);
        String astArgs = "";
        if (grammar instanceof TreeWalkerGrammar) {
            astArgs = "_t, ";
        }
        // match the bitset for the alternative
        println("self.match(" + astArgs + getBitsetName(markBitsetForGen(p.fset)) + ")");
        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = _t.getNextSibling()");
        }
        return finishingInfo;
    }
    // Special handling for single alt
    if (blk.getAlternatives().size() == 1) {
        Alternative alt = blk.getAlternativeAt(0);
        // Generate a warning if there is a synPred for single alt.
        if (alt.synPred != null) {
            antlrTool.warning(
                "Syntactic predicate superfluous for single alternative",
                grammar.getFilename(),
                blk.getAlternativeAt(0).synPred.getLine(),
                blk.getAlternativeAt(0).synPred.getColumn()
            );
        }
        if (noTestForSingle) {
            if (alt.semPred != null) {
                // Generate validating predicate
                genSemPred(alt.semPred, blk.line);
            }
            genAlt(alt, blk);
            return finishingInfo;
        }
    }
    // count number of simple LL(1) cases; only do switch for
    // many LL(1) cases (no preds, no end of token refs)
    // We don't care about exit paths for (...)*, (...)+
    // because we don't explicitly have a test for them
    // as an alt in the loop.
    //
    // Also, we now count how many unicode lookahead sets
    // there are--they must be moved to DEFAULT or ELSE
    // clause.
    int nLL1 = 0;
    for (int i = 0; i < blk.getAlternatives().size(); i++) {
        Alternative a = blk.getAlternativeAt(i);
        if (suitableForCaseExpression(a)) {
            nLL1++;
        }
    }
    // do LL(1) cases
    if (nLL1 >= makeSwitchThreshold) {
        // Determine the name of the item to be compared
        String testExpr = lookaheadString(1);
        createdLL1Switch = true;
        // when parsing trees, convert null to valid tree node with NULL lookahead
        if (grammar instanceof TreeWalkerGrammar) {
            println("if not _t:");
            tabs++;
            println("_t = antlr.ASTNULL");
            tabs--;
        }
        println("la1 = " + testExpr);
        // print dummy if to get a regular genCases ..
        println("if False:");
        tabs++;
        println("pass");
        //println("assert 0 # lunatic case");
        tabs--;
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            // ignore any non-LL(1) alts, predicated alts,
            // or end-of-token alts for case expressions
            if (!suitableForCaseExpression(alt)) {
                continue;
            }
            Lookahead p = alt.cache[1];
            if (p.fset.degree() == 0 && !p.containsEpsilon()) {
                antlrTool.warning(
                    "Alternate omitted due to empty prediction set",
                    grammar.getFilename(),
                    alt.head.getLine(), alt.head.getColumn());
            }
            else {
                /* make the case statment, ie. if la1 in .. : */
                genCases(p.fset);
                tabs++;
                genAlt(alt, blk);
                tabs--;
            }
        }
        /* does this else belong here? */
        println("else:");
        tabs++;
    }
    // do non-LL(1) and nondeterministic cases This is tricky in
    // the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
    // : "*="; Since nextToken is generated without a loop, then
    // the STAR will have end-of-token as it's lookahead set for
    // LA(2). So, we must generate the alternatives containing
    // trailing end-of-token in their lookahead sets *after* the
    // alternatives without end-of-token. This implements the
    // usual lexer convention that longer matches come before
    // shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR
    //
    // For non-lexer grammars, this does not sort the alternates
    // by depth Note that alts whose lookahead is purely
    // end-of-token at k=1 end up as default or else clauses.
    int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk : 0;
    for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (DEBUG_CODE_GENERATOR) System.out.println("genAlt: " + i);
            // if we made a switch above, ignore what we already took care
            // of. Specifically, LL(1) alts with no preds
            // that do not have end-of-token in their prediction set
            // and that are not giant unicode sets.
            if (createdLL1Switch && suitableForCaseExpression(alt)) {
                if (DEBUG_CODE_GENERATOR)
                    System.out.println("ignoring alt because it was in the switch");
                continue;
            }
            String e;
            boolean unpredicted = false;
            if (grammar instanceof LexerGrammar) {
                // Calculate the "effective depth" of the alt,
                // which is the max depth at which
                // cache[depth]!=end-of-token
                int effectiveDepth = alt.lookaheadDepth;
                if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                    // use maximum lookahead
                    effectiveDepth = grammar.maxk;
                }
                while (effectiveDepth >= 1 &&
                       alt.cache[effectiveDepth].containsEpsilon()) {
                    effectiveDepth--;
                }
                // Ignore alts whose effective depth is other than
                // the ones we are generating for this iteration.
                if (effectiveDepth != altDepth) {
                    if (DEBUG_CODE_GENERATOR)
                        System.out.println(
                            "ignoring alt because effectiveDepth!=altDepth"
                            + effectiveDepth + "!=" + altDepth);
                    continue;
                }
                unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
                e = getLookaheadTestExpression(alt, effectiveDepth);
            }
            else {
                unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
                e = getLookaheadTestExpression(alt, grammar.maxk);
            }
            // Was it a big unicode range that forced unsuitability
            // for a case expression?
            // NOTE(review): the emitted "<m1> if"/"<m2> elif" markers below
            // look like leftover debug prefixes that would produce invalid
            // Python; preserved as-is since changing generated output is a
            // behavior change — verify against the upstream release.
            if (alt.cache[1].fset.degree() > caseSizeThreshold &&
                suitableForCaseExpression(alt)) {
                if (nIF == 0) {
                    println("<m1> if " + e + ":");
                }
                else {
                    println("<m2> elif " + e + ":");
                }
            }
            else {
                if (unpredicted &&
                    alt.semPred == null &&
                    alt.synPred == null) {
                    // The alt has empty prediction set and no
                    // predicate to help out. if we have not
                    // generated a previous if, just put {...} around
                    // the end-of-token clause
                    if (nIF == 0) {
                        println("##<m3> <closing");
                    }
                    else {
                        println("else: ## <m4>");
                        tabs++;
                        // to prevent an empty boyd
                        // println("pass");
                    }
                    finishingInfo.needAnErrorClause = false;
                }
                else {
                    // check for sem and syn preds
                    // Add any semantic predicate expression to the
                    // lookahead test
                    if (alt.semPred != null) {
                        // if debugging, wrap the evaluation of the
                        // predicate in a method translate $ and #
                        // references
                        ActionTransInfo tInfo = new ActionTransInfo();
                        String actionStr =
                            processActionForSpecialSymbols(
                                alt.semPred,
                                blk.line,
                                currentRule,
                                tInfo);
                        // ignore translation info...we don't need to
                        // do anything with it. call that will inform
                        // SemanticPredicateListeners of the result
                        if (((grammar instanceof ParserGrammar) ||
                             (grammar instanceof LexerGrammar)) &&
                            grammar.debuggingOutput) {
                            e = "(" + e +
                                " and fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING, " +
                                addSemPred(charFormatter.escapeString(actionStr)) + ", " + actionStr + "))";
                        }
                        else {
                            e = "(" + e + " and (" + actionStr + "))";
                        }
                    }
                    // Generate any syntactic predicates
                    if (nIF > 0) {
                        if (alt.synPred != null) {
                            println("else:");
                            tabs++; /* who's closing this one? */
                            genSynPred(alt.synPred, e);
                            closingBracesOfIFSequence++;
                        }
                        else {
                            println("elif " + e + ":");
                        }
                    }
                    else {
                        if (alt.synPred != null) {
                            genSynPred(alt.synPred, e);
                        }
                        else {
                            // when parsing trees, convert null to
                            // valid tree node with NULL lookahead.
                            if (grammar instanceof TreeWalkerGrammar) {
                                println("if not _t:");
                                tabs++;
                                println("_t = antlr.ASTNULL");
                                tabs--;
                            }
                            println("if " + e + ":");
                        }
                    }
                }
            }
            nIF++;
            tabs++;
            genAlt(alt, blk); // this should have generated something. If not
                              // we could end up in an empty else:
            tabs--;
        }
    }
    String ps = "";
    //for (int i = 1; i <= closingBracesOfIFSequence; i++) {
    //    ps += "";
    //}
    // Restore the AST generation state
    genAST = savegenAST;
    // restore save text state
    saveText = oldsaveTest;
    // Return the finishing info.
    if (createdLL1Switch) {
        finishingInfo.postscript = ps;
        finishingInfo.generatedSwitch = true;
        finishingInfo.generatedAnIf = nIF > 0;
    }
    else {
        finishingInfo.postscript = ps;
        finishingInfo.generatedSwitch = false;
        finishingInfo.generatedAnIf = nIF > 0;
    }
    return finishingInfo;
}
  1736. private static boolean suitableForCaseExpression(Alternative a) {
  1737. return
  1738. a.lookaheadDepth == 1 &&
  1739. a.semPred == null &&
  1740. !a.cache[1].containsEpsilon() &&
  1741. a.cache[1].fset.degree() <= caseSizeThreshold;
  1742. }
    /** Generate code to link an element reference into the AST.
     * Emits Python that declares, initializes, and attaches the per-element
     * AST variable (label_AST or tmpN_AST) according to the element's label,
     * its auto-gen modifier ('!' / '^'), and the grammar class.
     */
    private void genElementAST(AlternativeElement el) {
        // handle case where you're not building trees, but are in tree walker.
        // Just need to get labels set up.
        if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
            String elementRef;
            String astName;
            // Generate names and declarations of the AST variable(s)
            if (el.getLabel() == null) {
                elementRef = lt1Value;
                // Generate AST variables for unlabeled stuff
                astName = "tmp" + astVarNumber + "_AST";
                astVarNumber++;
                // Map the generated AST variable in the alternate
                mapTreeVariable(el, astName);
                // Generate an "input" AST variable also
                println(astName + "_in = " + elementRef);
            }
            return;
        }
        // AST variables are only produced outside syntactic predicates.
        if (grammar.buildAST && syntacticPredLevel == 0) {
            // A declaration is needed when AST generation is on for this
            // element and it is labeled or not suppressed with '!'.
            boolean needASTDecl =
                (genAST &&
                 (el.getLabel() != null ||
                  el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
                  )
                 );
            // RK: if we have a grammar element always generate the decl
            // since some guy can access it from an action and we can't
            // peek ahead (well not without making a mess).
            // I'd prefer taking this out.
            if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG &&
                (el instanceof TokenRefElement))
            {
                needASTDecl = true;
            }
            boolean doNoGuessTest =
                (grammar.hasSyntacticPredicate && needASTDecl);
            String elementRef;
            String astNameBase;
            // Generate names and declarations of the AST variable(s)
            if (el.getLabel() != null) {
                elementRef = el.getLabel();
                astNameBase = el.getLabel();
            }
            else {
                elementRef = lt1Value;
                // Generate AST variables for unlabeled stuff
                astNameBase = "tmp" + astVarNumber;
                ;
                astVarNumber++;
            }
            // Generate the declaration if required.
            if (needASTDecl) {
                // Generate the declaration, preferring an element-specific
                // AST node type when one was given in the grammar.
                if (el instanceof GrammarAtom) {
                    GrammarAtom ga = (GrammarAtom)el;
                    if (ga.getASTNodeType() != null) {
                        genASTDeclaration(el, astNameBase, ga.getASTNodeType());
                    }
                    else {
                        genASTDeclaration(el, astNameBase, labeledElementASTType);
                    }
                }
                else {
                    genASTDeclaration(el, astNameBase, labeledElementASTType);
                }
            }
            // for convenience..
            String astName = astNameBase + "_AST";
            // Map the generated AST variable in the alternate
            mapTreeVariable(el, astName);
            if (grammar instanceof TreeWalkerGrammar) {
                // Generate an "input" AST variable also
                println(astName + "_in = None");
            }
            // Enclose actions with !guessing
            // (guard generation is disabled for the Python target)
            if (doNoGuessTest) {
                // println("if (inputState.guessing==0) {");
                // tabs++;
            }
            // if something has a label assume it will be used
            // so we must initialize the RefAST
            if (el.getLabel() != null) {
                if (el instanceof GrammarAtom) {
                    println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + "");
                }
                else {
                    println(astName + " = " + getASTCreateString(elementRef) + "");
                }
            }
            // if it has no label but a declaration exists initialize it.
            if (el.getLabel() == null && needASTDecl) {
                elementRef = lt1Value;
                if (el instanceof GrammarAtom) {
                    println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + "");
                }
                else {
                    println(astName + " = " + getASTCreateString(elementRef) + "");
                }
                // Map the generated AST variable in the alternate
                if (grammar instanceof TreeWalkerGrammar) {
                    // set "input" AST variable also
                    println(astName + "_in = " + elementRef + "");
                }
            }
            // Attach the element's AST to the tree under construction,
            // either as a child (default) or as the new root ('^').
            if (genAST) {
                switch (el.getAutoGenType()) {
                case GrammarElement.AUTO_GEN_NONE:
                    println("self.addASTChild(currentAST, " + astName + ")");
                    break;
                case GrammarElement.AUTO_GEN_CARET:
                    println("self.makeASTRoot(currentAST, " + astName + ")");
                    break;
                default:
                    break;
                }
            }
            if (doNoGuessTest) {
                // tabs--;
            }
        }
    }
  1866. /** Close the try block and generate catch phrases
  1867. * if the element has a labeled handler in the rule
  1868. */
  1869. private void genErrorCatchForElement(AlternativeElement el) {
  1870. if (el.getLabel() == null) return;
  1871. String r = el.enclosingRuleName;
  1872. if (grammar instanceof LexerGrammar) {
  1873. r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
  1874. }
  1875. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
  1876. if (rs == null) {
  1877. antlrTool.panic("Enclosing rule not found!");
  1878. }
  1879. ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
  1880. if (ex != null) {
  1881. tabs--;
  1882. genErrorHandler(ex);
  1883. }
  1884. }
    /** Generate the catch phrases for a user-specified error handler.
     * Emits one Python "except <type and name>:" clause per handler in the
     * spec, running the user action only when the parser is not guessing.
     */
    private void genErrorHandler(ExceptionSpec ex) {
        // Each ExceptionHandler in the ExceptionSpec is a separate catch
        for (int i = 0; i < ex.handlers.size(); i++) {
            ExceptionHandler handler = (ExceptionHandler)ex.handlers.elementAt(i);
            // Generate catch phrase
            println("except " + handler.exceptionTypeAndName.getText() + ":");
            tabs++;
            // With syntactic predicates the user action must be guarded so
            // it only runs outside of a guess.
            if (grammar.hasSyntacticPredicate) {
                println("if not inputState.guessing:");
                tabs++;
            }
            // When not guessing, execute user handler action
            ActionTransInfo tInfo = new ActionTransInfo();
            printAction(
                processActionForSpecialSymbols(handler.action.getText(),
                                               handler.action.getLine(),
                                               currentRule, tInfo)
                );
            if (grammar.hasSyntacticPredicate) {
                tabs--;
                println("else:");
                tabs++;
                // When guessing, rethrow exception
                println(
                    "raise " +
                    extractIdOfAction(handler.exceptionTypeAndName));
                tabs--;
            }
            // Close catch phrase
            tabs--;
        }
    }
  1918. /** Generate a try { opening if the element has a labeled handler in the rule */
  1919. private void genErrorTryForElement(AlternativeElement el) {
  1920. if (el.getLabel() == null) return;
  1921. String r = el.enclosingRuleName;
  1922. if (grammar instanceof LexerGrammar) {
  1923. r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
  1924. }
  1925. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
  1926. if (rs == null) {
  1927. antlrTool.panic("Enclosing rule not found!");
  1928. }
  1929. ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
  1930. if (ex != null) {
  1931. println("try: # for error handling");
  1932. tabs++;
  1933. }
  1934. }
  1935. protected void genASTDeclaration(AlternativeElement el) {
  1936. genASTDeclaration(el, labeledElementASTType);
  1937. }
  1938. protected void genASTDeclaration(AlternativeElement el, String node_type) {
  1939. genASTDeclaration(el, el.getLabel(), node_type);
  1940. }
  1941. protected void genASTDeclaration(AlternativeElement el, String var_name, String node_type) {
  1942. // already declared?
  1943. if (declaredASTVariables.contains(el))
  1944. return;
  1945. // emit code
  1946. println(var_name + "_AST = None");
  1947. // mark as declared
  1948. declaredASTVariables.put(el,el);
  1949. }
  1950. /** Generate a header that is common to all Python files */
  1951. protected void genHeader() {
  1952. println("### $ANTLR " + Tool.version + ": " +
  1953. "\"" + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\"" +
  1954. " -> " +
  1955. "\"" + grammar.getClassName() + ".py\"$");
  1956. }
  1957. /** Generate an iterator method for the Python CharScanner (sub)classes. */
  1958. // protected void genIterator() {
  1959. // println("def __iter__(self):");
  1960. // tabs++;
  1961. // println("return antlr.CharScannerIterator(self)");
  1962. // tabs--;
  1963. // }
  1964. /** Generate an automated test for Python CharScanner (sub)classes. */
  1965. protected void genLexerTest() {
  1966. String className = grammar.getClassName();
  1967. println("if __name__ == '__main__' :");
  1968. tabs++;
  1969. println("import sys");
  1970. println("import antlr");
  1971. println("import " + className);
  1972. println("");
  1973. println("### create lexer - shall read from stdin");
  1974. println("try:");
  1975. tabs++;
  1976. println("for token in " + className + ".Lexer():");
  1977. tabs++;
  1978. println("print token");
  1979. println("");
  1980. tabs--;
  1981. tabs--;
  1982. println("except antlr.TokenStreamException, e:");
  1983. tabs++;
  1984. println("print \"error: exception caught while lexing: \", e");
  1985. tabs--;
  1986. tabs--;
  1987. }
    /** Emit the literals-table test for a complete token: re-map _ttype
     *  through the lexer's literals table (option testLiterals=true). */
    private void genLiteralsTest()
    {
        println("### option { testLiterals=true } ");
        println("_ttype = self.testLiteralsTable(_ttype)");
    }
  1993. private void genLiteralsTestForPartialToken() {
  1994. println("_ttype = self.testLiteralsTable(self.text.getBuffer(), _begin, self.text.length()-_begin), _ttype)");
  1995. }
    /** No-op: bitset matches are not emitted through this hook by the
     *  Python code generator. */
    protected void genMatch(BitSet b) {
    }
  1998. protected void genMatch(GrammarAtom atom) {
  1999. if (atom instanceof StringLiteralElement) {
  2000. if (grammar instanceof LexerGrammar) {
  2001. genMatchUsingAtomText(atom);
  2002. }
  2003. else {
  2004. genMatchUsingAtomTokenType(atom);
  2005. }
  2006. }
  2007. else if (atom instanceof CharLiteralElement) {
  2008. if (grammar instanceof LexerGrammar) {
  2009. genMatchUsingAtomText(atom);
  2010. }
  2011. else {
  2012. antlrTool.error("cannot ref character literals in grammar: " + atom);
  2013. }
  2014. }
  2015. else if (atom instanceof TokenRefElement) {
  2016. genMatchUsingAtomText(atom);
  2017. }
  2018. else if (atom instanceof WildcardElement) {
  2019. gen((WildcardElement)atom);
  2020. }
  2021. }
    /** Emit a self.match()/self.matchNot() call using the atom's text.
     *  In lexers with '!' suppression, brackets the match with save/restore
     *  of the text buffer so the matched text is discarded. */
    protected void genMatchUsingAtomText(GrammarAtom atom) {
        // match() for trees needs the _t cursor
        String astArgs = "";
        if (grammar instanceof TreeWalkerGrammar) {
            astArgs = "_t,";
        }
        // if in lexer and ! on element, save buffer index to kill later
        if (grammar instanceof LexerGrammar &&
            (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG))
        {
            println("_saveIndex = self.text.length()");
        }
        print(atom.not ? "self.matchNot(" : "self.match(");
        _print(astArgs);
        // print out what to match
        if (atom.atomText.equals("EOF")) {
            // horrible hack to handle EOF case
            _print("EOF_TYPE");
        }
        else {
            _print(atom.atomText);
        }
        _println(")");
        // Same save-text condition as above: truncate the buffer back to
        // the saved index so the matched text is not kept.
        if (grammar instanceof LexerGrammar && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
            println("self.text.setLength(_saveIndex)"); // kill text atom put in buffer
        }
    }
  2049. protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
  2050. // match() for trees needs the _t cursor
  2051. String astArgs = "";
  2052. if (grammar instanceof TreeWalkerGrammar) {
  2053. astArgs = "_t,";
  2054. }
  2055. // If the literal can be mangled, generate the symbolic constant instead
  2056. String mangledName = null;
  2057. String s = astArgs + getValueString(atom.getType(),true);
  2058. // matching
  2059. println((atom.not ? "self.matchNot(" : "self.match(") + s + ")");
  2060. }
  2061. /** Generate the nextToken() rule. nextToken() is a synthetic
  2062. * lexer rule that is the implicit OR of all user-defined
  2063. * lexer rules.
  2064. */
  2065. public
  2066. void genNextToken() {
  2067. // Are there any public rules? If not, then just generate a
  2068. // fake nextToken().
  2069. boolean hasPublicRules = false;
  2070. for (int i = 0; i < grammar.rules.size(); i++) {
  2071. RuleSymbol rs = (RuleSymbol)grammar.rules.elementAt(i);
  2072. if (rs.isDefined() && rs.access.equals("public")) {
  2073. hasPublicRules = true;
  2074. break;
  2075. }
  2076. }
  2077. if (!hasPublicRules) {
  2078. println("");
  2079. println("def nextToken(self): ");
  2080. tabs++;
  2081. println("try:");
  2082. tabs++;
  2083. println("self.uponEOF()");
  2084. tabs--;
  2085. println("except antlr.CharStreamIOException, csioe:");
  2086. tabs++;
  2087. println("raise antlr.TokenStreamIOException(csioe.io)");
  2088. tabs--;
  2089. println("except antlr.CharStreamException, cse:");
  2090. tabs++;
  2091. println("raise antlr.TokenStreamException(str(cse))");
  2092. tabs--;
  2093. println("return antlr.CommonToken(type=EOF_TYPE, text=\"\")");
  2094. tabs--;
  2095. return;
  2096. }
  2097. // Create the synthesized nextToken() rule
  2098. RuleBlock nextTokenBlk =
  2099. MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");
  2100. // Define the nextToken rule symbol
  2101. RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
  2102. nextTokenRs.setDefined();
  2103. nextTokenRs.setBlock(nextTokenBlk);
  2104. nextTokenRs.access = "private";
  2105. grammar.define(nextTokenRs);
  2106. // Analyze the nextToken rule
  2107. boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);
  2108. // Generate the next token rule
  2109. String filterRule = null;
  2110. if (((LexerGrammar)grammar).filterMode) {
  2111. filterRule = ((LexerGrammar)grammar).filterRule;
  2112. }
  2113. println("");
  2114. println("def nextToken(self):");
  2115. tabs++;
  2116. println("while True:");
  2117. tabs++;
  2118. println("try: ### try again ..");
  2119. tabs++;
  2120. println("while True:");
  2121. tabs++;
  2122. int _tabs_ = tabs; // while block
  2123. println("_token = None");
  2124. println("_ttype = INVALID_TYPE");
  2125. if (((LexerGrammar)grammar).filterMode)
  2126. {
  2127. println("self.setCommitToPath(False)");
  2128. if (filterRule != null)
  2129. {
  2130. // Here's a good place to ensure that the filter rule actually exists
  2131. if (!grammar.isDefined(CodeGenerator.encodeLexerRuleName(filterRule))) {
  2132. grammar.antlrTool.error(
  2133. "Filter rule " + filterRule + " does not exist in this lexer");
  2134. }
  2135. else
  2136. {
  2137. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(
  2138. CodeGenerator.encodeLexerRuleName(filterRule));
  2139. if (!rs.isDefined()) {
  2140. grammar.antlrTool.error(
  2141. "Filter rule " + filterRule + " does not exist in this lexer");
  2142. }
  2143. else if (rs.access.equals("public")) {
  2144. grammar.antlrTool.error(
  2145. "Filter rule " + filterRule + " must be protected");
  2146. }
  2147. }
  2148. println("_m = self.mark()");
  2149. }
  2150. }
  2151. println("self.resetText()");
  2152. println("try: ## for char stream error handling");
  2153. tabs++;
  2154. _tabs_ = tabs; // inner try
  2155. // Generate try around whole thing to trap scanner errors
  2156. println("try: ##for lexical error handling");
  2157. tabs++;
  2158. _tabs_ = tabs; // inner try
  2159. // Test for public lexical rules with empty paths
  2160. for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++)
  2161. {
  2162. Alternative a = nextTokenBlk.getAlternativeAt(i);
  2163. if (a.cache[1].containsEpsilon())
  2164. {
  2165. //String r = a.head.toString();
  2166. RuleRefElement rr = (RuleRefElement)a.head;
  2167. String r = CodeGenerator.decodeLexerRuleName(rr.targetRule);
  2168. antlrTool.warning("public lexical rule "+r+" is optional (can match \"nothing\")");
  2169. }
  2170. }
  2171. // Generate the block
  2172. String newline = System.getProperty("line.separator");
  2173. /* generate the common block */
  2174. PythonBlockFinishingInfo howToFinish =
  2175. genCommonBlock(nextTokenBlk, false);
  2176. /* how to finish the block */
  2177. String errFinish = "";
  2178. // Is this a filter? if so we need to change the default handling.
  2179. // In non filter mode we generate EOF token on EOF and stop, other-
  2180. // wise an error gets generated. In filter mode we just continue
  2181. // by consuming the unknown character till EOF is seen.
  2182. if (((LexerGrammar)grammar).filterMode)
  2183. {
  2184. /* filter */
  2185. if (filterRule == null)
  2186. {
  2187. /* no specical filter rule has been given. */
  2188. errFinish += "self.filterdefault(self.LA(1))";
  2189. }
  2190. else
  2191. {
  2192. errFinish +=
  2193. "self.filterdefault(self.LA(1), self.m" + filterRule + ", False)";
  2194. }
  2195. }
  2196. else
  2197. {
  2198. /* non filter */
  2199. /* if an IF has been generated (in the default clause), indendation
  2200. ** is not correct. In that case we need to close on level++.
  2201. **/
  2202. errFinish = "self.default(self.LA(1))" ;
  2203. }
  2204. /* finish the block */
  2205. genBlockFinish1(howToFinish, errFinish);
  2206. // alt block has finished .., reset tabs!
  2207. tabs = _tabs_;
  2208. // at this point a valid token has been matched, undo "mark" that was done
  2209. if (((LexerGrammar)grammar).filterMode && filterRule != null) {
  2210. println("self.commit()");
  2211. }
  2212. // Generate literals test if desired
  2213. // make sure _ttype is set first; note _returnToken must be
  2214. // non-null as the rule was required to create it.
  2215. println("if not self._returnToken:");
  2216. tabs++;
  2217. println("raise antlr.TryAgain ### found SKIP token");
  2218. tabs--;
  2219. // There's one literal test (in Lexer) after the large switch
  2220. // in 'nextToken'.
  2221. if (((LexerGrammar)grammar).getTestLiterals())
  2222. {
  2223. println("### option { testLiterals=true } ");
  2224. //genLiteralsTest();
  2225. println("self.testForLiteral(self._returnToken)");
  2226. }
  2227. // return token created by rule reference in switch
  2228. println("### return token to caller");
  2229. println("return self._returnToken");
  2230. // Close try block
  2231. tabs--;
  2232. println("### handle lexical errors ....");
  2233. println("except antlr.RecognitionException, e:");
  2234. tabs++;
  2235. if (((LexerGrammar)grammar).filterMode)
  2236. {
  2237. if (filterRule == null)
  2238. {
  2239. println("if not self.getCommitToPath():");
  2240. tabs++;
  2241. println("self.consume()");
  2242. println("raise antlr.TryAgain()");
  2243. tabs--;
  2244. }
  2245. else
  2246. {
  2247. println("if not self.getCommitToPath(): ");
  2248. tabs++;
  2249. println("self.rewind(_m)");
  2250. println("self.resetText()");
  2251. println("try:");
  2252. tabs++;
  2253. println("self.m" + filterRule + "(False)");
  2254. tabs--;
  2255. println("except antlr.RecognitionException, ee:");
  2256. tabs++;
  2257. println("### horrendous failure: error in filter rule");
  2258. println("self.reportError(ee)");
  2259. println("self.consume()");
  2260. tabs--;
  2261. println("raise antlr.TryAgain()");
  2262. tabs--;
  2263. }
  2264. }
  2265. if (nextTokenBlk.getDefaultErrorHandler()) {
  2266. println("self.reportError(e)");
  2267. println("self.consume()");
  2268. }
  2269. else {
  2270. // pass on to invoking routine
  2271. println("raise antlr.TokenStreamRecognitionException(e)");
  2272. }
  2273. tabs--;
  2274. //println("");
  2275. //println("### shall never be reached ");
  2276. //println("assert 0");
  2277. // close CharStreamException try
  2278. tabs--;
  2279. println("### handle char stream errors ...");
  2280. println("except antlr.CharStreamException,cse:");
  2281. tabs++;
  2282. println("if isinstance(cse, antlr.CharStreamIOException):");
  2283. tabs++;
  2284. println("raise antlr.TokenStreamIOException(cse.io)");
  2285. tabs--;
  2286. println("else:");
  2287. tabs++;
  2288. println("raise antlr.TokenStreamException(str(cse))");
  2289. tabs--;
  2290. tabs--;
  2291. //println("### shall never be reached ");
  2292. //println("assert 0");
  2293. // close for-loop
  2294. tabs--;
  2295. //println("### <end of inner while>");
  2296. //println("### shall never be reached ");
  2297. //println("assert 0");
  2298. tabs--;
  2299. //println("### <matching outer try>");
  2300. println("except antlr.TryAgain:");
  2301. tabs++;
  2302. println("pass");
  2303. tabs--;
  2304. // close method nextToken
  2305. tabs--;
  2306. //println("### <end of outer while>");
  2307. //println("### shall never be reached");
  2308. //println("assert 0");
  2309. //println("### <end of method nextToken>");
  2310. }
    /** Gen a named rule block.
     * ASTs are generated for each element of an alternative unless
     * the rule or the alternative have a '!' modifier.
     *
     * If an alternative defeats the default tree construction, it
     * must set <rule>_AST to the root of the returned AST.
     *
     * Each alternative that does automatic tree construction, builds
     * up root and child list pointers in an ASTPair structure.
     *
     * A rule finishes by setting the returnAST variable from the
     * ASTPair.
     *
     * @param rule The name of the rule to generate
     * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
     */
    public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum) {
        // Rule methods are emitted at class-body level (one indent).
        tabs=1;
        if (!s.isDefined()) {
            antlrTool.error("undefined rule: " + s.getId());
            return;
        }
        // Generate rule return type, name, arguments
        RuleBlock rblk = s.getBlock();
        currentRule = rblk;
        currentASTResult = s.getId();
        // clear list of declared ast variables..
        declaredASTVariables.clear();
        // Save the AST generation state, and set it to that of the rule
        boolean savegenAST = genAST;
        genAST = genAST && rblk.getAutoGen();
        // boolean oldsaveTest = saveText;
        saveText = rblk.getAutoGen();
        // print javadoc comment if any
        genJavadocComment(s);
        // Gen method name
        print("def " + s.getId() + "(");
        // Additional rule parameters common to all rules for this grammar
        _print(commonExtraParams);
        if (commonExtraParams.length() != 0 && rblk.argAction != null) {
            _print(",");
        }
        // Gen arguments
        if (rblk.argAction != null) {
            // Has specified arguments
            _println("");
            tabs++;
            println(rblk.argAction);
            tabs--;
            print("):");
        }
        else {
            // No specified arguments
            _print("):");
        }
        println("");
        tabs++;
        // Convert return action to variable declaration
        if (rblk.returnAction != null) {
            if (rblk.returnAction.indexOf('=') >= 0)
                println(rblk.returnAction);
            else {
                // mx
                println(extractIdOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + " = None"); }
        }
        // print out definitions needed by rules for various grammar types
        println(commonLocalVars);
        if (grammar.traceRules) {
            if (grammar instanceof TreeWalkerGrammar) {
                println("self.traceIn(\"" + s.getId() + "\",_t)");
            }
            else {
                println("self.traceIn(\"" + s.getId() + "\")");
            }
        }
        if (grammar instanceof LexerGrammar) {
            // lexer rule default return value is the rule's token name
            // This is a horrible hack to support the built-in EOF lexer rule.
            if (s.getId().equals("mEOF"))
                println("_ttype = EOF_TYPE");
            else
                println("_ttype = " + s.getId().substring(1));
            println("_saveIndex = 0"); // used for element! (so we can kill text matched for element)
        }
        // if debugging, write code to mark entry to the rule
        if (grammar.debuggingOutput)
            if (grammar instanceof ParserGrammar)
                println("self.fireEnterRule(" + ruleNum + ", 0)");
            else if (grammar instanceof LexerGrammar)
                println("self.fireEnterRule(" + ruleNum + ", _ttype)");
        // Generate trace code if desired
        if (grammar.debuggingOutput || grammar.traceRules) {
            println("try: ### debugging");
            tabs++;
        }
        // Initialize AST variables
        if (grammar instanceof TreeWalkerGrammar) {
            // "Input" value for rule
            println(s.getId() + "_AST_in = None");
            println("if _t != antlr.ASTNULL:");
            tabs++;
            println(s.getId() + "_AST_in = _t");
            tabs--;
        }
        if (grammar.buildAST)
        {
            // Parser member used to pass AST returns from rule invocations
            println("self.returnAST = None");
            println("currentAST = antlr.ASTPair()");
            // User-settable return value for rule.
            println(s.getId() + "_AST = None");
        }
        genBlockPreamble(rblk);
        genBlockInitAction(rblk);
        // Search for an unlabeled exception specification attached to the rule
        ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");
        // Generate try block around the entire rule for error handling
        if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
            println("try: ## for error handling");
            tabs++;
        }
        // Remember the indent level so it can be restored after the
        // alternatives are emitted.
        int _tabs_ = tabs;
        // Generate the alternatives
        if (rblk.alternatives.size() == 1)
        {
            // One alternative -- use simple form
            Alternative alt = rblk.getAlternativeAt(0);
            String pred = alt.semPred;
            if (pred != null)
                genSemPred(pred, currentRule.line);
            if (alt.synPred != null)
            {
                antlrTool.warning(
                    "Syntactic predicate ignored for single alternative",
                    grammar.getFilename(),
                    alt.synPred.getLine(),
                    alt.synPred.getColumn()
                );
            }
            genAlt(alt, rblk);
        }
        else
        {
            // Multiple alternatives -- generate complex form
            // NOTE(review): "ok" is unused; deterministic() appears to be
            // called for its analysis side effects — consider dropping the
            // local.
            boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);
            PythonBlockFinishingInfo howToFinish = genCommonBlock(rblk, false);
            genBlockFinish(howToFinish, throwNoViable);
        }
        tabs = _tabs_;
        // Generate catch phrase for error handling
        if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
            // Close the try block
            tabs--;
            println("");
        }
        // Generate user-defined or default catch phrases
        if (unlabeledUserSpec != null) {
            genErrorHandler(unlabeledUserSpec);
        }
        else if (rblk.getDefaultErrorHandler()) {
            // Generate default catch phrase
            println("except " + exceptionThrown + ", ex:");
            tabs++;
            // Generate code to handle error if not guessing
            if (grammar.hasSyntacticPredicate) {
                println("if not self.inputState.guessing:");
                tabs++;
            }
            println("self.reportError(ex)");
            if (!(grammar instanceof TreeWalkerGrammar)) {
                // Generate code to consume until token in k==1 follow set
                Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, rblk.endNode);
                String followSetName = getBitsetName(markBitsetForGen(follow.fset));
                println("self.consume()");
                println("self.consumeUntil(" + followSetName + ")");
            }
            else {
                // Just consume one token
                println("if _t:");
                tabs++;
                println("_t = _t.getNextSibling()");
                tabs--;
            }
            if (grammar.hasSyntacticPredicate) {
                tabs--;
                // When guessing, rethrow exception
                println("else:");
                tabs++;
                println("raise ex");
                tabs--;
            }
            // Close catch phrase
            tabs--;
            println("");
        }
        // Squirrel away the AST "return" value
        if (grammar.buildAST) {
            println("self.returnAST = " + s.getId() + "_AST");
        }
        // Set return tree value for tree walkers
        if (grammar instanceof TreeWalkerGrammar) {
            println("self._retTree = _t");
        }
        // Generate literals test for lexer rules so marked
        if (rblk.getTestLiterals()) {
            if (s.access.equals("protected")) {
                genLiteralsTestForPartialToken();
            }
            else {
                genLiteralsTest();
            }
        }
        // if doing a lexer rule, dump code to create token if necessary
        if (grammar instanceof LexerGrammar)
        {
            println("self.set_return_token(_createToken, _token, _ttype, _begin)");
        }
        if(rblk.returnAction != null)
        {
            // if(grammar instanceof LexerGrammar)
            // {
            println("return " +
                    extractIdOfAction(rblk.returnAction,
                                      rblk.getLine(),
                                      rblk.getColumn()) + "");
            // }
            // else
            // {
            //   println("return r");
            // }
        }
        if (grammar.debuggingOutput || grammar.traceRules) {
            tabs--;
            println("finally: ### debugging");
            tabs++;
            // If debugging, generate calls to mark exit of rule
            if (grammar.debuggingOutput)
                if (grammar instanceof ParserGrammar)
                    println("self.fireExitRule(" + ruleNum + ", 0)");
                else if (grammar instanceof LexerGrammar)
                    println("self.fireExitRule(" + ruleNum + ", _ttype)");
            if (grammar.traceRules) {
                if (grammar instanceof TreeWalkerGrammar) {
                    println("self.traceOut(\"" + s.getId() + "\", _t)");
                }
                else {
                    println("self.traceOut(\"" + s.getId() + "\")");
                }
            }
            tabs--;
        }
        tabs--;
        println("");
        // Restore the AST generation state
        genAST = savegenAST;
        // restore char save state
        // saveText = oldsaveTest;
    }
    /** Emit a Python call to another rule (self.<rule>(...)), including the
     *  lexer's _createToken flag, common extra arguments, and any user
     *  argument action; for tree walkers, also advances the _t cursor.
     *  NOTE(review): method name violates Java naming convention
     *  (should be genRuleInvocation), kept for compatibility with callers.
     */
    private void GenRuleInvocation(RuleRefElement rr) {
        // dump rule name
        _print("self." + rr.targetRule + "(");
        // lexers must tell rule if it should set _returnToken
        if (grammar instanceof LexerGrammar) {
            // if labeled, could access Token, so tell rule to create
            if (rr.getLabel() != null) {
                _print("True");
            }
            else {
                _print("False");
            }
            if (commonExtraArgs.length() != 0 || rr.args != null) {
                _print(", ");
            }
        }
        // Extra arguments common to all rules for this grammar
        _print(commonExtraArgs);
        if (commonExtraArgs.length() != 0 && rr.args != null) {
            _print(", ");
        }
        // Process arguments to method, if any
        // NOTE(review): rs is dereferenced below without a null check — if
        // the target rule is undefined this throws NPE; verify callers
        // guarantee the symbol exists.
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
        if (rr.args != null) {
            // When not guessing, execute user arg action
            ActionTransInfo tInfo = new ActionTransInfo();
            String args = processActionForSpecialSymbols(rr.args, 0, currentRule, tInfo);
            if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
                antlrTool.error("Arguments of rule reference '" + rr.targetRule + "' cannot set or ref #" +
                                currentRule.getRuleName(), grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
            _print(args);
            // Warn if the rule accepts no arguments
            if (rs.block.argAction == null) {
                antlrTool.warning("Rule '" + rr.targetRule + "' accepts no arguments", grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
        }
        else {
            // For C++, no warning if rule has parameters, because there may be default
            // values for all of the parameters
            if (rs.block.argAction != null) {
                antlrTool.warning("Missing parameters on reference to rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
        }
        _println(")");
        // move down to the first child while parsing
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = self._retTree");
        }
    }
  2619. protected void genSemPred(String pred, int line) {
  2620. // translate $ and # references
  2621. ActionTransInfo tInfo = new ActionTransInfo();
  2622. pred = processActionForSpecialSymbols(pred, line, currentRule, tInfo);
  2623. // ignore translation info...we don't need to do anything with it.
  2624. String escapedPred = charFormatter.escapeString(pred);
  2625. // if debugging, wrap the semantic predicate evaluation in a method
  2626. // that can tell SemanticPredicateListeners the result
  2627. if (grammar.debuggingOutput &&
  2628. ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
  2629. pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING,"
  2630. + addSemPred(escapedPred) + ", " + pred + ")";
  2631. /* always .. */
  2632. println("if not " + pred + ":");
  2633. tabs++;
  2634. println("raise SemanticException(\"" + escapedPred + "\")");
  2635. tabs--;
  2636. }
  2637. /** Write an array of Strings which are the semantic predicate
  2638. * expressions. The debugger will reference them by number only
  2639. */
  2640. protected void genSemPredMap() {
  2641. Enumeration e = semPreds.elements();
  2642. println("_semPredNames = [");
  2643. tabs++;
  2644. while (e.hasMoreElements()) {
  2645. println("\"" + e.nextElement() + "\",");
  2646. }
  2647. tabs--;
  2648. println("]");
  2649. }
/** Generate guessing-mode code for a syntactic predicate block:
 *  a result flag, saved input state, a try/except that runs the
 *  predicate alternatives with inputState.guessing bumped, state
 *  restoration, optional debug notifications, and the final flag test.
 */
protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
    if (DEBUG_CODE_GENERATOR) System.out.println("gen=>(" + blk + ")");
    // Dump synpred result variable
    println("synPredMatched" + blk.ID + " = False");
    // Gen normal lookahead test
    println("if " + lookaheadExpr + ":");
    tabs++;
    // Save input state: tree cursor for walkers, stream mark otherwise
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t" + blk.ID + " = _t");
    }
    else {
        println("_m" + blk.ID + " = self.mark()");
    }
    // Once inside the try, assume synpred works unless exception caught
    println("synPredMatched" + blk.ID + " = True");
    println("self.inputState.guessing += 1");
    // if debugging, tell listeners that a synpred has started
    if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
        (grammar instanceof LexerGrammar))) {
        println("self.fireSyntacticPredicateStarted()");
    }
    syntacticPredLevel++;
    println("try:");
    tabs++;
    gen((AlternativeBlock)blk); // gen code to test predicate
    tabs--;
    println("except " + exceptionThrown + ", pe:");
    tabs++;
    println("synPredMatched" + blk.ID + " = False");
    tabs--;
    // Restore input state
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = _t" + blk.ID + "");
    }
    else {
        println("self.rewind(_m" + blk.ID + ")");
    }
    println("self.inputState.guessing -= 1");
    // if debugging, tell listeners how the synpred turned out
    if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
        (grammar instanceof LexerGrammar))) {
        println("if synPredMatched" + blk.ID + ":");
        tabs++;
        println("self.fireSyntacticPredicateSucceeded()");
        tabs--;
        println("else:");
        tabs++;
        println("self.fireSyntacticPredicateFailed()");
        tabs--;
    }
    syntacticPredLevel--;
    tabs--;
    // Close lookahead test
    // Test synpred result
    println("if synPredMatched" + blk.ID + ":");
}
/** Generate a static array containing the names of the tokens,
 * indexed by the token type values. This static array is used
 * to format error messages so that the token identifers or literal
 * strings are displayed instead of the token numbers.
 *
 * If a lexical rule has a paraphrase, use it rather than the
 * token label.
 */
public void genTokenStrings() {
    // Generate a string for each token. This creates a static
    // array of Strings indexed by token type.  The list is emitted
    // at module level, so indentation is forced to zero and restored.
    int save_tabs = tabs;
    tabs = 0;
    println("");
    println("_tokenNames = [");
    tabs++;
    // Walk the token vocabulary and generate a Vector of strings
    // from the tokens.
    Vector v = grammar.tokenManager.getVocabulary();
    for (int i = 0; i < v.size(); i++) {
        String s = (String)v.elementAt(i);
        if (s == null) {
            // gap in the vocabulary: emit a "<N>" placeholder
            s = "<" + String.valueOf(i) + ">";
        }
        if (!s.startsWith("\"") && !s.startsWith("<")) {
            // named token: prefer its paraphrase when one was declared
            TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
            if (ts != null && ts.getParaphrase() != null) {
                s = StringUtils.stripFrontBack(ts.getParaphrase(), "\"", "\"");
            }
        }
        print(charFormatter.literalString(s));
        if (i != v.size() - 1) {
            // comma after every entry except the last
            _print(", ");
        }
        _println("");
    }
    // Close the string array initializer
    tabs--;
    println("]");
    tabs = save_tabs;
}
/** Create and set Integer token type objects that map
 * to Java Class objects (which AST node to create).
 * Emits a Python buildTokenTypeASTClassMap method; publishes None
 * when no token declares a heterogeneous AST node type.
 */
protected void genTokenASTNodeMap() {
    println("");
    println("def buildTokenTypeASTClassMap(self):");
    // Generate a map.put("T","TNode") for each token
    // if heterogeneous node known for that token T.
    tabs++;
    boolean generatedNewHashtable = false;
    int n = 0;  // number of mappings emitted
    // Walk the token vocabulary and generate puts.
    Vector v = grammar.tokenManager.getVocabulary();
    for (int i = 0; i < v.size(); i++) {
        String s = (String)v.elementAt(i);
        if (s != null) {
            TokenSymbol ts = grammar.tokenManager.getTokenSymbol(s);
            if (ts != null && ts.getASTNodeType() != null) {
                n++;
                if ( !generatedNewHashtable ) {
                    // only generate if we are going to add a mapping
                    println("self.tokenTypeToASTClassMap = {}");
                    generatedNewHashtable = true;
                }
                println(
                    "self.tokenTypeToASTClassMap["
                    + ts.getTokenType()
                    + "] = "
                    + ts.getASTNodeType() );
            }
        }
    }
    if ( n==0 ) {
        // no heterogeneous node types at all: publish an explicit None
        println("self.tokenTypeToASTClassMap = None");
    }
    tabs--;
}
/** Generate the token type definitions as Python assignments: first the
 *  special antlr aliases (SKIP, EOF, ...), then one NAME = value line
 *  per user token.  String literals use their label or a mangled
 *  LITERAL_xxx name; unmangleable literals become '###' comments.
 *  @throws IOException declared for subclasses/historical file opening
 */
protected void genTokenTypes(TokenManager tm) throws IOException {
    // Open the token output Python file and set the currentOutput
    // stream
    // SAS: file open was moved to a method so a subclass can override
    // This was mainly for the VAJ interface
    // setupOutput(tm.getName() + TokenTypesFileSuffix);
    tabs = 0;
    // Generate the header common to all Python files
    // genHeader();
    // Do not use printAction because we assume tabs==0
    // println(behavior.getHeaderAction(""));
    // Generate a definition for each token type
    Vector v = tm.getVocabulary();
    // Do special tokens manually
    println("SKIP = antlr.SKIP");
    println("INVALID_TYPE = antlr.INVALID_TYPE");
    println("EOF_TYPE = antlr.EOF_TYPE");
    println("EOF = antlr.EOF");
    println("NULL_TREE_LOOKAHEAD = antlr.NULL_TREE_LOOKAHEAD");
    println("MIN_USER_TYPE = antlr.MIN_USER_TYPE");
    for (int i = Token.MIN_USER_TYPE; i < v.size(); i++)
    {
        String s = (String)v.elementAt(i);
        if (s != null)
        {
            if (s.startsWith("\""))
            {
                // a string literal
                StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
                if (sl == null)
                    antlrTool.panic("String literal " + s + " not in symbol table");
                if (sl.label != null)
                {
                    println(sl.label + " = " + i);
                }
                else
                {
                    String mangledName = mangleLiteral(s);
                    if (mangledName != null) {
                        // We were able to create a meaningful mangled token name
                        println(mangledName + " = " + i);
                        // if no label specified, make the label equal to the mangled name
                        sl.label = mangledName;
                    }
                    else
                    {
                        // not mangleable: emit as a Python comment only
                        println("### " + s + " = " + i);
                    }
                }
            }
            else if (!s.startsWith("<")) {
                // ordinary named token
                println(s + " = " + i);
            }
        }
    }
    // Close the interface
    tabs--;
    exitIfError();
}
  2845. /** Get a string for an expression to generate creation of an AST subtree.
  2846. * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
  2847. */
  2848. public String getASTCreateString(Vector v) {
  2849. if (v.size() == 0) {
  2850. return "";
  2851. }
  2852. StringBuffer buf = new StringBuffer();
  2853. buf.append("antlr.make(");
  2854. for (int i = 0; i < v.size(); i++) {
  2855. buf.append(v.elementAt(i));
  2856. if(i+1<v.size()) {
  2857. buf.append(", ");
  2858. }
  2859. }
  2860. buf.append(")");
  2861. return buf.toString();
  2862. }
  2863. /** Get a string for an expression to generate creating of an AST node
  2864. * @param atom The grammar node for which you are creating the node
  2865. * @param str The arguments to the AST constructor
  2866. */
  2867. public String getASTCreateString(GrammarAtom atom, String astCtorArgs)
  2868. {
  2869. if (atom != null && atom.getASTNodeType() != null)
  2870. {
  2871. // they specified a type either on the reference or in tokens{} section
  2872. return
  2873. "self.astFactory.create(" + astCtorArgs + ", " + atom.getASTNodeType() + ")";
  2874. }
  2875. else {
  2876. // must be an action or something since not referencing an atom
  2877. return getASTCreateString(astCtorArgs);
  2878. }
  2879. }
  2880. /** Get a string for an expression to generate creating of an AST node.
  2881. * Parse the first (possibly only) argument looking for the token type.
  2882. * If the token type is a valid token symbol, ask for it's AST node type
  2883. * and add to the end if only 2 arguments. The forms are #[T], #[T,"t"],
  2884. * and as of 2.7.2 #[T,"t",ASTclassname].
  2885. *
  2886. * @param str The arguments to the AST constructor
  2887. */
  2888. public String getASTCreateString(String astCtorArgs) {
  2889. if ( astCtorArgs==null ) {
  2890. astCtorArgs = "";
  2891. }
  2892. int nCommas = 0;
  2893. for (int i=0; i<astCtorArgs.length(); i++) {
  2894. if ( astCtorArgs.charAt(i)==',' ) {
  2895. nCommas++;
  2896. }
  2897. }
  2898. if ( nCommas<2 ) { // if 1 or 2 args
  2899. int firstComma = astCtorArgs.indexOf(',');
  2900. int lastComma = astCtorArgs.lastIndexOf(',');
  2901. String tokenName = astCtorArgs;
  2902. if ( nCommas>0 ) {
  2903. tokenName = astCtorArgs.substring(0,firstComma);
  2904. }
  2905. TokenSymbol ts = grammar.tokenManager.getTokenSymbol(tokenName);
  2906. if ( ts!=null ) {
  2907. String astNodeType = ts.getASTNodeType();
  2908. String emptyText = "";
  2909. if ( nCommas==0 ) {
  2910. // need to add 2nd arg of blank text for token text
  2911. emptyText = ", \"\"";
  2912. }
  2913. if ( astNodeType!=null ) {
  2914. return "self.astFactory.create(" + astCtorArgs + emptyText + ", " + astNodeType + ")";
  2915. }
  2916. // fall through and just do a regular create with cast on front
  2917. // if necessary (it differs from default "AST").
  2918. }
  2919. if ( labeledElementASTType.equals("AST") ) {
  2920. return "self.astFactory.create("+astCtorArgs+")";
  2921. }
  2922. return
  2923. "self.astFactory.create("+astCtorArgs+")";
  2924. }
  2925. // create default type or (since 2.7.2) 3rd arg is classname
  2926. return "self.astFactory.create(" + astCtorArgs + ")";
  2927. }
  2928. protected String getLookaheadTestExpression(Lookahead[] look, int k) {
  2929. StringBuffer e = new StringBuffer(100);
  2930. boolean first = true;
  2931. e.append("(");
  2932. for (int i = 1; i <= k; i++) {
  2933. BitSet p = look[i].fset;
  2934. if (!first) {
  2935. e.append(") and (");
  2936. }
  2937. first = false;
  2938. // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
  2939. // There is no way to predict what that token would be. Just
  2940. // allow anything instead.
  2941. if (look[i].containsEpsilon()) {
  2942. e.append("True");
  2943. }
  2944. else {
  2945. e.append(getLookaheadTestTerm(i, p));
  2946. }
  2947. }
  2948. e.append(")");
  2949. String s = e.toString();
  2950. return s;
  2951. }
  2952. /**Generate a lookahead test expression for an alternate. This
  2953. * will be a series of tests joined by '&&' and enclosed by '()',
  2954. * the number of such tests being determined by the depth of the lookahead.
  2955. */
  2956. protected String getLookaheadTestExpression(Alternative alt, int maxDepth) {
  2957. int depth = alt.lookaheadDepth;
  2958. if (depth == GrammarAnalyzer.NONDETERMINISTIC)
  2959. {
  2960. // if the decision is nondeterministic, do the best we can: LL(k)
  2961. // any predicates that are around will be generated later.
  2962. depth = grammar.maxk;
  2963. }
  2964. if (maxDepth == 0)
  2965. {
  2966. // empty lookahead can result from alt with sem pred
  2967. // that can see end of token. E.g., A : {pred}? ('a')? ;
  2968. return "True";
  2969. }
  2970. return getLookaheadTestExpression(alt.cache, depth);
  2971. }
  2972. /**Generate a depth==1 lookahead test expression given the BitSet.
  2973. * This may be one of:
  2974. * 1) a series of 'x==X||' tests
  2975. * 2) a range test using >= && <= where possible,
  2976. * 3) a bitset membership test for complex comparisons
  2977. * @param k The lookahead level
  2978. * @param p The lookahead set for level k
  2979. */
  2980. protected String getLookaheadTestTerm(int k, BitSet p) {
  2981. // Determine the name of the item to be compared
  2982. String ts = lookaheadString(k);
  2983. // Generate a range expression if possible
  2984. int[] elems = p.toArray();
  2985. if (elementsAreRange(elems)) {
  2986. String s = getRangeExpression(k, elems);
  2987. return s;
  2988. }
  2989. // Generate a bitset membership test if possible
  2990. StringBuffer e;
  2991. int degree = p.degree();
  2992. if (degree == 0) {
  2993. return "True";
  2994. }
  2995. if (degree >= bitsetTestThreshold) {
  2996. int bitsetIdx = markBitsetForGen(p);
  2997. return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
  2998. }
  2999. // Otherwise, generate the long-winded series of "x==X||" tests
  3000. e = new StringBuffer();
  3001. for (int i = 0; i < elems.length; i++) {
  3002. // Get the compared-to item (token or character value)
  3003. String cs = getValueString(elems[i],true);
  3004. // Generate the element comparison
  3005. if (i > 0) e.append(" or ");
  3006. e.append(ts);
  3007. e.append("==");
  3008. e.append(cs);
  3009. }
  3010. String x = e.toString();
  3011. return e.toString();
  3012. }
  3013. /** Return an expression for testing a contiguous renage of elements
  3014. * @param k The lookahead level
  3015. * @param elems The elements representing the set, usually from BitSet.toArray().
  3016. * @return String containing test expression.
  3017. */
  3018. public String getRangeExpression(int k, int[] elems) {
  3019. if (!elementsAreRange(elems)) {
  3020. antlrTool.panic("getRangeExpression called with non-range");
  3021. }
  3022. int begin = elems[0];
  3023. int end = elems[elems.length - 1];
  3024. return
  3025. "(" + lookaheadString(k) + " >= " + getValueString(begin,true) + " and " +
  3026. lookaheadString(k) + " <= " + getValueString(end,true) + ")";
  3027. }
  3028. /** getValueString: get a string representation of a token or char value
  3029. * @param value The token or char value
  3030. */
  3031. private String getValueString(int value,boolean wrap) {
  3032. String cs;
  3033. if (grammar instanceof LexerGrammar) {
  3034. cs = charFormatter.literalChar(value);
  3035. if(wrap)
  3036. cs = "u'" + cs + "'";
  3037. return cs;
  3038. }
  3039. // Parser or TreeParser => tokens ..
  3040. TokenSymbol ts =
  3041. grammar.tokenManager.getTokenSymbolAt(
  3042. value);
  3043. if (ts == null) {
  3044. cs = "" + value; // return token type as string
  3045. return cs;
  3046. }
  3047. String tId = ts.getId();
  3048. if (!(ts instanceof StringLiteralSymbol))
  3049. {
  3050. cs = tId;
  3051. return cs;
  3052. }
  3053. // if string literal, use predefined label if any
  3054. // if no predefined, try to mangle into LITERAL_xxx.
  3055. // if can't mangle, use int value as last resort
  3056. StringLiteralSymbol sl = (StringLiteralSymbol)ts;
  3057. String label = sl.getLabel();
  3058. if (label != null) {
  3059. cs = label;
  3060. }
  3061. else
  3062. {
  3063. cs = mangleLiteral(tId);
  3064. if (cs == null) {
  3065. cs = String.valueOf(value);
  3066. }
  3067. }
  3068. return cs;
  3069. }
  3070. /**Is the lookahead for this alt empty? */
  3071. protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
  3072. int depth = alt.lookaheadDepth;
  3073. if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
  3074. depth = grammar.maxk;
  3075. }
  3076. for (int i = 1; i <= depth && i <= maxDepth; i++) {
  3077. BitSet p = alt.cache[i].fset;
  3078. if (p.degree() != 0) {
  3079. return false;
  3080. }
  3081. }
  3082. return true;
  3083. }
  3084. private String lookaheadString(int k) {
  3085. if (grammar instanceof TreeWalkerGrammar) {
  3086. return "_t.getType()";
  3087. }
  3088. return "self.LA(" + k + ")";
  3089. }
  3090. /** Mangle a string literal into a meaningful token name. This is
  3091. * only possible for literals that are all characters. The resulting
  3092. * mangled literal name is literalsPrefix with the text of the literal
  3093. * appended.
  3094. * @return A string representing the mangled literal, or null if not possible.
  3095. */
  3096. private String mangleLiteral(String s) {
  3097. String mangled = antlrTool.literalsPrefix;
  3098. for (int i = 1; i < s.length() - 1; i++) {
  3099. if (!Character.isLetter(s.charAt(i)) &&
  3100. s.charAt(i) != '_') {
  3101. return null;
  3102. }
  3103. mangled += s.charAt(i);
  3104. }
  3105. if (antlrTool.upperCaseMangledLiterals) {
  3106. mangled = mangled.toUpperCase();
  3107. }
  3108. return mangled;
  3109. }
/** Map an identifier to it's corresponding tree-node variable.
 * This is context-sensitive, depending on the rule and alternative
 * being generated
 * @param idParam The identifier name to map
 * @return The mapped id (which may be the same as the input), or null if
 * the mapping is invalid due to duplicates
 */
public String mapTreeId(String idParam, ActionTransInfo transInfo) {
    // if not in an action of a rule, nothing to map.
    if (currentRule == null) return idParam;
    boolean in_var = false;  // true => reference the *input* variable
    String id = idParam;
    if (grammar instanceof TreeWalkerGrammar) {
        if (!grammar.buildAST) {
            in_var = true;
        }
        // If the id ends with "_in", then map it to the input variable
        else if (id.length() > 3 && id.lastIndexOf("_in") == id.length() - 3) {
            // Strip off the "_in"
            id = id.substring(0, id.length() - 3);
            in_var = true;
        }
    }
    // Check the rule labels. If id is a label, then the output
    // variable is label_AST, and the input variable is plain label.
    for (int i = 0; i < currentRule.labeledElements.size(); i++) {
        AlternativeElement elt = (AlternativeElement)currentRule.labeledElements.elementAt(i);
        if (elt.getLabel().equals(id)) {
            return in_var ? id : id + "_AST";
        }
    }
    // Failing that, check the id-to-variable map for the alternative.
    // If the id is in the map, then output variable is the name in the
    // map, and input variable is name_in
    String s = (String)treeVariableMap.get(id);
    if (s != null) {
        // NOTE: reference comparison is intentional -- NONUNIQUE is a
        // sentinel object stored in the map, not an ordinary value.
        if (s == NONUNIQUE) {
            // There is more than one element with this id
            antlrTool.error("Ambiguous reference to AST element "+id+
                            " in rule "+currentRule.getRuleName());
            return null;
        }
        else if (s.equals(currentRule.getRuleName())) {
            // a recursive call to the enclosing rule is
            // ambiguous with the rule itself.
            antlrTool.error("Ambiguous reference to AST element "+id+
                            " in rule "+currentRule.getRuleName());
            return null;
        }
        else {
            return in_var ? s + "_in" : s;
        }
    }
    // Failing that, check the rule name itself. Output variable
    // is rule_AST; input variable is rule_AST_in (treeparsers).
    if (id.equals(currentRule.getRuleName())) {
        String r = in_var ? id + "_AST_in" : id + "_AST";
        if (transInfo != null) {
            if (!in_var) {
                // remember that the action referenced the rule root
                transInfo.refRuleRoot = r;
            }
        }
        return r;
    }
    else {
        // id does not map to anything -- return itself.
        return id;
    }
}
  3179. /** Given an element and the name of an associated AST variable,
  3180. * create a mapping between the element "name" and the variable name.
  3181. */
  3182. private void mapTreeVariable(AlternativeElement e, String name) {
  3183. // For tree elements, defer to the root
  3184. if (e instanceof TreeElement) {
  3185. mapTreeVariable(((TreeElement)e).root, name);
  3186. return;
  3187. }
  3188. // Determine the name of the element, if any, for mapping purposes
  3189. String elName = null;
  3190. // Don't map labeled items
  3191. if (e.getLabel() == null) {
  3192. if (e instanceof TokenRefElement) {
  3193. // use the token id
  3194. elName = ((TokenRefElement)e).atomText;
  3195. }
  3196. else if (e instanceof RuleRefElement) {
  3197. // use the rule name
  3198. elName = ((RuleRefElement)e).targetRule;
  3199. }
  3200. }
  3201. // Add the element to the tree variable map if it has a name
  3202. if (elName != null) {
  3203. if (treeVariableMap.get(elName) != null) {
  3204. // Name is already in the map -- mark it as duplicate
  3205. treeVariableMap.remove(elName);
  3206. treeVariableMap.put(elName, NONUNIQUE);
  3207. }
  3208. else {
  3209. treeVariableMap.put(elName, name);
  3210. }
  3211. }
  3212. }
/** Lexically process $var and tree-specifiers in the action.
 * This will replace #id and #(...) with the appropriate
 * function calls and/or variables etc...
 * Returns null for a null/zero-length action, "" for all-whitespace
 * input, otherwise the translated action text.
 */
protected String processActionForSpecialSymbols(String actionStr, int line,
                                                RuleBlock currentRule,
                                                ActionTransInfo tInfo) {
    if (actionStr == null || actionStr.length() == 0)
        return null;
    if (isEmpty(actionStr))
        return "";
    // The action trans info tells us (at the moment) whether an
    // assignment was done to the rule's tree root.
    if (grammar == null) {
        // to be processd by PyCodeFmt??
        return actionStr;
    }
    // Create a lexer to read an action and return the translated version
    antlr.actions.python.ActionLexer lexer =
        new antlr.actions.python.ActionLexer(
            actionStr,
            currentRule,
            this,
            tInfo);
    lexer.setLineOffset(line);
    lexer.setFilename(grammar.getFilename());
    lexer.setTool(antlrTool);
    try {
        lexer.mACTION(true);
        actionStr = lexer.getTokenObject().getText();
    }
    catch (RecognitionException ex) {
        // report, then fall through returning the untranslated action
        lexer.reportError(ex);
    }
    catch (TokenStreamException tex) {
        antlrTool.panic("Error reading action:" + actionStr);
    }
    catch (CharStreamException io) {
        antlrTool.panic("Error reading action:" + actionStr);
    }
    return actionStr;
}
  3255. static boolean isEmpty(String s) {
  3256. char c;
  3257. boolean ws = true;
  3258. /* figure out whether there's something to be done */
  3259. for(int i=0;ws && i<s.length();++i) {
  3260. c = s.charAt(i);
  3261. switch(c) {
  3262. case '\n':
  3263. case '\r':
  3264. case ' ' :
  3265. case '\t':
  3266. case '\f':
  3267. {
  3268. break;
  3269. }
  3270. default: {
  3271. ws = false;
  3272. }
  3273. }
  3274. }
  3275. return ws;
  3276. }
  3277. protected String processActionCode(String actionStr,int line) {
  3278. /* shall process action code unconditionally */
  3279. if(actionStr == null || isEmpty(actionStr))
  3280. return "";
  3281. antlr.actions.python.CodeLexer lexer =
  3282. new antlr.actions.python.CodeLexer(
  3283. actionStr,
  3284. grammar.getFilename(),
  3285. line,
  3286. antlrTool
  3287. );
  3288. try {
  3289. lexer.mACTION(true);
  3290. actionStr = lexer.getTokenObject().getText();
  3291. }
  3292. catch (RecognitionException ex) {
  3293. lexer.reportError(ex);
  3294. }
  3295. catch (TokenStreamException tex) {
  3296. antlrTool.panic("Error reading action:" + actionStr);
  3297. }
  3298. catch (CharStreamException io) {
  3299. antlrTool.panic("Error reading action:" + actionStr);
  3300. }
  3301. return actionStr;
  3302. }
  3303. protected void printActionCode(String actionStr,int line) {
  3304. actionStr = processActionCode(actionStr,line);
  3305. printAction(actionStr);
  3306. }
  3307. private void setupGrammarParameters(Grammar g) {
  3308. if (g instanceof ParserGrammar)
  3309. {
  3310. labeledElementASTType = "";
  3311. if (g.hasOption("ASTLabelType"))
  3312. {
  3313. Token tsuffix = g.getOption("ASTLabelType");
  3314. if (tsuffix != null) {
  3315. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  3316. if (suffix != null) {
  3317. labeledElementASTType = suffix;
  3318. }
  3319. }
  3320. }
  3321. labeledElementType = "";
  3322. labeledElementInit = "None";
  3323. commonExtraArgs = "";
  3324. commonExtraParams = "self";
  3325. commonLocalVars = "";
  3326. lt1Value = "self.LT(1)";
  3327. exceptionThrown = "antlr.RecognitionException";
  3328. throwNoViable = "raise antlr.NoViableAltException(self.LT(1), self.getFilename())";
  3329. parserClassName = "Parser";
  3330. if (g.hasOption("className"))
  3331. {
  3332. Token tcname = g.getOption("className");
  3333. if (tcname != null) {
  3334. String cname = StringUtils.stripFrontBack(tcname.getText(), "\"", "\"");
  3335. if (cname != null) {
  3336. parserClassName = cname;
  3337. }
  3338. }
  3339. }
  3340. return;
  3341. }
  3342. if (g instanceof LexerGrammar)
  3343. {
  3344. labeledElementType = "char ";
  3345. labeledElementInit = "'\\0'";
  3346. commonExtraArgs = "";
  3347. commonExtraParams = "self, _createToken";
  3348. commonLocalVars = "_ttype = 0\n _token = None\n _begin = self.text.length()";
  3349. lt1Value = "self.LA(1)";
  3350. exceptionThrown = "antlr.RecognitionException";
  3351. throwNoViable = "self.raise_NoViableAlt(self.LA(1))";
  3352. lexerClassName = "Lexer";
  3353. if (g.hasOption("className"))
  3354. {
  3355. Token tcname = g.getOption("className");
  3356. if (tcname != null) {
  3357. String cname = StringUtils.stripFrontBack(tcname.getText(), "\"", "\"");
  3358. if (cname != null) {
  3359. lexerClassName = cname;
  3360. }
  3361. }
  3362. }
  3363. return;
  3364. }
  3365. if (g instanceof TreeWalkerGrammar)
  3366. {
  3367. labeledElementASTType = "";
  3368. labeledElementType = "";
  3369. if (g.hasOption("ASTLabelType")) {
  3370. Token tsuffix = g.getOption("ASTLabelType");
  3371. if (tsuffix != null) {
  3372. String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
  3373. if (suffix != null) {
  3374. labeledElementASTType = suffix;
  3375. labeledElementType = suffix;
  3376. }
  3377. }
  3378. }
  3379. if (!g.hasOption("ASTLabelType")) {
  3380. g.setOption("ASTLabelType", new Token(ANTLRTokenTypes.STRING_LITERAL, "<4>AST"));
  3381. }
  3382. labeledElementInit = "None";
  3383. commonExtraArgs = "_t";
  3384. commonExtraParams = "self, _t";
  3385. commonLocalVars = "";
  3386. lt1Value = "_t";
  3387. exceptionThrown = "antlr.RecognitionException";
  3388. throwNoViable = "raise antlr.NoViableAltException(_t)";
  3389. treeWalkerClassName = "Walker";
  3390. if (g.hasOption("className"))
  3391. {
  3392. Token tcname = g.getOption("className");
  3393. if (tcname != null) {
  3394. String cname = StringUtils.stripFrontBack(tcname.getText(), "\"", "\"");
  3395. if (cname != null) {
  3396. treeWalkerClassName = cname;
  3397. }
  3398. }
  3399. }
  3400. return;
  3401. }
  3402. /* serious error */
  3403. antlrTool.panic("Unknown grammar type");
  3404. }
/** This method exists so a subclass, namely VAJCodeGenerator,
 * can open the file in its own evil way. JavaCodeGenerator
 * simply opens a text file...
 * @param className base name of the generated module
 * @throws IOException if the output file cannot be opened
 */
public void setupOutput(String className) throws IOException {
    // generated Python module is named <className>.py
    currentOutput = antlrTool.openOutputFile(className + ".py");
}
  3412. protected boolean isspace(char c) {
  3413. boolean r = true;
  3414. switch (c) {
  3415. case '\n' :
  3416. case '\r' :
  3417. case ' ' :
  3418. case '\t' :
  3419. break;
  3420. default:
  3421. r = false;
  3422. break;
  3423. }
  3424. return r;
  3425. }
  3426. protected void _printAction(String s) {
  3427. if (s == null) {
  3428. return;
  3429. }
  3430. char c;
  3431. int offset; // shall be first no ws character in 's'. We are
  3432. // going to remove at most this number of ws chars after
  3433. // each newline. This will keep the block formatted as it is.
  3434. // When going to figure out the offset, we need to rese the
  3435. // counter after each newline has been seen.
  3436. // Skip leading newlines, tabs and spaces
  3437. int start = 0;
  3438. int end = s.length();
  3439. boolean ws;
  3440. offset = 0;
  3441. ws = true;
  3442. while (start < end && ws)
  3443. {
  3444. c = s.charAt(start++);
  3445. switch (c) {
  3446. case '\n' :
  3447. offset = start;
  3448. break;
  3449. case '\r':
  3450. if( (start)<=end && s.charAt(start) == '\n')
  3451. start++;
  3452. offset = start;
  3453. break;
  3454. case ' ' :
  3455. break;
  3456. case '\t':
  3457. default:
  3458. ws = false;
  3459. break;
  3460. }
  3461. }
  3462. if(ws == false) {
  3463. start--;
  3464. }
  3465. offset = start - offset;
  3466. // Skip leading newlines, tabs and spaces
  3467. end = end - 1;
  3468. while ((end > start) && isspace(s.charAt(end))) {
  3469. end--;
  3470. }
  3471. boolean newline = false;
  3472. int absorbed;
  3473. for (int i = start; i <= end; ++i)
  3474. {
  3475. c = s.charAt(i);
  3476. switch (c) {
  3477. case '\n':
  3478. newline = true;
  3479. break;
  3480. case '\r':
  3481. newline = true;
  3482. if ((i+1) <= end && s.charAt(i+1) == '\n') {
  3483. i++;
  3484. }
  3485. break;
  3486. case '\t':
  3487. System.err.println("warning: tab characters used in Python action");
  3488. currentOutput.print(" ");
  3489. break;
  3490. case ' ':
  3491. currentOutput.print(" ");
  3492. break;
  3493. default:
  3494. currentOutput.print(c);
  3495. break;
  3496. }
  3497. if (newline)
  3498. {
  3499. currentOutput.print("\n");
  3500. printTabs();
  3501. absorbed = 0;
  3502. newline = false;
  3503. // Absorb leading whitespace
  3504. for(i=i+1;i<=end;++i)
  3505. {
  3506. c = s.charAt(i);
  3507. if (!isspace(c)) {
  3508. i--;
  3509. break;
  3510. }
  3511. switch(c) {
  3512. case '\n' :
  3513. newline = true;
  3514. break;
  3515. case '\r':
  3516. if ((i+1) <= end && s.charAt(i+1) == '\n') {
  3517. i++;
  3518. }
  3519. newline = true;
  3520. break;
  3521. }
  3522. if(newline)
  3523. {
  3524. currentOutput.print("\n");
  3525. printTabs();
  3526. absorbed = 0;
  3527. newline = false;
  3528. continue;
  3529. }
  3530. if(absorbed<offset) {
  3531. absorbed++;
  3532. continue;
  3533. }
  3534. /* stop this loop */
  3535. break;
  3536. }
  3537. }
  3538. }
  3539. currentOutput.println();
  3540. }
  3541. protected void od(String s,int i,int end,String msg) {
  3542. System.out.println(msg);
  3543. char c;
  3544. for(int j=i;j<=end;++j)
  3545. {
  3546. c = s.charAt(j);
  3547. switch(c) {
  3548. case '\n':
  3549. System.out.print(" nl ");
  3550. break;
  3551. case '\t':
  3552. System.out.print(" ht ");
  3553. break;
  3554. case ' ':
  3555. System.out.print(" sp ");
  3556. break;
  3557. default:
  3558. System.out.print(" " + c + " ");
  3559. }
  3560. }
  3561. System.out.println("");
  3562. }
  3563. protected void printAction(String s) {
  3564. if (s != null) {
  3565. printTabs();
  3566. _printAction(s);
  3567. }
  3568. }
  3569. protected void printGrammarAction(Grammar grammar) {
  3570. println("### user action >>>");
  3571. printAction(
  3572. processActionForSpecialSymbols(
  3573. grammar.classMemberAction.getText(),
  3574. grammar.classMemberAction.getLine(),
  3575. currentRule,
  3576. null)
  3577. );
  3578. println("### user action <<<");
  3579. }
  3580. protected void _printJavadoc(String s) {
  3581. char c;
  3582. int end = s.length();
  3583. int start = 0;
  3584. boolean newline = false;
  3585. currentOutput.print("\n");
  3586. printTabs();
  3587. currentOutput.print("###");
  3588. for (int i = start; i < end; ++i)
  3589. {
  3590. c = s.charAt(i);
  3591. switch (c) {
  3592. case '\n':
  3593. newline = true;
  3594. break;
  3595. case '\r':
  3596. newline = true;
  3597. if ((i+1) <= end && s.charAt(i+1) == '\n') {
  3598. i++;
  3599. }
  3600. break;
  3601. case '\t':
  3602. currentOutput.print("\t");
  3603. break;
  3604. case ' ':
  3605. currentOutput.print(" ");
  3606. break;
  3607. default:
  3608. currentOutput.print(c);
  3609. break;
  3610. }
  3611. if (newline)
  3612. {
  3613. currentOutput.print("\n");
  3614. printTabs();
  3615. currentOutput.print("###");
  3616. newline = false;
  3617. }
  3618. }
  3619. currentOutput.println();
  3620. }
  3621. protected void genJavadocComment(Grammar g) {
  3622. // print javadoc comment if any
  3623. if (g.comment != null) {
  3624. _printJavadoc(g.comment);
  3625. }
  3626. }
  3627. protected void genJavadocComment(RuleSymbol g) {
  3628. // print javadoc comment if any
  3629. if (g.comment != null) {
  3630. _printJavadoc(g.comment);
  3631. }
  3632. }
  3633. }