PageRenderTime 55ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/Dependencies/boo/lib/antlr-2.7.5/antlr/PythonCodeGenerator.java

https://github.com/w4x/boolangstudio
Java | 4107 lines | 3929 code | 78 blank | 100 comment | 55 complexity | 82d84ee163e77c6b587e424dcb8da8da MD5 | raw file
Possible License(s): GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. // This file is part of PyANTLR. See LICENSE.txt for license
  2. // details..........Copyright (C) Wolfgang Haefelinger, 2004.
  3. //
  4. // $Id:$
  5. package antlr;
  6. import java.util.Enumeration;
  7. import java.util.Hashtable;
  8. import antlr.collections.impl.BitSet;
  9. import antlr.collections.impl.Vector;
  10. import java.io.PrintWriter; //SAS: changed for proper text file io
  11. import java.io.IOException;
  12. import java.io.FileWriter;
/** Generate MyParser.py, MyLexer.py and MyParserTokenTypes.py
 *  (Python code-generation target of ANTLR 2.x). */
public class PythonCodeGenerator extends CodeGenerator {

    // Non-zero while generating code inside a syntactic predicate.
    protected int syntacticPredLevel = 0;

    // Are we generating ASTs (for parsers and tree parsers) right now?
    protected boolean genAST = false;

    // Are we saving the text consumed (for lexers) right now?
    protected boolean saveText = false;

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation.
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    String lt1Value;
    String exceptionThrown;
    String throwNoViable;

    // Header-action keys: user code registered under these names is injected
    // into the generated __init__ / __main__ sections (see genHeaderInit /
    // genHeaderMain below).
    public static final String initHeaderAction = "__init__";
    public static final String mainHeaderAction = "__main__";

    String lexerClassName;
    String parserClassName;
    String treeWalkerClassName;

    /** Tracks the rule being generated. Used for mapTreeId */
    RuleBlock currentRule;

    /** Tracks the rule or labeled subrule being generated. Used for
        AST generation. */
    String currentASTResult;

    /** Mapping between the ids used in the current alt, and the
     * names of variables used to represent their AST values.
     */
    Hashtable treeVariableMap = new Hashtable();

    /** Used to keep track of which AST variables have been defined in a rule
     * (except for the #rule_name and #rule_name_in var's
     */
    Hashtable declaredASTVariables = new Hashtable();

    /* Count of unnamed generated variables */
    int astVarNumber = 1;

    /** Special value used to mark duplicate in treeVariableMap.
     *  Deliberately `new String()` so it is distinguishable from any real
     *  entry by identity (==) comparison. */
    protected static final String NONUNIQUE = new String();

    // Upper bound on generated switch-on-character sizes.
    public static final int caseSizeThreshold = 127; // ascii is max

    // Semantic-predicate strings collected while generating a debug parser;
    // (re)initialized per grammar in gen(LexerGrammar)/gen(ParserGrammar).
    private Vector semPreds;
  56. /** Create a Java code-generator using the given Grammar.
  57. * The caller must still call setTool, setBehavior, and setAnalyzer
  58. * before generating code.
  59. */
  60. protected void printTabs() {
  61. for (int i = 0; i < tabs; i++) {
  62. // don't print tabs ever - replace a tab by ' '
  63. currentOutput.print(" ");
  64. }
  65. }
    /** Create a Python code-generator using the given Grammar.
     *  The caller must still call setTool, setBehavior, and setAnalyzer
     *  before generating code. */
    public PythonCodeGenerator() {
        super();
        charFormatter = new antlr.PythonCharFormatter();
        // NOTE(review): debug tracing is force-enabled here, so every gen(...)
        // method prints to System.out unconditionally. Confirm this is
        // intentional and not a leftover development setting.
        DEBUG_CODE_GENERATOR = true;
    }
  71. /** Adds a semantic predicate string to the sem pred vector
  72. These strings will be used to build an array of sem pred names
  73. when building a debugging parser. This method should only be
  74. called when the debug option is specified
  75. */
  76. protected int addSemPred(String predicate) {
  77. semPreds.appendElement(predicate);
  78. return semPreds.size() - 1;
  79. }
  80. public void exitIfError() {
  81. if (antlrTool.hasError()) {
  82. antlrTool.fatalError("Exiting due to errors.");
  83. }
  84. }
  85. protected void checkCurrentOutputStream() {
  86. try
  87. {
  88. if(currentOutput == null)
  89. throw new NullPointerException();
  90. }
  91. catch(Exception e)
  92. {
  93. System.err.println("error: current output is not set");
  94. e.printStackTrace(System.err);
  95. System.exit(1);
  96. }
  97. }
  98. /** Get the identifier portion of an argument-action.
  99. * For Python the ID of an action is assumed to be everything before
  100. * the assignment, as Python does not support a type.
  101. * @param s The action text
  102. * @param line Line used for error reporting.
  103. * @param column Line used for error reporting.
  104. * @return A string containing the text of the identifier
  105. */
  106. protected String extractIdOfAction(String s, int line, int column) {
  107. s = removeAssignmentFromDeclaration(s);
  108. //wh: removeAssignmentFromDeclaration returns an indentifier that
  109. //wh: may start with whitespace.
  110. s = s.trim();
  111. // println("###ZZZZZ \""+s+"\"");
  112. return s;
  113. }
    /** Get the type portion of an argument-action.
     *  Python does not have a type declaration before an identifier, so we
     *  just return the empty string.
     *  @param s The action text
     *  @param line Line used for error reporting.
     *  @param column Column used for error reporting.
     *  @return A string containing the text of the type (always "")
     */
    protected String extractTypeOfAction(String s, int line, int column) {
        return "";
    }
  124. protected void flushTokens() {
  125. try
  126. {
  127. boolean generated = false;
  128. checkCurrentOutputStream();
  129. println("");
  130. println("### import antlr.Token ");
  131. println("from antlr import Token");
  132. println("### >>>The Known Token Types <<<");
  133. /* save current stream */
  134. PrintWriter cout = currentOutput;
  135. // Loop over all token managers (some of which are lexers)
  136. Enumeration tmIter =
  137. behavior.tokenManagers.elements();
  138. while (tmIter.hasMoreElements())
  139. {
  140. TokenManager tm =
  141. (TokenManager)tmIter.nextElement();
  142. if (!tm.isReadOnly())
  143. {
  144. // Write the token manager tokens as Java
  145. // this must appear before genTokenInterchange so that
  146. // labels are set on string literals
  147. if(! generated) {
  148. genTokenTypes(tm);
  149. generated = true;
  150. }
  151. /* restore stream */
  152. currentOutput = cout;
  153. // Write the token manager tokens as plain text
  154. genTokenInterchange(tm);
  155. currentOutput = cout;
  156. }
  157. exitIfError();
  158. }
  159. }
  160. catch(Exception e) {
  161. exitIfError();
  162. }
  163. checkCurrentOutputStream();
  164. println("");
  165. }
  166. /**Generate the parser, lexer, treeparser, and token types in Java */
  167. public void gen() {
  168. // Do the code generation
  169. try {
  170. // Loop over all grammars
  171. Enumeration grammarIter = behavior.grammars.elements();
  172. while (grammarIter.hasMoreElements()) {
  173. Grammar g = (Grammar)grammarIter.nextElement();
  174. // Connect all the components to each other
  175. g.setGrammarAnalyzer(analyzer);
  176. g.setCodeGenerator(this);
  177. analyzer.setGrammar(g);
  178. // To get right overloading behavior across hetrogeneous grammars
  179. setupGrammarParameters(g);
  180. g.generate();
  181. // print out the grammar with lookahead sets (and FOLLOWs)
  182. // System.out.print(g.toString());
  183. exitIfError();
  184. }
  185. }
  186. catch (IOException e) {
  187. antlrTool.reportException(e, null);
  188. }
  189. }
    /** Generate code for the given grammar element.
     *  Emits either a semantic-predicate check or the translated user
     *  action; in grammars with syntactic predicates the action is wrapped
     *  in an "if not self.inputState.guessing:" guard.
     *  @param action The {...} action to generate
     */
    public void gen(ActionElement action) {
        if (action.isSemPred) {
            genSemPred(action.actionText, action.line);
        }
        else
        {
            if (grammar.hasSyntacticPredicate) {
                // Real actions must not run while guessing in a syn pred.
                println("if not self.inputState.guessing:");
                tabs++;
            }
            // get the name of the followSet for the current rule so that we
            // can replace $FOLLOW in the .g file.
            ActionTransInfo tInfo = new ActionTransInfo();
            String actionStr = processActionForSpecialSymbols(action.actionText,
                                                              action.getLine(),
                                                              currentRule,
                                                              tInfo);
            if (tInfo.refRuleRoot != null) {
                // Somebody referenced "#rule", make sure translated var is valid
                // assignment to #rule is left as a ref also, meaning that assignments
                // with no other refs like "#rule = foo();" still forces this code to be
                // generated (unnecessarily).
                println(tInfo.refRuleRoot + " = currentAST.root");
            }
            // dump the translated action
            printAction(actionStr);
            if (tInfo.assignToRoot) {
                // Somebody did a "#rule=", reset internal currentAST.root.
                println("currentAST.root = " + tInfo.refRuleRoot + "");
                // Re-point currentAST.child at the new root's first child if
                // it has one, else at the root itself, then advance to end.
                println("if (" + tInfo.refRuleRoot + " != None) and (" + tInfo.refRuleRoot + ".getFirstChild() != None):");
                tabs++;
                println("currentAST.child = " + tInfo.refRuleRoot + ".getFirstChild()");
                tabs--;
                println("else:");
                tabs++;
                println("currentAST.child = " + tInfo.refRuleRoot);
                tabs--;
                println("currentAST.advanceChildToEnd()");
            }
            if (grammar.hasSyntacticPredicate) {
                tabs--;
            }
        }
    }
    /** Generate code for the given grammar element.
     *  @param blk The "x|y|z|..." block to generate
     */
    public void gen(AlternativeBlock blk) {
        if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
        genBlockPreamble(blk);
        genBlockInitAction(blk);
        // Tell AST generation to build subrule result
        String saveCurrentASTResult = currentASTResult;
        if (blk.getLabel() != null) {
            currentASTResult = blk.getLabel();
        }
        // Called for its side effects (annotates the block's lookahead and
        // reports ambiguities); the returned flag is deliberately unused.
        boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
        {
            // Preserve the tab level across the generated block body.
            int _tabs_ = tabs;
            PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
            genBlockFinish(howToFinish, throwNoViable);
            tabs = _tabs_;
        }
        // Restore previous AST generation
        currentASTResult = saveCurrentASTResult;
    }
    /** Generate code for the given grammar element.
     *  Block-end elements are synthesized by the grammar parser to represent
     *  the end of a block; nothing is emitted for them (debug trace only).
     *  @param end The block-end element to generate
     */
    public void gen(BlockEndElement end) {
        if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
    }
  267. /** Generate code for the given grammar element.
  268. * @param blk The character literal reference to generate
  269. */
  270. public void gen(CharLiteralElement atom) {
  271. if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
  272. if (atom.getLabel() != null) {
  273. println(atom.getLabel() + " = " + lt1Value );
  274. }
  275. boolean oldsaveText = saveText;
  276. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  277. genMatch(atom);
  278. saveText = oldsaveText;
  279. }
  280. String toString(boolean v) {
  281. String s;
  282. if(v)
  283. s = "True";
  284. else
  285. s = "False";
  286. return s;
  287. }
  288. /** Generate code for the given grammar element.
  289. * @param blk The character-range reference to generate
  290. */
  291. public void gen(CharRangeElement r) {
  292. if (r.getLabel() != null && syntacticPredLevel == 0) {
  293. println(r.getLabel() + " = " + lt1Value);
  294. }
  295. boolean flag = ( grammar instanceof LexerGrammar &&
  296. ( !saveText ||
  297. r.getAutoGenType() ==
  298. GrammarElement.AUTO_GEN_BANG ) );
  299. if (flag) {
  300. println("_saveIndex = self.text.length()");
  301. }
  302. println("self.matchRange(u" + r.beginText + ", u" + r.endText + ")");
  303. if (flag) {
  304. println("self.text.setLength(_saveIndex)");
  305. }
  306. }
    /** Generate the lexer Python file for the given lexer grammar. */
    public void gen(LexerGrammar g) throws IOException
    {
        // If debugging, create a new sempred vector for this grammar
        if (g.debuggingOutput)
            semPreds = new Vector();
        setGrammar(g);
        if (!(grammar instanceof LexerGrammar)) {
            antlrTool.panic("Internal error generating lexer");
        }
        // SAS: moved output creation to method so a subclass can change
        // how the output is generated (for VAJ interface)
        setupOutput(grammar.getClassName());
        genAST = false; // no way to gen trees.
        saveText = true; // save consumed characters.
        tabs = 0;
        // Generate header common to all Python output files
        genHeader();
        // Generate header specific to lexer Python file
        println("### import antlr and other modules ..");
        println("import sys");
        println("import antlr");
        println("");
        // Compatibility shims: define False/True on old Pythons that lack
        // the builtins (pre-2.2.1 / pre-2.3).
        println("version = sys.version.split()[0]");
        println("if version < '2.2.1':");
        tabs++;
        println("False = 0");
        tabs--;
        println("if version < '2.3':");
        tabs++;
        println("True = not False");
        tabs--;
        println("### header action >>> ");
        printActionCode(behavior.getHeaderAction(""),0);
        println("### header action <<< ");
        // Generate user-defined lexer file preamble
        println("### preamble action >>> ");
        printActionCode(grammar.preambleAction.getText(),0);
        println("### preamble action <<< ");
        // Generate lexer class definition
        String sup = null;
        if (grammar.superClass != null) {
            sup = grammar.superClass;
        }
        else {
            sup = "antlr." + grammar.getSuperClass();
        }
        // get prefix (replaces "public" and lets user specify)
        // NOTE(review): 'prefix' is computed here but never used below;
        // presumably carried over from the Java generator -- confirm.
        String prefix = "";
        Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
        if (tprefix != null) {
            String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
            if (p != null) {
                prefix = p;
            }
        }
        // print my literals: module-level dict mapping literal id -> type.
        println("### >>>The Literals<<<");
        println("literals = {}");
        Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
        while (keys.hasMoreElements()) {
            String key = (String)keys.nextElement();
            if (key.charAt(0) != '"') {   // only string-literal symbols
                continue;
            }
            TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
            if (sym instanceof StringLiteralSymbol) {
                StringLiteralSymbol s = (StringLiteralSymbol)sym;
                println("literals[u" + s.getId() + "] = " + s.getTokenType());
            }
        }
        println("");
        flushTokens();
        // print javadoc comment if any
        genJavadocComment(grammar);
        // class name remains the same, it's the module that changes in python.
        println("class " + lexerClassName + "(" + sup + ") :");
        tabs++;
        printGrammarAction(grammar);
        // Generate the constructor from InputStream, which in turn
        // calls the ByteBuffer constructor
        //
        println("def __init__(self, *argv, **kwargs) :");
        tabs++;
        println(sup + ".__init__(self, *argv, **kwargs)");
        // Generate the setting of various generated options.
        // These need to be before the literals since ANTLRHashString depends on
        // the casesensitive stuff.
        println("self.caseSensitiveLiterals = " + toString(g.caseSensitiveLiterals));
        println("self.setCaseSensitive(" + toString(g.caseSensitive) + ")" );
        println("self.literals = literals");
        Enumeration ids;
        // generate the rule name array for debugging
        if (grammar.debuggingOutput) {
            // NOTE(review): "ruleNames[] = [" is not valid Python syntax
            // (Java-style array declaration); the parser generator emits
            // "_ruleNames = [" for the equivalent list. Confirm and fix.
            println("ruleNames[] = [");
            ids = grammar.rules.elements();
            int ruleNum = 0;   // NOTE(review): never incremented or read here
            tabs++;
            while (ids.hasMoreElements()) {
                GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
                if (sym instanceof RuleSymbol)
                    println("\"" + ((RuleSymbol)sym).getId() + "\",");
            }
            tabs--;
            println("]");
        }
        genHeaderInit(grammar);
        tabs--;
        // wh: iterator moved to base class as proposed by mk.
        // println("");
        // Generate the __iter__ method for Python CharScanner (sub)classes.
        // genIterator();
        // Generate nextToken() rule.
        // nextToken() is a synthetic lexer rule that is the implicit OR of all
        // user-defined lexer rules.
        genNextToken();
        println("");
        // Generate code for each rule in the lexer
        ids = grammar.rules.elements();
        int ruleNum = 0;
        while (ids.hasMoreElements()) {
            RuleSymbol sym = (RuleSymbol)ids.nextElement();
            // Don't generate the synthetic rules
            if (!sym.getId().equals("mnextToken")) {
                genRule(sym, false, ruleNum++);
            }
            exitIfError();
        }
        // Generate the semantic predicate map for debugging
        if (grammar.debuggingOutput)
            genSemPredMap();
        // Generate the bitsets used throughout the lexer
        genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
        println("");
        genHeaderMain(grammar);
        // Close the lexer output stream
        currentOutput.close();
        currentOutput = null;
    }
    /** Emit the generated module's __main__ section.
     *  Prefers the grammar-specific "&lt;Class&gt;.__main__" header action,
     *  falling back to the global "__main__" action. When neither exists, a
     *  default test harness is generated for lexers (nothing for parsers).
     *  All output is printed at column 0; the tab level is restored after. */
    protected void genHeaderMain(Grammar grammar)
    {
        String h = grammar.getClassName() + "." + mainHeaderAction;
        String s = behavior.getHeaderAction(h);
        if (isEmpty(s)) {
            s = behavior.getHeaderAction(mainHeaderAction);
        }
        if(isEmpty(s)) {
            if(grammar instanceof LexerGrammar) {
                // Default: emit a simple lexer test driver.
                int _tabs = tabs;
                tabs = 0;
                println("### __main__ header action >>> ");
                genLexerTest();
                tabs = 0;
                println("### __main__ header action <<< ");
                tabs = _tabs;
            }
        } else {
            // User-supplied __main__ action, wrapped in an
            // "if __name__ == '__main__':" guard by printMainFunc.
            int _tabs = tabs;
            tabs = 0;
            println("");
            println("### __main__ header action >>> ");
            printMainFunc(s);
            tabs = 0;
            println("### __main__ header action <<< ");
            tabs = _tabs;
        }
    }
    /** Emit the user-supplied __init__ header action into the generated
     *  constructor. Prefers the grammar-specific "&lt;Class&gt;.__init__"
     *  action, falling back to the global "__init__" action; emits nothing
     *  when neither is present. */
    protected void genHeaderInit(Grammar grammar)
    {
        String h = grammar.getClassName() + "." + initHeaderAction;
        String s = behavior.getHeaderAction(h);
        if (isEmpty(s)) {
            s = behavior.getHeaderAction(initHeaderAction);
        }
        if(isEmpty(s)) {
            /* nothing gets generated by default */
        } else {
            // Unlike genHeaderMain, the current tab level is kept here: the
            // action is printed at the constructor's indentation.
            int _tabs = tabs;
            println("### __init__ header action >>> ");
            printActionCode(s,0);
            tabs = _tabs;
            println("### __init__ header action <<< ");
        }
    }
  491. protected void printMainFunc(String s) {
  492. int _tabs = tabs;
  493. tabs = 0;
  494. println("if __name__ == '__main__':");
  495. tabs++;
  496. printActionCode(s,0);
  497. tabs--;
  498. tabs = _tabs;
  499. }
    /** Generate code for the given grammar element.
     *  Emits a Python "while True:" loop for a (...)+ subrule with an
     *  iteration counter, plus a trailing check that raises NoViableAlt when
     *  the body never matched.
     *  @param blk The (...)+ block to generate
     */
    public void gen(OneOrMoreBlock blk) {
        String label;   // NOTE(review): declared but never used in this method
        String cnt;
        /* save current tabs */
        int _tabs_ = tabs;
        genBlockPreamble(blk);
        // Name of the generated Python iteration counter.
        if (blk.getLabel() != null)
        {
            cnt = "_cnt_" + blk.getLabel();
        }
        else {
            cnt = "_cnt" + blk.ID;
        }
        println("" + cnt + "= 0");
        println("while True:");
        tabs++;
        _tabs_ = tabs;   // remember the loop-body indentation level
        // generate the init action for ()+ ()* inside the loop
        // this allows us to do usefull EOF checking...
        genBlockInitAction(blk);
        // Tell AST generation to build subrule result
        String saveCurrentASTResult = currentASTResult;
        if (blk.getLabel() != null) {
            currentASTResult = blk.getLabel();
        }
        // Called for its side effects (annotates blk, reports ambiguities);
        // the returned flag is deliberately unused.
        boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
        // generate exit test if greedy set to false
        // and an alt is ambiguous with exit branch
        // or when lookahead derived purely from end-of-file
        // Lookahead analysis stops when end-of-file is hit,
        // returning set {epsilon}. Since {epsilon} is not
        // ambig with any real tokens, no error is reported
        // by deterministic() routines and we have to check
        // for the case where the lookahead depth didn't get
        // set to NONDETERMINISTIC (this only happens when the
        // FOLLOW contains real atoms + epsilon).
        boolean generateNonGreedyExitPath = false;
        int nonGreedyExitDepth = grammar.maxk;
        if (!blk.greedy &&
            blk.exitLookaheadDepth <= grammar.maxk &&
            blk.exitCache[blk.exitLookaheadDepth].containsEpsilon())
        {
            generateNonGreedyExitPath = true;
            nonGreedyExitDepth = blk.exitLookaheadDepth;
        }
        else
        {
            if (!blk.greedy &&
                blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
                generateNonGreedyExitPath = true;
            }
        }
        // generate exit test if greedy set to false
        // and an alt is ambiguous with exit branch
        if (generateNonGreedyExitPath)
        {
            println("### nongreedy (...)+ loop; exit depth is " + blk.exitLookaheadDepth);
            String predictExit =
                getLookaheadTestExpression(
                    blk.exitCache,
                    nonGreedyExitDepth);
            println("### nongreedy exit test");
            // Exiting is only legal after at least one iteration matched.
            println("if " + cnt + " >= 1 and " + predictExit + ":");
            tabs++;
            println("break");
            tabs--;
        }
        {
            // Preserve the tab level across the generated block body.
            int _tabs = tabs;
            PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
            genBlockFinish(howToFinish, "break");
            tabs = _tabs;
        }
        /* no matter what previous block did, here we have to continue
        ** one the 'while block' level. Reseting tabs .. */
        tabs = _tabs_;
        println(cnt + " += 1");
        tabs = _tabs_;
        tabs--;   // step back out of the loop body for the post-loop check
        println("if " + cnt + " < 1:");
        tabs++;
        println(throwNoViable);
        tabs--;
        // Restore previous AST generation
        currentASTResult = saveCurrentASTResult;
    }
    /** Generate the parser Python file for the given parser grammar. */
    public void gen(ParserGrammar g)
        throws IOException {
        // if debugging, set up a new vector to keep track of sempred
        // strings for this grammar
        if (g.debuggingOutput)
            semPreds = new Vector();
        setGrammar(g);
        if (!(grammar instanceof ParserGrammar)) {
            antlrTool.panic("Internal error generating parser");
        }
        // Open the output stream for the parser and set the currentOutput
        // SAS: moved file setup so subclass could do it (for VAJ interface)
        setupOutput(grammar.getClassName());
        genAST = grammar.buildAST;
        tabs = 0;
        // Generate the header common to all output files.
        genHeader();
        // Generate header specific to the parser Python file
        println("### import antlr and other modules ..");
        println("import sys");
        println("import antlr");
        println("");
        // Compatibility shims: define False/True on old Pythons that lack
        // the builtins (pre-2.2.1 / pre-2.3).
        println("version = sys.version.split()[0]");
        println("if version < '2.2.1':");
        tabs++;
        println("False = 0");
        tabs--;
        println("if version < '2.3':");
        tabs++;
        println("True = not False");
        tabs--;
        println("### header action >>> ");
        printActionCode(behavior.getHeaderAction(""),0);
        println("### header action <<< ");
        println("### preamble action>>>");
        // Output the user-defined parser preamble
        printActionCode(grammar.preambleAction.getText(),0);
        println("### preamble action <<<");
        flushTokens();
        // Generate parser class definition
        String sup = null;
        if (grammar.superClass != null)
            sup = grammar.superClass;
        else
            sup = "antlr." + grammar.getSuperClass();
        // print javadoc comment if any
        genJavadocComment(grammar);
        // get prefix (replaces "public" and lets user specify)
        // NOTE(review): 'prefix' is computed here but never used below;
        // presumably carried over from the Java generator -- confirm.
        String prefix = "";
        Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
        if (tprefix != null) {
            String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
            if (p != null) {
                prefix = p;
            }
        }
        print("class " + parserClassName + "(" + sup);
        println("):");
        tabs++;
        // set up an array of all the rule names so the debugger can
        // keep track of them only by number -- less to store in tree...
        if (grammar.debuggingOutput) {
            println("_ruleNames = [");
            Enumeration ids = grammar.rules.elements();
            int ruleNum = 0;   // NOTE(review): never incremented or read here
            tabs++;
            while (ids.hasMoreElements()) {
                GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
                if (sym instanceof RuleSymbol)
                    println("\"" + ((RuleSymbol)sym).getId() + "\",");
            }
            tabs--;
            println("]");
        }
        // Generate user-defined parser class members
        printGrammarAction(grammar);
        // Generate parser class constructor from TokenBuffer
        println("");
        println("def __init__(self, *args, **kwargs):");
        tabs++;
        println(sup + ".__init__(self, *args, **kwargs)");
        println("self.tokenNames = _tokenNames");
        // if debugging, set up arrays and call the user-overridable
        // debugging setup method
        if (grammar.debuggingOutput) {
            println("self.ruleNames = _ruleNames");
            println("self.semPredNames = _semPredNames");
            println("self.setupDebugging(self.tokenBuf)");
        }
        if (grammar.buildAST) {
            println("self.buildTokenTypeASTClassMap()");
            println("self.astFactory = antlr.ASTFactory(self.getTokenTypeToASTClassMap())");
            if(labeledElementASTType != null)
            {
                println("self.astFactory.setASTNodeClass("+
                        labeledElementASTType+")");
            }
        }
        genHeaderInit(grammar);
        println("");
        // Generate code for each rule in the grammar
        Enumeration ids = grammar.rules.elements();
        int ruleNum = 0;
        while (ids.hasMoreElements()) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol) {
                RuleSymbol rs = (RuleSymbol)sym;
                // A rule nobody references is treated as a start rule.
                genRule(rs, rs.references.size() == 0, ruleNum++);
            }
            exitIfError();
        }
        if ( grammar.buildAST ) {
            genTokenASTNodeMap();
        }
        // Generate the token names
        genTokenStrings();
        // Generate the bitsets used throughout the grammar
        genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
        // Generate the semantic predicate map for debugging
        if (grammar.debuggingOutput)
            genSemPredMap();
        // Close class definition
        println("");
        tabs = 0;
        genHeaderMain(grammar);
        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }
    /** Generate code for the given grammar element.
     *  @param rr The rule-reference to generate
     */
    public void gen(RuleRefElement rr) {
        if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
        // NOTE(review): if the symbol is not a RuleSymbol this cast throws
        // ClassCastException before the instanceof check below can fire.
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
        if (rs == null || !rs.isDefined()) {
            // Is this redundant???
            antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
            return;
        }
        if (!(rs instanceof RuleSymbol)) {
            // Is this redundant??? (unreachable -- see the cast above)
            antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
            return;
        }
        genErrorTryForElement(rr);
        // AST value for labeled rule refs in tree walker.
        // This is not AST construction; it is just the input tree node value.
        if (grammar instanceof TreeWalkerGrammar &&
            rr.getLabel() != null &&
            syntacticPredLevel == 0) {
            println(rr.getLabel() + " = antlr.ifelse(_t == antlr.ASTNULL, None, " + lt1Value + ")");
        }
        // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
        if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
            println("_saveIndex = self.text.length()");
        }
        // Process return value assignment if any
        printTabs();
        if (rr.idAssign != null) {
            // Warn if the rule has no return type
            if (rs.block.returnAction == null) {
                antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
            _print(rr.idAssign + "=");
        }
        else {
            // Warn about return value if any, but not inside syntactic predicate
            if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
                antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
            }
        }
        // Call the rule
        GenRuleInvocation(rr);
        // if in lexer and ! on element or alt or rule, save buffer index to kill later
        if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
            println("self.text.setLength(_saveIndex)");
        }
        // if not in a syntactic predicate
        if (syntacticPredLevel == 0) {
            boolean doNoGuessTest = (
                grammar.hasSyntacticPredicate && (
                    grammar.buildAST && rr.getLabel() != null ||
                    (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
                )
            );
            if (doNoGuessTest) {
                // NOTE(review): the guessing guard is disabled (commented out
                // in the original); both doNoGuessTest branches are empty.
                // println("if (inputState.guessing==0) {");
                // tabs++;
            }
            if (grammar.buildAST && rr.getLabel() != null) {
                // always gen variable for rule return on labeled rules
                println(rr.getLabel() + "_AST = self.returnAST");
            }
            if (genAST) {
                switch (rr.getAutoGenType()) {
                case GrammarElement.AUTO_GEN_NONE:
                    println("self.addASTChild(currentAST, self.returnAST)");
                    break;
                case GrammarElement.AUTO_GEN_CARET:
                    antlrTool.error("Internal: encountered ^ after rule reference");
                    break;
                default:
                    break;
                }
            }
            // if a lexer and labeled, Token label defined at rule level, just set it here
            if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
                println(rr.getLabel() + " = self._returnToken");
            }
            if (doNoGuessTest) {
            }
        }
        genErrorCatchForElement(rr);
    }
  805. /** Generate code for the given grammar element.
  806. * @param blk The string-literal reference to generate
  807. */
  808. public void gen(StringLiteralElement atom) {
  809. if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
  810. // Variable declarations for labeled elements
  811. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  812. println(atom.getLabel() + " = " + lt1Value + "");
  813. }
  814. // AST
  815. genElementAST(atom);
  816. // is there a bang on the literal?
  817. boolean oldsaveText = saveText;
  818. saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
  819. // matching
  820. genMatch(atom);
  821. saveText = oldsaveText;
  822. // tack on tree cursor motion if doing a tree walker
  823. if (grammar instanceof TreeWalkerGrammar) {
  824. println("_t = _t.getNextSibling()");
  825. }
  826. }
  827. /** Generate code for the given grammar element.
  828. * @param blk The token-range reference to generate
  829. */
  830. public void gen(TokenRangeElement r) {
  831. genErrorTryForElement(r);
  832. if (r.getLabel() != null && syntacticPredLevel == 0) {
  833. println(r.getLabel() + " = " + lt1Value);
  834. }
  835. // AST
  836. genElementAST(r);
  837. // match
  838. println("self.matchRange(u" + r.beginText + ", u" + r.endText + ")");
  839. genErrorCatchForElement(r);
  840. }
  841. /** Generate code for the given grammar element.
  842. * @param blk The token-reference to generate
  843. */
  844. public void gen(TokenRefElement atom) {
  845. if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
  846. if (grammar instanceof LexerGrammar) {
  847. antlrTool.panic("Token reference found in lexer");
  848. }
  849. genErrorTryForElement(atom);
  850. // Assign Token value to token label variable
  851. if (atom.getLabel() != null && syntacticPredLevel == 0) {
  852. println(atom.getLabel() + " = " + lt1Value + "");
  853. }
  854. // AST
  855. genElementAST(atom);
  856. // matching
  857. genMatch(atom);
  858. genErrorCatchForElement(atom);
  859. // tack on tree cursor motion if doing a tree walker
  860. if (grammar instanceof TreeWalkerGrammar) {
  861. println("_t = _t.getNextSibling()");
  862. }
  863. }
/** Generate code for a tree pattern element (e.g. #( ROOT child1 child2 )).
 * Emits Python that: saves the AST cursor, matches the root node,
 * descends to the first child, generates the match for each child
 * alternative, then restores the cursor and steps to the next sibling.
 * @param t The tree element to generate
 */
public void gen(TreeElement t) {
    // save AST cursor so it can be restored after walking this subtree
    println("_t" + t.ID + " = _t");
    // If there is a label on the root, then assign that to the variable
    // (None when the cursor sits on ASTNULL).
    if (t.root.getLabel() != null) {
        println(t.root.getLabel() + " = antlr.ifelse(_t == antlr.ASTNULL, None, _t)");
    }
    // check for invalid modifiers ! and ^ on tree element roots
    if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
        antlrTool.error("Suffixing a root node with '!' is not implemented",
                        grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
        antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
                          grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    // Generate AST variables for the root
    genElementAST(t.root);
    if (grammar.buildAST) {
        // Save the AST construction state
        println("_currentAST" + t.ID + " = currentAST.copy()");
        // Make the next item added a child of the TreeElement root
        println("currentAST.root = currentAST.child");
        println("currentAST.child = None");
    }
    // match root: a wildcard root only requires a non-null node
    if ( t.root instanceof WildcardElement ) {
        println("if not _t: raise antlr.MismatchedTokenException()");
    }
    else {
        genMatch(t.root);
    }
    // move to list of children
    println("_t = _t.getFirstChild()");
    // walk list of children, generating code for each
    for (int i = 0; i < t.getAlternatives().size(); i++) {
        Alternative a = t.getAlternativeAt(i);
        AlternativeElement e = a.head;
        while (e != null) {
            e.generate();
            e = e.next;
        }
    }
    if (grammar.buildAST) {
        // restore the AST construction state to that just after the
        // tree root was added
        println("currentAST = _currentAST" + t.ID + "");
    }
    // restore AST cursor
    println("_t = _t" + t.ID + "");
    // move cursor to sibling of tree just parsed
    println("_t = _t.getNextSibling()");
}
  919. /** Generate the tree-parser Java file */
  920. public void gen(TreeWalkerGrammar g) throws IOException {
  921. // SAS: debugging stuff removed for now...
  922. setGrammar(g);
  923. if (!(grammar instanceof TreeWalkerGrammar)) {
  924. antlrTool.panic("Internal error generating tree-walker");
  925. }
  926. // Open the output stream for the parser and set the currentOutput
  927. // SAS: move file open to method so subclass can override it
  928. // (mainly for VAJ interface)
  929. setupOutput(grammar.getClassName());
  930. genAST = grammar.buildAST;
  931. tabs = 0;
  932. // Generate the header common to all output files.
  933. genHeader();
  934. // Generate header specific to lexer Java file
  935. println("### import antlr and other modules ..");
  936. println("import sys");
  937. println("import antlr");
  938. println("");
  939. println("version = sys.version.split()[0]");
  940. println("if version < '2.2.1':");
  941. tabs++;
  942. println("False = 0");
  943. tabs--;
  944. println("if version < '2.3':");
  945. tabs++;
  946. println("True = not False");
  947. tabs--;
  948. println("### header action >>> ");
  949. printActionCode(behavior.getHeaderAction(""),0);
  950. println("### header action <<< ");
  951. flushTokens();
  952. println("### user code>>>");
  953. // Output the user-defined parser preamble
  954. printActionCode(grammar.preambleAction.getText(),0);
  955. println("### user code<<<");
  956. // Generate parser class definition
  957. String sup = null;
  958. if (grammar.superClass != null) {
  959. sup = grammar.superClass;
  960. }
  961. else {
  962. sup = "antlr." + grammar.getSuperClass();
  963. }
  964. println("");
  965. // get prefix (replaces "public" and lets user specify)
  966. String prefix = "";
  967. Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
  968. if (tprefix != null) {
  969. String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
  970. if (p != null) {
  971. prefix = p;
  972. }
  973. }
  974. // print javadoc comment if any
  975. genJavadocComment(grammar);
  976. println("class " + treeWalkerClassName + "(" + sup + "):");
  977. tabs++;
  978. // Generate default parser class constructor
  979. println("");
  980. println("# ctor ..");
  981. println("def __init__(self, *args, **kwargs):");
  982. tabs++;
  983. println(sup + ".__init__(self, *args, **kwargs)");
  984. println("self.tokenNames = _tokenNames");
  985. genHeaderInit(grammar);
  986. tabs--;
  987. println("");
  988. // print grammar specific action
  989. printGrammarAction(grammar);
  990. // Generate code for each rule in the grammar
  991. Enumeration ids = grammar.rules.elements();
  992. int ruleNum = 0;
  993. String ruleNameInits = "";
  994. while (ids.hasMoreElements()) {
  995. GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
  996. if (sym instanceof RuleSymbol) {
  997. RuleSymbol rs = (RuleSymbol)sym;
  998. genRule(rs, rs.references.size() == 0, ruleNum++);
  999. }
  1000. exitIfError();
  1001. }
  1002. // Generate the token names
  1003. genTokenStrings();
  1004. // Generate the bitsets used throughout the grammar
  1005. genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
  1006. tabs = 0;
  1007. genHeaderMain(grammar);
  1008. // Close the parser output stream
  1009. currentOutput.close();
  1010. currentOutput = null;
  1011. }
  1012. /** Generate code for the given grammar element.
  1013. * @param wc The wildcard element to generate
  1014. */
  1015. public void gen(WildcardElement wc) {
  1016. // Variable assignment for labeled elements
  1017. if (wc.getLabel() != null && syntacticPredLevel == 0) {
  1018. println(wc.getLabel() + " = " + lt1Value + "");
  1019. }
  1020. // AST
  1021. genElementAST(wc);
  1022. // Match anything but EOF
  1023. if (grammar instanceof TreeWalkerGrammar) {
  1024. println("if not _t:");
  1025. tabs++;
  1026. println("raise MismatchedTokenException()");
  1027. tabs--;
  1028. }
  1029. else if (grammar instanceof LexerGrammar) {
  1030. if (grammar instanceof LexerGrammar &&
  1031. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1032. println("_saveIndex = self.text.length()");
  1033. }
  1034. println("self.matchNot(antlr.EOF_CHAR)");
  1035. if (grammar instanceof LexerGrammar &&
  1036. (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
  1037. println("self.text.setLength(_saveIndex)"); // kill text atom put in buffer
  1038. }
  1039. }
  1040. else {
  1041. println("self.matchNot(" + getValueString(Token.EOF_TYPE,false) + ")");
  1042. }
  1043. // tack on tree cursor motion if doing a tree walker
  1044. if (grammar instanceof TreeWalkerGrammar) {
  1045. println("_t = _t.getNextSibling()");
  1046. }
  1047. }
  1048. /** Generate code for the given grammar element.
  1049. * @param blk The (...)* block to generate
  1050. */
  1051. public void gen(ZeroOrMoreBlock blk) {
  1052. int _tabs_ = tabs;
  1053. genBlockPreamble(blk);
  1054. String label;
  1055. println("while True:");
  1056. tabs++;
  1057. _tabs_ = tabs;
  1058. // generate the init action for ()* inside the loop
  1059. // this allows us to do usefull EOF checking...
  1060. genBlockInitAction(blk);
  1061. // Tell AST generation to build subrule result
  1062. String saveCurrentASTResult = currentASTResult;
  1063. if (blk.getLabel() != null) {
  1064. currentASTResult = blk.getLabel();
  1065. }
  1066. boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
  1067. // generate exit test if greedy set to false
  1068. // and an alt is ambiguous with exit branch
  1069. // or when lookahead derived purely from end-of-file
  1070. // Lookahead analysis stops when end-of-file is hit,
  1071. // returning set {epsilon}. Since {epsilon} is not
  1072. // ambig with any real tokens, no error is reported
  1073. // by deterministic() routines and we have to check
  1074. // for the case where the lookahead depth didn't get
  1075. // set to NONDETERMINISTIC (this only happens when the
  1076. // FOLLOW contains real atoms + epsilon).
  1077. boolean generateNonGreedyExitPath = false;
  1078. int nonGreedyExitDepth = grammar.maxk;
  1079. if (!blk.greedy &&
  1080. blk.exitLookaheadDepth <= grammar.maxk &&
  1081. blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
  1082. generateNonGreedyExitPath = true;
  1083. nonGreedyExitDepth = blk.exitLookaheadDepth;
  1084. }
  1085. else if (!blk.greedy &&
  1086. blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
  1087. generateNonGreedyExitPath = true;
  1088. }
  1089. if (generateNonGreedyExitPath) {
  1090. if (DEBUG_CODE_GENERATOR) {
  1091. System.out.println("nongreedy (...)* loop; exit depth is " +
  1092. blk.exitLookaheadDepth);
  1093. }
  1094. String predictExit =
  1095. getLookaheadTestExpression(blk.exitCache,
  1096. nonGreedyExitDepth);
  1097. println("### nongreedy exit test");
  1098. println("if (" + predictExit + "):");
  1099. tabs++;
  1100. println("break");
  1101. tabs--;
  1102. }
  1103. {
  1104. int _tabs = tabs;
  1105. PythonBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
  1106. genBlockFinish(howToFinish, "break");
  1107. tabs = _tabs;
  1108. }
  1109. tabs = _tabs_; /* no matter where we are */
  1110. tabs--;
  1111. // Restore previous AST generation
  1112. currentASTResult = saveCurrentASTResult;
  1113. }
  1114. /** Generate an alternative.
  1115. * @param alt The alternative to generate
  1116. * @param blk The block to which the alternative belongs
  1117. */
  1118. protected void genAlt(Alternative alt, AlternativeBlock blk) {
  1119. // Save the AST generation state, and set it to that of the alt
  1120. boolean savegenAST = genAST;
  1121. genAST = genAST && alt.getAutoGen();
  1122. boolean oldsaveTest = saveText;
  1123. saveText = saveText && alt.getAutoGen();
  1124. // Reset the variable name map for the alternative
  1125. Hashtable saveMap = treeVariableMap;
  1126. treeVariableMap = new Hashtable();
  1127. // Generate try block around the alt for error handling
  1128. if (alt.exceptionSpec != null) {
  1129. println("try:");
  1130. tabs++;
  1131. }
  1132. println("pass"); // make sure that always something gets generated ..
  1133. AlternativeElement elem = alt.head;
  1134. while (!(elem instanceof BlockEndElement)) {
  1135. elem.generate(); // alt can begin with anything. Ask target to gen.
  1136. elem = elem.next;
  1137. }
  1138. if (genAST) {
  1139. if (blk instanceof RuleBlock) {
  1140. // Set the AST return value for the rule
  1141. RuleBlock rblk = (RuleBlock)blk;
  1142. if (grammar.hasSyntacticPredicate) {
  1143. }
  1144. println(rblk.getRuleName() + "_AST = currentAST.root");
  1145. if (grammar.hasSyntacticPredicate) {
  1146. }
  1147. }
  1148. else if (blk.getLabel() != null) {
  1149. antlrTool.warning(
  1150. "Labeled subrules not yet supported",
  1151. grammar.getFilename(), blk.getLine(), blk.getColumn());
  1152. }
  1153. }
  1154. if (alt.exceptionSpec != null) {
  1155. tabs--;
  1156. genErrorHandler(alt.exceptionSpec);
  1157. }
  1158. genAST = savegenAST;
  1159. saveText = oldsaveTest;
  1160. treeVariableMap = saveMap;
  1161. }
  1162. /** Generate all the bitsets to be used in the parser or lexer
  1163. * Generate the raw bitset data like "long _tokenSet1_data[] = {...}"
  1164. * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data)"
  1165. * Note that most languages do not support object initialization inside a
  1166. * class definition, so other code-generators may have to separate the
  1167. * bitset declarations from the initializations (e.g., put the initializations
  1168. * in the generated constructor instead).
  1169. * @param bitsetList The list of bitsets to generate.
  1170. * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
  1171. */
  1172. protected void genBitsets(Vector bitsetList,
  1173. int maxVocabulary
  1174. ) {
  1175. println("");
  1176. for (int i = 0; i < bitsetList.size(); i++) {
  1177. BitSet p = (BitSet)bitsetList.elementAt(i);
  1178. // Ensure that generated BitSet is large enough for vocabulary
  1179. p.growToInclude(maxVocabulary);
  1180. genBitSet(p, i);
  1181. }
  1182. }
/** Emit one bitset as Python at module scope, e.g.:
 *     def mk_tokenSet_0():
 *         data = [ ... long words ... ]
 *         return data
 *     _tokenSet_0 = antlr.BitSet(mk_tokenSet_0())
 *
 * For large bitsets the initializer is optimized: runs of identical
 * words become a "for x in xrange(i, j):" loop. This is most useful
 * for lexers using unicode.
 * @param p  the bitset to emit
 * @param id index used to build the generated name via getBitsetName(id)
 */
private void genBitSet(BitSet p, int id) {
    int _tabs_ = tabs;
    // wanna have bitsets on module scope, so they are available
    // when module gets loaded.
    tabs = 0;
    println("");
    println("### generate bit set");
    println(
        "def mk" + getBitsetName(id) + "(): " );
    tabs++;
    int n = p.lengthInLongWords();
    if ( n<BITSET_OPTIMIZE_INIT_THRESHOLD )
    {
        // small set: dump all words inline as one list literal
        println("### var1");
        println("data = [ " + p.toStringOfWords() + "]");
    }
    else
    {
        // will init manually, allocate space then set values
        println("data = [0L] * " + n + " ### init list");
        long[] elems = p.toPackedArray();
        for (int i = 0; i < elems.length;)
        {
            if ( elems[i]==0 )
            {
                // zero words are already covered by the [0L] * n init
                i++;
                continue;
            }
            if ( (i+1)==elems.length || elems[i]!=elems[i+1] )
            {
                // last number or no run of numbers, just dump assignment
                println("data["+ i + "] =" + elems[i] + "L");
                i++;
                continue;
            }
            // scan to find end of run of identical words [i, j)
            int j;
            for (j = i + 1;j < elems.length && elems[j]==elems[i];j++)
            {}
            long e = elems[i];
            // E0007: fixed — emit the run as a loop instead of j-i assignments
            println("for x in xrange(" + i+", " + j + "):");
            tabs++;
            println("data[x] = " + e + "L");
            tabs--;
            i = j;
        }
    }
    println("return data");
    tabs--;
    // BitSet object wrapping the word list
    println(
        getBitsetName(id) + " = antlr.BitSet(mk" + getBitsetName(id) + "())" );
    // restore tabs
    tabs = _tabs_;
}
  1250. private void genBlockFinish(PythonBlockFinishingInfo howToFinish,
  1251. String noViableAction) {
  1252. if (howToFinish.needAnErrorClause &&
  1253. (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
  1254. if (howToFinish.generatedAnIf)
  1255. {
  1256. println("else:" );
  1257. }
  1258. tabs++;
  1259. println(noViableAction);
  1260. tabs--;
  1261. }
  1262. if (howToFinish.postscript != null) {
  1263. println(howToFinish.postscript);
  1264. }
  1265. }
  1266. /* just to be called by nextToken */
  1267. private void genBlockFinish1(PythonBlockFinishingInfo howToFinish,
  1268. String noViableAction) {
  1269. if (howToFinish.needAnErrorClause &&
  1270. (howToFinish.generatedAnIf || howToFinish.generatedSwitch))
  1271. {
  1272. if (howToFinish.generatedAnIf)
  1273. {
  1274. // tabs++;
  1275. println("else:" );
  1276. }
  1277. tabs++;
  1278. println(noViableAction);
  1279. tabs--;
  1280. if (howToFinish.generatedAnIf)
  1281. {
  1282. // tabs--;
  1283. // println("### tabs--");
  1284. }
  1285. }
  1286. if (howToFinish.postscript != null) {
  1287. println(howToFinish.postscript);
  1288. }
  1289. }
  1290. /** Generate the init action for a block, which may be a RuleBlock or a
  1291. * plain AlternativeBLock.
  1292. * @blk The block for which the preamble is to be generated.
  1293. */
  1294. protected void genBlockInitAction(AlternativeBlock blk) {
  1295. // dump out init action
  1296. if (blk.initAction != null) {
  1297. printAction(processActionForSpecialSymbols(blk.initAction, blk.getLine(), currentRule, null));
  1298. }
  1299. }
/** Generate the header for a block, which may be a RuleBlock or a
 * plain AlternativeBlock. This generates any variable declarations
 * and syntactic-predicate-testing variables. For a RuleBlock, one
 * Python variable (and, when building ASTs, a *_AST variable) is
 * declared per labeled element; plain AlternativeBlocks get nothing.
 * @param blk The block for which the preamble is to be generated.
 */
protected void genBlockPreamble(AlternativeBlock blk) {
    // define labels for rule blocks.
    if (blk instanceof RuleBlock) {
        RuleBlock rblk = (RuleBlock)blk;
        if (rblk.labeledElements != null) {
            for (int i = 0; i < rblk.labeledElements.size(); i++) {
                AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
                // System.out.println("looking at labeled element: "+a);
                // Variables for labeled rule refs and
                // subrules are different than variables for
                // grammar atoms. This test is a little tricky
                // because we want to get all rule refs and ebnf,
                // but not rule blocks or syntactic predicates
                if (
                    a instanceof RuleRefElement ||
                    a instanceof AlternativeBlock &&
                    !(a instanceof RuleBlock) &&
                    !(a instanceof SynPredBlock)
                ) {
                    if (
                        !(a instanceof RuleRefElement) &&
                        ((AlternativeBlock)a).not &&
                        analyzer.subruleCanBeInverted(((AlternativeBlock)a), grammar instanceof LexerGrammar)
                    ) {
                        // Special case for inverted subrules that
                        // will be inlined. Treat these like
                        // token or char literal references
                        println(a.getLabel() + " = " + labeledElementInit);
                        if (grammar.buildAST) {
                            genASTDeclaration(a);
                        }
                    }
                    else {
                        if (grammar.buildAST) {
                            // Always gen AST variables for
                            // labeled elements, even if the
                            // element itself is marked with !
                            genASTDeclaration(a);
                        }
                        if (grammar instanceof LexerGrammar) {
                            println(a.getLabel() + " = None");
                        }
                        if (grammar instanceof TreeWalkerGrammar) {
                            // always generate rule-ref variables
                            // for tree walker
                            println(a.getLabel() + " = " + labeledElementInit);
                        }
                    }
                }
                else {
                    // It is a token or literal reference. Generate the
                    // correct variable type for this grammar
                    println(a.getLabel() + " = " + labeledElementInit);
                    // In addition, generate *_AST variables if
                    // building ASTs; honor a per-atom AST node type
                    // when one was specified in the grammar.
                    if (grammar.buildAST) {
                        if (a instanceof GrammarAtom &&
                            ((GrammarAtom)a).getASTNodeType() != null) {
                            GrammarAtom ga = (GrammarAtom)a;
                            genASTDeclaration(a, ga.getASTNodeType());
                        }
                        else {
                            genASTDeclaration(a);
                        }
                    }
                }
            }
        }
    }
}
  1375. /** Generate a series of case statements that implement a BitSet test.
  1376. * @param p The Bitset for which cases are to be generated
  1377. */
  1378. protected void genCases(BitSet p) {
  1379. if (DEBUG_CODE_GENERATOR) System.out.println("genCases(" + p + ")");
  1380. int[] elems;
  1381. elems = p.toArray();
  1382. // Wrap cases four-per-line for lexer, one-per-line for parser
  1383. int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
  1384. int j = 1;
  1385. boolean startOfLine = true;
  1386. print("elif la1 and la1 in ");
  1387. if (grammar instanceof LexerGrammar)
  1388. {
  1389. _print("u'");
  1390. for (int i = 0; i < elems.length; i++) {
  1391. _print(getValueString(elems[i],false));
  1392. }
  1393. _print("':\n");
  1394. return;
  1395. }
  1396. // Parser or TreeParser ..
  1397. _print("[");
  1398. for (int i = 0; i < elems.length; i++) {
  1399. _print(getValueString(elems[i],false));
  1400. if(i+1<elems.length)
  1401. _print(",");
  1402. }
  1403. _print("]:\n");
  1404. }
  1405. /**Generate common code for a block of alternatives; return a
  1406. * postscript that needs to be generated at the end of the
  1407. * block. Other routines may append else-clauses and such for
  1408. * error checking before the postfix is generated. If the
  1409. * grammar is a lexer, then generate alternatives in an order
  1410. * where alternatives requiring deeper lookahead are generated
  1411. * first, and EOF in the lookahead set reduces the depth of
  1412. * the lookahead. @param blk The block to generate @param
  1413. * noTestForSingle If true, then it does not generate a test
  1414. * for a single alternative.
  1415. */
  1416. public PythonBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
  1417. boolean noTestForSingle) {
  1418. int _tabs_ = tabs; // remember where we are ..
  1419. int nIF = 0;
  1420. boolean createdLL1Switch = false;
  1421. int closingBracesOfIFSequence = 0;
  1422. PythonBlockFinishingInfo finishingInfo =
  1423. new PythonBlockFinishingInfo();
  1424. // Save the AST generation state, and set it to that of the block
  1425. boolean savegenAST = genAST;
  1426. genAST = genAST && blk.getAutoGen();
  1427. boolean oldsaveTest = saveText;
  1428. saveText = saveText && blk.getAutoGen();
  1429. // Is this block inverted? If so, generate special-case code
  1430. if (
  1431. blk.not &&
  1432. analyzer.subruleCanBeInverted(blk, grammar instanceof LexerGrammar)
  1433. )
  1434. {
  1435. if (DEBUG_CODE_GENERATOR) System.out.println("special case: ~(subrule)");
  1436. Lookahead p = analyzer.look(1, blk);
  1437. // Variable assignment for labeled elements
  1438. if (blk.getLabel() != null && syntacticPredLevel == 0)
  1439. {
  1440. println(blk.getLabel() + " = " + lt1Value);
  1441. }
  1442. // AST
  1443. genElementAST(blk);
  1444. String astArgs = "";
  1445. if (grammar instanceof TreeWalkerGrammar) {
  1446. astArgs = "_t, ";
  1447. }
  1448. // match the bitset for the alternative
  1449. println("self.match(" + astArgs + getBitsetName(markBitsetForGen(p.fset)) + ")");
  1450. // tack on tree cursor motion if doing a tree walker
  1451. if (grammar instanceof TreeWalkerGrammar) {
  1452. println("_t = _t.getNextSibling()");
  1453. }
  1454. return finishingInfo;
  1455. }
  1456. // Special handling for single alt
  1457. if (blk.getAlternatives().size() == 1)
  1458. {
  1459. Alternative alt = blk.getAlternativeAt(0);
  1460. // Generate a warning if there is a synPred for single alt.
  1461. if (alt.synPred != null) {
  1462. antlrTool.warning(
  1463. "Syntactic predicate superfluous for single alternative",
  1464. grammar.getFilename(),
  1465. blk.getAlternativeAt(0).synPred.getLine(),
  1466. blk.getAlternativeAt(0).synPred.getColumn()
  1467. );
  1468. }
  1469. if (noTestForSingle)
  1470. {
  1471. if (alt.semPred != null) {
  1472. // Generate validating predicate
  1473. genSemPred(alt.semPred, blk.line);
  1474. }
  1475. genAlt(alt, blk);
  1476. return finishingInfo;
  1477. }
  1478. }
  1479. // count number of simple LL(1) cases; only do switch …

Large files files are truncated, but you can click here to view the full file