PageRenderTime 66ms CodeModel.GetById 40ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/antlr-2.7.5/antlr/CodeGenerator.java

https://github.com/boo/boo-lang
Java | 663 lines | 327 code | 78 blank | 258 comment | 69 complexity | ea180ad6009aadf07b41afaef6bbdfc9 MD5 | raw file
Possible License(s): GPL-2.0
  1. package antlr;
  2. /* ANTLR Translator Generator
  3. * Project led by Terence Parr at http://www.jGuru.com
  4. * Software rights: http://www.antlr.org/license.html
  5. *
  6. * $Id: //depot/code/org.antlr/release/antlr-2.7.5/antlr/CodeGenerator.java#1 $
  7. */
  8. import java.io.PrintWriter;
  9. import java.io.IOException;
  10. import java.io.FileWriter;
  11. import antlr.collections.impl.Vector;
  12. import antlr.collections.impl.BitSet;
  13. /** A generic ANTLR code generator. All code generators
  14. * derive from this class.
  15. *
  16. * <p>
  17. * A CodeGenerator knows about a Grammar data structure and
  18. * a grammar analyzer. The Grammar is walked to generate the
  19. * appropriate code for both a parser and lexer (if present).
  20. * This interface may change slightly so that the lexer is
  21. * itself living inside of a Grammar object (in which case,
  22. * this class generates only one recognizer). The main method
  23. * to call is <tt>gen()</tt>, which initiates all code gen.
  24. *
  25. * <p>
  26. * The interaction of the code generator with the analyzer is
  27. * simple: each subrule block calls deterministic() before generating
  28. * code for the block. Method deterministic() sets lookahead caches
  29. * in each Alternative object. Technically, a code generator
  30. * doesn't need the grammar analyzer if all lookahead analysis
  31. * is done at runtime, but this would result in a slower parser.
  32. *
  33. * <p>
  34. * This class provides a set of support utilities to handle argument
  35. * list parsing and so on.
  36. *
  37. * @author Terence Parr, John Lilley
  38. * @version 2.00a
  39. * @see antlr.JavaCodeGenerator
  40. * @see antlr.DiagnosticCodeGenerator
  41. * @see antlr.LLkAnalyzer
  42. * @see antlr.Grammar
  43. * @see antlr.AlternativeElement
  44. * @see antlr.Lookahead
  45. */
  46. public abstract class CodeGenerator {
  47. protected antlr.Tool antlrTool;
  48. /** Current tab indentation for code output */
  49. protected int tabs = 0;
  50. /** Current output Stream */
  51. transient protected PrintWriter currentOutput; // SAS: for proper text i/o
  52. /** The grammar for which we generate code */
  53. protected Grammar grammar = null;
  54. /** List of all bitsets that must be dumped. These are Vectors of BitSet. */
  55. protected Vector bitsetsUsed;
  56. /** The grammar behavior */
  57. protected DefineGrammarSymbols behavior;
  58. /** The LLk analyzer */
  59. protected LLkGrammarAnalyzer analyzer;
  60. /** Object used to format characters in the target language.
  61. * subclass must initialize this to the language-specific formatter
  62. */
  63. protected CharFormatter charFormatter;
  64. /** Use option "codeGenDebug" to generate debugging output */
  65. protected boolean DEBUG_CODE_GENERATOR = false;
  66. /** Default values for code-generation thresholds */
  67. protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
  68. protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;
  69. /** If there are more than 8 long words to init in a bitset,
  70. * try to optimize it; e.g., detect runs of -1L and 0L.
  71. */
  72. protected static final int BITSET_OPTIMIZE_INIT_THRESHOLD = 8;
  73. /** This is a hint for the language-specific code generator.
  74. * A switch() or language-specific equivalent will be generated instead
  75. * of a series of if/else statements for blocks with number of alternates
  76. * greater than or equal to this number of non-predicated LL(1) alternates.
  77. * This is modified by the grammar option "codeGenMakeSwitchThreshold"
  78. */
  79. protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
  80. /** This is a hint for the language-specific code generator.
  81. * A bitset membership test will be generated instead of an
  82. * ORed series of LA(k) comparisions for lookahead sets with
  83. * degree greater than or equal to this value.
  84. * This is modified by the grammar option "codeGenBitsetTestThreshold"
  85. */
  86. protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
  87. private static boolean OLD_ACTION_TRANSLATOR = true;
  88. public static String TokenTypesFileSuffix = "TokenTypes";
  89. public static String TokenTypesFileExt = ".txt";
  90. /** Construct code generator base class */
  91. public CodeGenerator() {
  92. }
  93. /** Output a String to the currentOutput stream.
  94. * Ignored if string is null.
  95. * @param s The string to output
  96. */
  97. protected void _print(String s) {
  98. if (s != null) {
  99. currentOutput.print(s);
  100. }
  101. }
  102. /** Print an action without leading tabs, attempting to
  103. * preserve the current indentation level for multi-line actions
  104. * Ignored if string is null.
  105. * @param s The action string to output
  106. */
  107. protected void _printAction(String s) {
  108. if (s == null) {
  109. return;
  110. }
  111. // Skip leading newlines, tabs and spaces
  112. int start = 0;
  113. while (start < s.length() && Character.isSpaceChar(s.charAt(start))) {
  114. start++;
  115. }
  116. // Skip leading newlines, tabs and spaces
  117. int end = s.length() - 1;
  118. while (end > start && Character.isSpaceChar(s.charAt(end))) {
  119. end--;
  120. }
  121. char c = 0;
  122. for (int i = start; i <= end;) {
  123. c = s.charAt(i);
  124. i++;
  125. boolean newline = false;
  126. switch (c) {
  127. case '\n':
  128. newline = true;
  129. break;
  130. case '\r':
  131. if (i <= end && s.charAt(i) == '\n') {
  132. i++;
  133. }
  134. newline = true;
  135. break;
  136. default:
  137. currentOutput.print(c);
  138. break;
  139. }
  140. if (newline) {
  141. currentOutput.println();
  142. printTabs();
  143. // Absorb leading whitespace
  144. while (i <= end && Character.isSpaceChar(s.charAt(i))) {
  145. i++;
  146. }
  147. newline = false;
  148. }
  149. }
  150. currentOutput.println();
  151. }
  152. /** Output a String followed by newline, to the currentOutput stream.
  153. * Ignored if string is null.
  154. * @param s The string to output
  155. */
  156. protected void _println(String s) {
  157. if (s != null) {
  158. currentOutput.println(s);
  159. }
  160. }
  161. /** Test if a set element array represents a contiguous range.
  162. * @param elems The array of elements representing the set, usually from BitSet.toArray().
  163. * @return true if the elements are a contiguous range (with two or more).
  164. */
  165. public static boolean elementsAreRange(int[] elems) {
  166. if (elems.length == 0) {
  167. return false;
  168. }
  169. int begin = elems[0];
  170. int end = elems[elems.length - 1];
  171. if (elems.length <= 2) {
  172. // Not enough elements for a range expression
  173. return false;
  174. }
  175. if (end - begin + 1 > elems.length) {
  176. // The set does not represent a contiguous range
  177. return false;
  178. }
  179. int v = begin + 1;
  180. for (int i = 1; i < elems.length - 1; i++) {
  181. if (v != elems[i]) {
  182. // The set does not represent a contiguous range
  183. return false;
  184. }
  185. v++;
  186. }
  187. return true;
  188. }
  189. /** Get the identifier portion of an argument-action token.
  190. * The ID of an action is assumed to be a trailing identifier.
  191. * Specific code-generators may want to override this
  192. * if the language has unusual declaration syntax.
  193. * @param t The action token
  194. * @return A string containing the text of the identifier
  195. */
  196. protected String extractIdOfAction(Token t) {
  197. return extractIdOfAction(t.getText(), t.getLine(), t.getColumn());
  198. }
  199. /** Get the identifier portion of an argument-action.
  200. * The ID of an action is assumed to be a trailing identifier.
  201. * Specific code-generators may want to override this
  202. * if the language has unusual declaration syntax.
  203. * @param s The action text
  204. * @param line Line used for error reporting.
  205. * @param column Line used for error reporting.
  206. * @return A string containing the text of the identifier
  207. */
  208. protected String extractIdOfAction(String s, int line, int column) {
  209. s = removeAssignmentFromDeclaration(s);
  210. // Search back from the end for a non alphanumeric. That marks the
  211. // beginning of the identifier
  212. for (int i = s.length() - 2; i >= 0; i--) {
  213. // TODO: make this work for language-independent identifiers?
  214. if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
  215. // Found end of type part
  216. return s.substring(i + 1);
  217. }
  218. }
  219. // Something is bogus, but we cannot parse the language-specific
  220. // actions any better. The compiler will have to catch the problem.
  221. antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
  222. return "";
  223. }
  224. /** Get the type string out of an argument-action token.
  225. * The type of an action is assumed to precede a trailing identifier
  226. * Specific code-generators may want to override this
  227. * if the language has unusual declaration syntax.
  228. * @param t The action token
  229. * @return A string containing the text of the type
  230. */
  231. protected String extractTypeOfAction(Token t) {
  232. return extractTypeOfAction(t.getText(), t.getLine(), t.getColumn());
  233. }
  234. /** Get the type portion of an argument-action.
  235. * The type of an action is assumed to precede a trailing identifier
  236. * Specific code-generators may want to override this
  237. * if the language has unusual declaration syntax.
  238. * @param s The action text
  239. * @param line Line used for error reporting.
  240. * @return A string containing the text of the type
  241. */
  242. protected String extractTypeOfAction(String s, int line, int column) {
  243. s = removeAssignmentFromDeclaration(s);
  244. // Search back from the end for a non alphanumeric. That marks the
  245. // beginning of the identifier
  246. for (int i = s.length() - 2; i >= 0; i--) {
  247. // TODO: make this work for language-independent identifiers?
  248. if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
  249. // Found end of type part
  250. return s.substring(0, i + 1);
  251. }
  252. }
  253. // Something is bogus, but we cannot parse the language-specific
  254. // actions any better. The compiler will have to catch the problem.
  255. antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
  256. return "";
  257. }
  258. /** Generate the code for all grammars
  259. */
  260. public abstract void gen();
  261. /** Generate code for the given grammar element.
  262. * @param action The {...} action to generate
  263. */
  264. public abstract void gen(ActionElement action);
  265. /** Generate code for the given grammar element.
  266. * @param blk The "x|y|z|..." block to generate
  267. */
  268. public abstract void gen(AlternativeBlock blk);
  269. /** Generate code for the given grammar element.
  270. * @param end The block-end element to generate. Block-end
  271. * elements are synthesized by the grammar parser to represent
  272. * the end of a block.
  273. */
  274. public abstract void gen(BlockEndElement end);
  275. /** Generate code for the given grammar element.
  276. * @param atom The character literal reference to generate
  277. */
  278. public abstract void gen(CharLiteralElement atom);
  279. /** Generate code for the given grammar element.
  280. * @param r The character-range reference to generate
  281. */
  282. public abstract void gen(CharRangeElement r);
  283. /** Generate the code for a parser */
  284. public abstract void gen(LexerGrammar g) throws IOException;
  285. /** Generate code for the given grammar element.
  286. * @param blk The (...)+ block to generate
  287. */
  288. public abstract void gen(OneOrMoreBlock blk);
  289. /** Generate the code for a parser */
  290. public abstract void gen(ParserGrammar g) throws IOException;
  291. /** Generate code for the given grammar element.
  292. * @param rr The rule-reference to generate
  293. */
  294. public abstract void gen(RuleRefElement rr);
  295. /** Generate code for the given grammar element.
  296. * @param atom The string-literal reference to generate
  297. */
  298. public abstract void gen(StringLiteralElement atom);
  299. /** Generate code for the given grammar element.
  300. * @param r The token-range reference to generate
  301. */
  302. public abstract void gen(TokenRangeElement r);
  303. /** Generate code for the given grammar element.
  304. * @param atom The token-reference to generate
  305. */
  306. public abstract void gen(TokenRefElement atom);
  307. /** Generate code for the given grammar element.
  308. * @param blk The tree to generate code for.
  309. */
  310. public abstract void gen(TreeElement t);
  311. /** Generate the code for a parser */
  312. public abstract void gen(TreeWalkerGrammar g) throws IOException;
  313. /** Generate code for the given grammar element.
  314. * @param wc The wildcard element to generate
  315. */
  316. public abstract void gen(WildcardElement wc);
  317. /** Generate code for the given grammar element.
  318. * @param blk The (...)* block to generate
  319. */
  320. public abstract void gen(ZeroOrMoreBlock blk);
  321. /** Generate the token types as a text file for persistence across shared lexer/parser */
  322. protected void genTokenInterchange(TokenManager tm) throws IOException {
  323. // Open the token output Java file and set the currentOutput stream
  324. String fName = tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt;
  325. currentOutput = antlrTool.openOutputFile(fName);
  326. println("// $ANTLR " + antlrTool.version + ": " +
  327. antlrTool.fileMinusPath(antlrTool.grammarFile) +
  328. " -> " +
  329. fName +
  330. "$");
  331. tabs = 0;
  332. // Header
  333. println(tm.getName() + " // output token vocab name");
  334. // Generate a definition for each token type
  335. Vector v = tm.getVocabulary();
  336. for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
  337. String s = (String)v.elementAt(i);
  338. if (DEBUG_CODE_GENERATOR) {
  339. System.out.println("gen persistence file entry for: " + s);
  340. }
  341. if (s != null && !s.startsWith("<")) {
  342. // if literal, find label
  343. if (s.startsWith("\"")) {
  344. StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
  345. if (sl != null && sl.label != null) {
  346. print(sl.label + "=");
  347. }
  348. println(s + "=" + i);
  349. }
  350. else {
  351. print(s);
  352. // check for a paraphrase
  353. TokenSymbol ts = (TokenSymbol)tm.getTokenSymbol(s);
  354. if (ts == null) {
  355. antlrTool.warning("undefined token symbol: " + s);
  356. }
  357. else {
  358. if (ts.getParaphrase() != null) {
  359. print("(" + ts.getParaphrase() + ")");
  360. }
  361. }
  362. println("=" + i);
  363. }
  364. }
  365. }
  366. // Close the tokens output file
  367. currentOutput.close();
  368. currentOutput = null;
  369. }
  370. /** Process a string for an simple expression for use in xx/action.g
  371. * it is used to cast simple tokens/references to the right type for
  372. * the generated language.
  373. * @param str A String.
  374. */
  375. public String processStringForASTConstructor(String str) {
  376. return str;
  377. }
  378. /** Get a string for an expression to generate creation of an AST subtree.
  379. * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
  380. */
  381. public abstract String getASTCreateString(Vector v);
  382. /** Get a string for an expression to generate creating of an AST node
  383. * @param str The text of the arguments to the AST construction
  384. */
  385. public abstract String getASTCreateString(GrammarAtom atom, String str);
  386. /** Given the index of a bitset in the bitset list, generate a unique name.
  387. * Specific code-generators may want to override this
  388. * if the language does not allow '_' or numerals in identifiers.
  389. * @param index The index of the bitset in the bitset list.
  390. */
  391. protected String getBitsetName(int index) {
  392. return "_tokenSet_" + index;
  393. }
  394. public static String encodeLexerRuleName(String id) {
  395. return "m" + id;
  396. }
  397. public static String decodeLexerRuleName(String id) {
  398. if ( id==null ) {
  399. return null;
  400. }
  401. return id.substring(1,id.length());
  402. }
  403. /** Map an identifier to it's corresponding tree-node variable.
  404. * This is context-sensitive, depending on the rule and alternative
  405. * being generated
  406. * @param id The identifier name to map
  407. * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned.
  408. * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
  409. */
  410. public abstract String mapTreeId(String id, ActionTransInfo tInfo);
  411. /** Add a bitset to the list of bitsets to be generated.
  412. * if the bitset is already in the list, ignore the request.
  413. * Always adds the bitset to the end of the list, so the
  414. * caller can rely on the position of bitsets in the list.
  415. * The returned position can be used to format the bitset
  416. * name, since it is invariant.
  417. * @param p Bit set to mark for code generation
  418. * @param forParser true if the bitset is used for the parser, false for the lexer
  419. * @return The position of the bitset in the list.
  420. */
  421. protected int markBitsetForGen(BitSet p) {
  422. // Is the bitset (or an identical one) already marked for gen?
  423. for (int i = 0; i < bitsetsUsed.size(); i++) {
  424. BitSet set = (BitSet)bitsetsUsed.elementAt(i);
  425. if (p.equals(set)) {
  426. // Use the identical one already stored
  427. return i;
  428. }
  429. }
  430. // Add the new bitset
  431. bitsetsUsed.appendElement(p.clone());
  432. return bitsetsUsed.size() - 1;
  433. }
  434. /** Output tab indent followed by a String, to the currentOutput stream.
  435. * Ignored if string is null.
  436. * @param s The string to output.
  437. */
  438. protected void print(String s) {
  439. if (s != null) {
  440. printTabs();
  441. currentOutput.print(s);
  442. }
  443. }
  444. /** Print an action with leading tabs, attempting to
  445. * preserve the current indentation level for multi-line actions
  446. * Ignored if string is null.
  447. * @param s The action string to output
  448. */
  449. protected void printAction(String s) {
  450. if (s != null) {
  451. printTabs();
  452. _printAction(s);
  453. }
  454. }
  455. /** Output tab indent followed by a String followed by newline,
  456. * to the currentOutput stream. Ignored if string is null.
  457. * @param s The string to output
  458. */
  459. protected void println(String s) {
  460. if (s != null) {
  461. printTabs();
  462. currentOutput.println(s);
  463. }
  464. }
  465. /** Output the current tab indentation. This outputs the number of tabs
  466. * indicated by the "tabs" variable to the currentOutput stream.
  467. */
  468. protected void printTabs() {
  469. for (int i = 1; i <= tabs; i++) {
  470. currentOutput.print("\t");
  471. }
  472. }
  473. /** Lexically process $ and # references within the action.
  474. * This will replace #id and #(...) with the appropriate
  475. * function calls and/or variables etc...
  476. */
  477. protected abstract String processActionForSpecialSymbols(String actionStr,
  478. int line,
  479. RuleBlock currentRule,
  480. ActionTransInfo tInfo);
  481. public String getFOLLOWBitSet(String ruleName, int k) {
  482. GrammarSymbol rs = grammar.getSymbol(ruleName);
  483. if ( !(rs instanceof RuleSymbol) ) {
  484. return null;
  485. }
  486. RuleBlock blk = ((RuleSymbol)rs).getBlock();
  487. Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(k, blk.endNode);
  488. String followSetName = getBitsetName(markBitsetForGen(follow.fset));
  489. return followSetName;
  490. }
  491. public String getFIRSTBitSet(String ruleName, int k) {
  492. GrammarSymbol rs = grammar.getSymbol(ruleName);
  493. if ( !(rs instanceof RuleSymbol) ) {
  494. return null;
  495. }
  496. RuleBlock blk = ((RuleSymbol)rs).getBlock();
  497. Lookahead first = grammar.theLLkAnalyzer.look(k, blk);
  498. String firstSetName = getBitsetName(markBitsetForGen(first.fset));
  499. return firstSetName;
  500. }
  501. /**
  502. * Remove the assignment portion of a declaration, if any.
  503. * @param d the declaration
  504. * @return the declaration without any assignment portion
  505. */
  506. protected String removeAssignmentFromDeclaration(String d) {
  507. // If d contains an equal sign, then it's a declaration
  508. // with an initialization. Strip off the initialization part.
  509. if (d.indexOf('=') >= 0) d = d.substring(0, d.indexOf('=')).trim();
  510. return d;
  511. }
  512. /** Set all fields back like one just created */
  513. private void reset() {
  514. tabs = 0;
  515. // Allocate list of bitsets tagged for code generation
  516. bitsetsUsed = new Vector();
  517. currentOutput = null;
  518. grammar = null;
  519. DEBUG_CODE_GENERATOR = false;
  520. makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
  521. bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
  522. }
  523. public static String reverseLexerRuleName(String id) {
  524. return id.substring(1, id.length());
  525. }
  526. public void setAnalyzer(LLkGrammarAnalyzer analyzer_) {
  527. analyzer = analyzer_;
  528. }
  529. public void setBehavior(DefineGrammarSymbols behavior_) {
  530. behavior = behavior_;
  531. }
  532. /** Set a grammar for the code generator to use */
  533. protected void setGrammar(Grammar g) {
  534. reset();
  535. grammar = g;
  536. // Lookup make-switch threshold in the grammar generic options
  537. if (grammar.hasOption("codeGenMakeSwitchThreshold")) {
  538. try {
  539. makeSwitchThreshold = grammar.getIntegerOption("codeGenMakeSwitchThreshold");
  540. //System.out.println("setting codeGenMakeSwitchThreshold to " + makeSwitchThreshold);
  541. }
  542. catch (NumberFormatException e) {
  543. Token tok = grammar.getOption("codeGenMakeSwitchThreshold");
  544. antlrTool.error(
  545. "option 'codeGenMakeSwitchThreshold' must be an integer",
  546. grammar.getClassName(),
  547. tok.getLine(), tok.getColumn()
  548. );
  549. }
  550. }
  551. // Lookup bitset-test threshold in the grammar generic options
  552. if (grammar.hasOption("codeGenBitsetTestThreshold")) {
  553. try {
  554. bitsetTestThreshold = grammar.getIntegerOption("codeGenBitsetTestThreshold");
  555. //System.out.println("setting codeGenBitsetTestThreshold to " + bitsetTestThreshold);
  556. }
  557. catch (NumberFormatException e) {
  558. Token tok = grammar.getOption("codeGenBitsetTestThreshold");
  559. antlrTool.error(
  560. "option 'codeGenBitsetTestThreshold' must be an integer",
  561. grammar.getClassName(),
  562. tok.getLine(), tok.getColumn()
  563. );
  564. }
  565. }
  566. // Lookup debug code-gen in the grammar generic options
  567. if (grammar.hasOption("codeGenDebug")) {
  568. Token t = grammar.getOption("codeGenDebug");
  569. if (t.getText().equals("true")) {
  570. //System.out.println("setting code-generation debug ON");
  571. DEBUG_CODE_GENERATOR = true;
  572. }
  573. else if (t.getText().equals("false")) {
  574. //System.out.println("setting code-generation debug OFF");
  575. DEBUG_CODE_GENERATOR = false;
  576. }
  577. else {
  578. antlrTool.error("option 'codeGenDebug' must be true or false", grammar.getClassName(), t.getLine(), t.getColumn());
  579. }
  580. }
  581. }
  582. public void setTool(Tool tool) {
  583. antlrTool = tool;
  584. }
  585. }