/src/org/ubi/SourceReader.java

http://github.com/nddrylliog/ooc · Java · 1185 lines · 841 code · 101 blank · 243 comment · 77 complexity · 03e0ff8f01f44641aff703e27aded539 MD5 · raw file

  1. package org.ubi;
  2. import java.io.EOFException;
  3. import java.io.File;
  4. import java.io.FileReader;
  5. import java.io.IOException;
  6. import java.util.ArrayList;
  7. import java.util.List;
  8. /**
  9. * Utility class to read blocks of text.
  10. * Mostly useful for keeping track of line number/positions (for accurate
  11. * error messages, @see SyntaxError).
  12. * Has builtin methods for reading C-like elements/tokens, like
  13. * string literals/char literals, blocks, etc.
  14. * @author Amos Wenger
  15. */
  16. public class SourceReader {
  17. /**
  18. * The case sensitivity setting, i.e. whether 'A' == 'a' or 'A' != 'a'
  19. * @author Amos Wenger
  20. */
  21. public enum CaseSensibility {
  22. /** Distinguish between capitalized characters and others, e.g. 'A' != 'a' */
  23. SENSITIVE,
  24. /** Don't make a difference between capitalized characters and others, e.g. 'A' == 'a' */
  25. INSENSITIVE
  26. }
  27. protected ArrayList<Integer> newlineIndices; // positions just past each '\n' recorded by read()
  28. protected String fileName; // descriptive origin of the content, reported in file locations
  29. protected char[] content; // the whole text being read
  30. protected int index; // current read position inside content
  31. protected int mark; // position last saved by mark()
  32. /**
  33. * Read the content of a the file at place "path"
  34. * @param path The path of the file to be read
  35. * @return a SourceReader reading from the file content
  36. * @throws java.io.IOException if file can't be found or opened for reading
  37. * (or any other I/O exception, for that matter).
  38. */
  39. public static SourceReader getReaderFromPath(String path) throws IOException {
  40. return getReaderFromFile(new File(path));
  41. }
  42. /**
  43. * Read the content of a the file pointed by "file"
  44. * @param file The file object from which to read
  45. * @return a SourceReader reading from the file content
  46. * @throws java.io.IOException if file can't be found or opened for reading
  47. * (or any other I/O exception, for that matter).
  48. */
  49. public static SourceReader getReaderFromFile(File file) throws IOException {
  50. return new SourceReader(file.getPath(), readToString(file));
  51. }
  52. /**
  53. * Read the content of a string
  54. * @param path The path this string came from. Can be an URL, a file path, etc.
  55. * anything descriptive, really, even "<system>" or "<copy-protected>" ^^
  56. * @param content
  57. * @return
  58. */
  59. public static SourceReader getReaderFromText(String path, String content) {
  60. return new SourceReader(path, content);
  61. }
  62. /**
  63. * Read the content of a the file pointed by "file"
  64. * @param file The file object from which to read
  65. * @return a SourceReader reading from the file content
  66. * @throws java.io.IOException if file can't be found or opened for reading
  67. * (or any other I/O exception, for that matter).
  68. */
  69. public static String readToString(File file) throws IOException {
  70. char[] buffer = new char[8192];
  71. FileReader fR = new FileReader(file);
  72. StringBuilder content = new StringBuilder((int) file.length());
  73. int length;
  74. while((length = fR.read(buffer)) != -1) {
  75. content.append(buffer, 0, length);
  76. }
  77. fR.close();
  78. return content.toString();
  79. }
  /**
   * Creates a reader positioned at the start of {@code content}.
   *
   * @param filePath name reported in locations (for error messages)
   * @param content the text to read from
   */
  protected SourceReader(String filePath, String content) {
    fileName = filePath;
    newlineIndices = new ArrayList<Integer>();
    index = 0;
    mark = 0;
    this.content = content.toCharArray();
  }
  93. /**
  94. * Read one character from the source at current position.
  95. * @return The character read
  96. * @throws EOFException When the end of the file is reached.
  97. */
  98. public char read() throws EOFException {
  99. if(index + 1 > content.length) {
  100. throw new EOFException("Parsing ended. Parsed"+index
  101. +" chars, "+getLineNumber()+" lines total.");
  102. }
  103. char character = content[index++];
  104. if(character == '\n') {
  105. if(newlineIndices.isEmpty() || newlineIndices.get(newlineIndices.size() - 1).intValue() < index) {
  106. newlineIndices.add(new Integer(index));
  107. }
  108. }
  109. return character;
  110. }
  111. /**
  112. * Read one character from the source at current position, without advancing
  113. * the pointer
  114. * @return The character read
  115. * @throws EOFException When the end of the file is reached.
  116. */
  117. public char peek() {
  118. return content[index];
  119. }
  120. /**
  121. * Save the current position, allowing it to be restored later with the reset()
  122. *
  123. * <i>Note : functions from SourceReader may call mark(), thus overwriting the saved
  124. * position. If you want to be safe, assign the return value from mark() to an int,
  125. * which you can later pass to reset(int).</i>
  126. *
  127. * Example :
  128. * <code>
  129. * int mark = sourceReader.mark();
  130. * sourceReader.readUntil(...);
  131. * sourceReader.reset(mark);
  132. * </code>
  133. *
  134. * @return The current position
  135. */
  136. public int mark() {
  137. this.mark = index;
  138. return mark;
  139. }
  140. /**
  141. * Restore position to the last saved with mark()
  142. */
  143. public void reset() {
  144. this.index = this.mark;
  145. }
  146. /**
  147. * Restore position to the given one
  148. * @param index The position to jump to
  149. */
  150. public void reset(int index) {
  151. this.index = index;
  152. }
  153. /**
  154. * Rewind position from given offset.
  155. * (Subtracts offset from index)
  156. * @param index The position to jump to
  157. */
  158. public void rewind(int offset) {
  159. this.index -= offset;
  160. }
  161. /**
  162. * Advance position from given offset.
  163. * @param offset
  164. * @throws EOFException
  165. */
  166. public void skip(int offset) throws EOFException {
  167. if(offset < 0) {
  168. rewind(-offset);
  169. } else {
  170. for(int i = 0; i < offset; i++) {
  171. read();
  172. }
  173. }
  174. }
  175. /**
  176. * @return the current line number
  177. */
  178. public int getLineNumber() {
  179. int lineNumber = 0;
  180. while(lineNumber < newlineIndices.size() && newlineIndices.get(lineNumber).intValue() <= index) {
  181. lineNumber++;
  182. }
  183. return lineNumber + 1;
  184. }
  185. /**
  186. * @return the current position in line (e.g. number of characters since the last newline
  187. */
  188. public int getLinePos() {
  189. int lineNumber = getLineNumber();
  190. if(lineNumber == 1) {
  191. return index + 1;
  192. }
  193. return index - newlineIndices.get(getLineNumber() - 2).intValue() + 1;
  194. }
  195. /**
  196. * @return false if positioned at the end of the content.
  197. */
  198. public boolean hasNext() {
  199. return (index < content.length);
  200. }
  201. public String getFileName() {
  202. return fileName;
  203. }
  204. /**
  205. * @return the current file location, containing the file number, line position, and index
  206. */
  207. public FileLocation getLocation() {
  208. return new FileLocation(fileName, getLineNumber(), getLinePos(), index);
  209. }
  210. public FileLocation getLocation(Locatable loc) {
  211. return getLocation(loc.getStart(), loc.getLength());
  212. }
  213. public FileLocation getLocation(int start, int length) {
  214. int mark = mark();
  215. reset(0);
  216. try {
  217. skip(start);
  218. } catch(EOFException e) {}
  219. FileLocation loc = getLocation();
  220. loc.length = length;
  221. reset(mark);
  222. return loc;
  223. }
  224. /**
  225. * @param character
  226. * @return true if the last-but-one char equals 'character'.
  227. */
  228. public boolean backMatches(char character, boolean trueIfStartpos) {
  229. if(index <= 0) {
  230. return trueIfStartpos;
  231. }
  232. return content[index - 1] == character;
  233. }
  234. /**
  235. * Test if each candidate in "candidates" matches the next characters in the content.
  236. * @param candidates
  237. * @param keepEnd If false, will reset to the initial position before returning.
  238. * If true, will stay after the matched candidate.
  239. * @return -1 if no candidate matched. Otherwise, the index of the first matching candidate.
  240. * @throws java.io.EOFException
  241. * @throws java.io.IOException
  242. */
  243. public int matches(List<String> candidates, boolean keepEnd) throws EOFException {
  244. int match = -1;
  245. int count = 0;
  246. search: for(String candidate: candidates) {
  247. if(matches(candidate, keepEnd, CaseSensibility.SENSITIVE)) {
  248. match = count;
  249. break search;
  250. }
  251. ++count;
  252. }
  253. return match;
  254. }
  255. /**
  256. * Test if a "candidate" matches the next character in the content, and if there's
  257. * whitespace after it.
  258. * @param candidate
  259. * @param keepEnd
  260. * @return
  261. * @throws EOFException
  262. */
  263. public boolean matchesSpaced(String candidate, boolean keepEnd) throws EOFException {
  264. int mark = mark();
  265. boolean result = matches(candidate, true) && hasWhitespace(false);
  266. if(!keepEnd) {
  267. reset(mark);
  268. }
  269. return result;
  270. }
  271. /**
  272. * Test if a "candidate" matches the next character in the content, and if there's
  273. * characters other than "A-Za-z0-9_" after iti
  274. * @param candidate
  275. * @param keepEnd
  276. * @return
  277. * @throws EOFException
  278. */
  279. public boolean matchesNonident(String candidate, boolean keepEnd) throws EOFException {
  280. int mark = mark();
  281. boolean result = matches(candidate, true);
  282. char c = peek();
  283. result &= !((c == '_') || Character.isLetterOrDigit(c));
  284. if(!keepEnd) {
  285. reset(mark);
  286. }
  287. return result;
  288. }
  289. /**
  290. * Test if a "candidate" matches the next characters in the content.
  291. * It is case-sensitive by default
  292. * @param candidate
  293. * @param keepEnd If false, will reset to the initial position before returning.
  294. * If true, will stay after the matched candidate.
  295. * @return true if the candidate matches, false otherwise.
  296. *
  297. */
  298. public boolean matches(String candidate, boolean keepEnd) throws EOFException {
  299. return matches(candidate, keepEnd, CaseSensibility.SENSITIVE);
  300. }
  301. /**
  302. * Test if a "candidate" matches the next characters in the content.
  303. * @param candidate
  304. * @param keepEnd If false, will reset to the initial position before returning.
  305. * If true, will stay after the matched candidate.
  306. * @param caseMode either Case.SENSITIVE or Case.INSENSITIVE
  307. * @return true if the candidate matches, false otherwise.
  308. */
  309. public boolean matches(String candidate, boolean keepEnd, CaseSensibility caseMode) throws EOFException {
  310. mark();
  311. int i = 0;
  312. char c, c2;
  313. boolean result = true;
  314. while(i < candidate.length()) {
  315. c = read();
  316. c2 = candidate.charAt(i);
  317. if(c2 != c) {
  318. if((caseMode == CaseSensibility.SENSITIVE) || (Character.toLowerCase(c2) != Character.toLowerCase(c))) {
  319. result = false;
  320. break;
  321. }
  322. }
  323. i++;
  324. }
  325. if(!result || !keepEnd) {
  326. reset();
  327. }
  328. return result;
  329. }
  330. /**
  331. * Read a C-style name (a string containing [A-Za-z0-9_] characters) and return it.
  332. * @return the read name
  333. */
  334. public boolean skipName() throws EOFException {
  335. if(hasNext()) {
  336. char chr = read();
  337. if(!Character.isLetter(chr) && chr != '_') {
  338. rewind(1);
  339. return false;
  340. }
  341. }
  342. read : while(hasNext()) {
  343. char chr = read();
  344. if(!Character.isLetterOrDigit(chr) && chr != '_' && chr != '!') {
  345. rewind(1);
  346. break read;
  347. }
  348. }
  349. return true;
  350. }
  351. /**
  352. * Read a C-style name (a string containing [A-Za-z0-9_] characters) and return it.
  353. * @return the read name
  354. */
  355. public String readName() throws EOFException {
  356. StringBuilder sB = new StringBuilder();
  357. mark();
  358. if(hasNext()) {
  359. char chr = read();
  360. if(Character.isLetter(chr) || chr == '_') {
  361. sB.append(chr);
  362. } else {
  363. rewind(1);
  364. return "";
  365. }
  366. }
  367. read : while(hasNext()) {
  368. mark();
  369. char chr = read();
  370. if(Character.isLetterOrDigit(chr) || chr == '_' || chr == '!') {
  371. sB.append(chr);
  372. } else {
  373. rewind(1);
  374. break read;
  375. }
  376. }
  377. return sB.toString();
  378. }
  379. /**
  380. * Read until a newline character and return the read input
  381. * @return the read input
  382. */
  383. public String readLine() throws EOFException {
  384. return readUntil('\n', true);
  385. }
  386. /**
  387. * Read a C-style single-line comment (ignore a line).
  388. * C-style single-line comments are prefixed by "//"
  389. */
  390. public void readSingleComment() throws EOFException {
  391. readLine();
  392. }
  393. /**
  394. * Read a C-style multi-line comment (ignore until "*\/").
  395. * C-style multi-line comments are prefixed by "/*" and "*\/"
  396. */
  397. public void readMultiComment() throws EOFException {
  398. while(!matches("*/", true, CaseSensibility.SENSITIVE)) { read(); }
  399. }
  400. /**
  401. * Read as many times candidates as we can ! Ignoring any char
  402. * in 'ignored'.
  403. * @param candidates
  404. * @param ignored
  405. * @param keepEnd
  406. * @return
  407. */
  408. public String readMany(String candidates, String ignored, boolean keepEnd) throws EOFException {
  409. StringBuilder sB = new StringBuilder();
  410. int myMark = mark();
  411. while(hasNext()) {
  412. char c = read();
  413. if(candidates.indexOf(c) != -1) {
  414. sB.append(c);
  415. } else if(ignored.indexOf(c) != -1) {
  416. // look up in the sky, and think of how lucky you are and others aren't.
  417. } else {
  418. if(keepEnd) {
  419. rewind(1); // We went one too far.
  420. }
  421. break;
  422. }
  423. }
  424. if(!keepEnd) {
  425. reset(myMark);
  426. }
  427. return sB.toString();
  428. }
  429. /**
  430. * Read a C-style character literal, e.g. any character or an escape sequence,
  431. * and return it as a char.
  432. */
  433. @SuppressWarnings("fallthrough")
  434. public char readCharLiteral() throws EOFException, SyntaxError {
  435. mark();
  436. char c = read();
  437. switch(c) {
  438. case '\'':
  439. throw new SyntaxError(getLocation(), "Empty char literal !");
  440. case '\\':
  441. char c2 = read();
  442. switch(c2) {
  443. case '\\': // backslash
  444. c = '\\'; break;
  445. case '0': // null char
  446. c = '\0'; break;
  447. case 'n': // newline
  448. c = '\n'; break;
  449. case 't': // tab
  450. c = '\t'; break;
  451. case 'v': // vertical tab
  452. c = '\013'; break;
  453. case 'b': // backspace
  454. c = '\b'; break;
  455. case 'f': // form feed
  456. c = '\f'; break;
  457. case 'r': // carriage return
  458. c = '\r'; break;
  459. case '\'': // simple quote
  460. c = '\''; break;
  461. default:
  462. throw new SyntaxError(getLocation(), "Invalid escape sequence : "+spelled(c));
  463. }
  464. // intentional fallthrough
  465. default:
  466. c2 = read();
  467. if(c2 != '\'') {
  468. throw new SyntaxError(getLocation(), "Char literal too long. Expected ', found "+spelled(c2));
  469. }
  470. return c;
  471. }
  472. }
  473. /**
  474. * Parse a C-style character literal from an input string, e.g. any character
  475. * or an escape sequence, and return it as a char.
  476. */
  477. @SuppressWarnings("fallthrough")
  478. public static char parseCharLiteral(String input) throws SyntaxError {
  479. char c = input.charAt(0);
  480. int supposedLength = 1;
  481. switch(c) {
  482. case '\'':
  483. throw new SyntaxError(null, "Empty char literal !");
  484. case '\\':
  485. supposedLength++;
  486. char c2 = input.charAt(1);
  487. switch(c2) {
  488. case '\\': // backslash
  489. c = '\\'; break;
  490. case '0': // null char
  491. c = '\0'; break;
  492. case 'n': // newline
  493. c = '\n'; break;
  494. case 't': // tab
  495. c = '\t'; break;
  496. case 'v': // vertical tab
  497. c = '\013'; break;
  498. case 'b': // backspace
  499. c = '\b'; break;
  500. case 'f': // form feed
  501. c = '\f'; break;
  502. case 'r': // carriage return
  503. c = '\r'; break;
  504. case '\'': // simple quote
  505. c = '\''; break;
  506. default:
  507. throw new SyntaxError(null, "Invalid escape sequence : "+spelled(c));
  508. }
  509. // intentional fallthrough
  510. default:
  511. if(input.length() > supposedLength) {
  512. throw new SyntaxError(null, "Char literal too long.");
  513. }
  514. return c;
  515. }
  516. }
  517. public static String parseStringLiteral(String string) {
  518. int index = 0;
  519. StringBuilder buffer = new StringBuilder();
  520. char c;
  521. while(index < string.length()) {
  522. c = string.charAt(index++);
  523. switch(c) {
  524. case '\\':
  525. char c2 = string.charAt(index++);
  526. switch(c2) {
  527. case '\\': // backslash
  528. buffer.append('\\'); break;
  529. case '0': // null char
  530. buffer.append('\0'); break;
  531. case 'n': // newline
  532. buffer.append('\n'); break;
  533. case 't': // tab
  534. buffer.append('\t'); break;
  535. case 'b': // backspace
  536. buffer.append('\b'); break;
  537. case 'f': // form feed
  538. buffer.append('\f'); break;
  539. case 'r': // return
  540. buffer.append('\r'); break;
  541. default: // delimiter
  542. buffer.append(c2); break;
  543. }
  544. break;
  545. default:
  546. buffer.append(c);
  547. }
  548. }
  549. return buffer.toString();
  550. }
  551. /**
  552. * Read a C-like string literal, e.g. enclosed by '"', and with C-like escape sequences,
  553. * and return it.
  554. * Note: eats the final '"', no need to skip it.
  555. */
  556. public String readStringLiteral() throws EOFException {
  557. return readStringLiteral('"');
  558. }
  559. /**
  560. * Read a string literal, e.g. enclosed by "delimiter", and with C-like escape sequences,
  561. * and return it.
  562. * Note: eats the final '"', no need to skip it.
  563. * @param delimiter The delimitr, e.g. " (C-like), or ' (e.g. Python-like)
  564. */
  565. public String readStringLiteral(char delimiter) throws EOFException {
  566. StringBuilder buffer = new StringBuilder();
  567. char c;
  568. reading : while(true) {
  569. mark();
  570. c = read();
  571. switch(c) {
  572. case '\\':
  573. char c2 = read();
  574. switch(c2) {
  575. case '\\': // backslash
  576. buffer.append('\\'); break;
  577. case '0': // null char
  578. buffer.append('\0'); break;
  579. case 'n': // newline
  580. buffer.append('\n'); break;
  581. case 't': // tab
  582. buffer.append('\t'); break;
  583. case 'b': // backspace
  584. buffer.append('\b'); break;
  585. case 'f': // form feed
  586. buffer.append('\f'); break;
  587. case 'r': // return
  588. buffer.append('\r'); break;
  589. default: // delimiter
  590. if(c2 == delimiter) { // freakin' java switches. *growl*
  591. buffer.append(delimiter);
  592. } break;
  593. }
  594. break;
  595. default: // TODO : wonder if newline is a syntax error in a string literal
  596. if(c == delimiter) {
  597. break reading;
  598. }
  599. buffer.append(c);
  600. }
  601. }
  602. return buffer.toString();
  603. }
  604. /**
  605. * Return true if there's any whitespace after the current position.
  606. * @param keep If true, will have the same effect as skipWhitespace
  607. * If false, the position will be left unchanged.
  608. * @return true if there was any whitespace.
  609. * @throws java.io.IOException Go look in the closet, 3rd door left.
  610. */
  611. public boolean hasWhitespace(boolean skip) throws EOFException {
  612. boolean has = false;
  613. int myMark = mark();
  614. while(hasNext()) {
  615. int c = read();
  616. if(Character.isWhitespace(c)) {
  617. has = true;
  618. } else {
  619. rewind(1);
  620. break;
  621. }
  622. }
  623. if(!skip) {
  624. reset(myMark);
  625. }
  626. return has;
  627. }
  628. /**
  629. * Ignore the next characters which are whitespace (e.g. spaces, tabulations,
  630. * newlines, linefeeds, ie. anything for which Character.isWhitespace(int) is true.
  631. * @throws java.io.IOException
  632. */
  633. public boolean skipWhitespace() throws EOFException {
  634. while(hasNext()) {
  635. int myMark = mark();
  636. int c = read();
  637. if(!Character.isWhitespace(c)) {
  638. reset(myMark);
  639. break;
  640. }
  641. }
  642. return true;
  643. }
  644. /**
  645. * Ignore the next characters which are contained in the string 'chars'
  646. * @throws java.io.IOException
  647. */
  648. public boolean skipChars(String chars) throws EOFException {
  649. while(hasNext()) {
  650. int myMark = mark();
  651. int c = read();
  652. if(chars.indexOf(c) == -1) {
  653. reset(myMark);
  654. break;
  655. }
  656. }
  657. return true;
  658. }
  659. /**
  660. * Skip the next characters until a newline.
  661. * @throws java.io.EOFException
  662. */
  663. public void skipLine() throws EOFException {
  664. while(read() != '\n') {
  665. // Go on with the loop, don't look back.
  666. }
  667. }
  668. /**
  669. * Read until the character "chr", and return the characters read.
  670. * Example:
  671. * <code>
  672. * String myLine = sourceReader.readUntil(';');
  673. * </code>
  674. * @param chr The end delimiter.
  675. * @throws java.io.EOFException
  676. */
  677. public String readUntil(char chr) throws EOFException {
  678. return readUntil(chr, false);
  679. }
  680. /**
  681. * Read until the character "chr", and return the characters read.
  682. * @param chr The end delimiter.
  683. * @param keepEnd If false, leave the position before the end delimiter.
  684. * If true, include the delimiter in the returned String, and leave the
  685. * position after.
  686. * @throws java.io.EOFException
  687. */
  688. public String readUntil(char chr, boolean keepEnd) throws EOFException {
  689. StringBuilder sB = new StringBuilder();
  690. char chrRead = 0;
  691. while(hasNext() && (chrRead = read()) != chr) {
  692. sB.append(chrRead);
  693. }
  694. if(!keepEnd) {
  695. reset(index - 1); // chop off the last character
  696. } else if(chrRead != 0) {
  697. sB.append(chr);
  698. }
  699. return sB.toString();
  700. }
  701. /**
  702. * Read until one of the Strings in "matches" matches, and return the characters read.
  703. * By default, do not include the matching end delimiter in the resulting String, and leave
  704. * the position before the matching end delimiter.
  705. * @param readUntil The potential end delimiters
  706. * @throws java.io.EOFException
  707. */
  708. public String readUntil(String[] matches) throws EOFException {
  709. return readUntil(matches, false);
  710. }
  711. /**
  712. * Read until one of the Strings in "matches" matches, and return the characters read.
  713. * @param readUntil The potential end delimiters
  714. * @param keepEnd If false, leave the position before the matching end delimiter.
  715. * If true, include the matching delimiter in the returned String, and leave the
  716. * position after.
  717. * @throws java.io.EOFException
  718. */
  719. public String readUntil(String[] matches, boolean keepEnd) throws EOFException {
  720. StringBuilder sB = new StringBuilder();
  721. try { while(hasNext()) {
  722. for(String match: matches) {
  723. if(matches(match, keepEnd, CaseSensibility.SENSITIVE)) {
  724. if(keepEnd) {
  725. sB.append(match);
  726. }
  727. return sB.toString();
  728. }
  729. }
  730. sB.append(read());
  731. } } catch(EOFException e) {
  732. // Normal operation.
  733. }
  734. return sB.toString();
  735. }
  736. /**
  737. * Read until the end of file, and return the result.
  738. */
  739. public String readUntilEOF() {
  740. StringBuilder output = new StringBuilder();
  741. try { while(hasNext()) {
  742. output.append(read());
  743. } } catch(EOFException e) {
  744. // Well, that's the point
  745. }
  746. return output.toString();
  747. }
  748. /**
  749. * Read a block delimited by "start" and "end". It deals with nested blocks, e.g.
  750. * with '{' and '}', it will match '{{}}' in one piece.
  751. * Note : the final end delimiter is eaten, No need to skip it.
  752. * @param startChr the start delimiter
  753. * @param endChr the end delimiter
  754. * @return the content of the block
  755. * @throws org.ubi.SyntaxError
  756. * @throws java.io.IOException
  757. */
  758. public String readBlock(char startChr, char endChr) throws SyntaxError, EOFException {
  759. return readBlock(startChr, endChr, '\0');
  760. }
  761. /**
  762. * Read a block delimited by "start" and "end" delimiters. It deals with nested blocks, e.g.
  763. * with '{' and '}', it will match '{{}}' in one piece.
  764. * The escape character (escapeChar) allows to include the endDelimiter in the block,
  765. * e.g. with '"' and '"' delimiters, and '\\' escapeChar, there can be escape sequence in
  766. * what looks obviously like a String literal.
  767. * Note : the final end delimiter is eaten, No need to skip it.
  768. * @param startChr the start delimiter
  769. * @param endChr the end delimiter
  770. * @return the content of the block
  771. * @throws org.ubi.SyntaxError
  772. * @throws java.io.IOException
  773. */
  774. public String readBlock(char startChr, char endChr, char escapeChar) throws SyntaxError, EOFException {
  775. skipWhitespace();
  776. mark();
  777. char c;
  778. if((c = read()) != startChr) {
  779. reset();
  780. throw new SyntaxError(getLocation(), "Trying to read block delimited by "
  781. +spelled(startChr)+spelled(endChr)
  782. +", but "+spelled(c)+" found instead.");
  783. }
  784. StringBuilder output = new StringBuilder();
  785. int count = 1;
  786. char chr;
  787. try { reading: while(true) {
  788. chr = read();
  789. if(chr == escapeChar) {
  790. output.append(chr);
  791. chr = read();
  792. }
  793. if(chr == endChr) {
  794. if(--count <= 0) {
  795. break reading;
  796. }
  797. } else if(chr == startChr) {
  798. ++count;
  799. }
  800. output.append(chr);
  801. } } catch(EOFException e) {
  802. // Normal operation
  803. }
  804. return output.toString();
  805. }
  806. /**
  807. * Read a block delimited by "start" and "end" delimiters. It deals with nested blocks, e.g.
  808. * with '{' and '}', it will match '{{}}' in one piece.
  809. * The escape character (escapeChar) allows to include the endDelimiter in the block,
  810. * e.g. with '"' and '"' delimiters, and '\\' escapeChar, there can be escape sequence in
  811. * what looks obviously like a String literal.
  812. * Note : the final end delimiter is eaten, No need to skip it.
  813. * @param start the start delimiter
  814. * @param end the end delimiter
  815. * @return the content of the block
  816. * @throws org.ubi.SyntaxError
  817. * @throws java.io.IOException
  818. */
  819. public String readBlock(String start, String end, char escapeChar) throws SyntaxError, EOFException {
  820. skipWhitespace();
  821. mark();
  822. if(!matches(start, true)) {
  823. char c = read();
  824. reset();
  825. throw new SyntaxError(getLocation(), "Trying to read block delimited by "
  826. +spelled(start)+spelled(end)+", but "+spelled(c)+" found instead.");
  827. }
  828. StringBuilder output = new StringBuilder();
  829. int count = 1;
  830. char chr;
  831. try { reading: while(true) {
  832. if(matches(end, true)) {
  833. if(--count <= 0) {
  834. break reading;
  835. }
  836. } else if(matches(start, true)) {
  837. ++count;
  838. } else {
  839. chr = read();
  840. if(chr == escapeChar) {
  841. output.append(chr);
  842. chr = read();
  843. }
  844. output.append(chr);
  845. }
  846. } } catch(EOFException e) {
  847. // Normal operation
  848. }
  849. return output.toString();
  850. }
  851. /**
  852. * Throws a SyntaxError with the current location
  853. * @param string
  854. */
  855. public void err(String msg) throws SyntaxError {
  856. throw new SyntaxError(getLocation(), msg);
  857. }
  858. /**
  859. * Return a String representation of a character, with spelled
  860. * out representations of newlines, tabs, etc.
  861. * Example: spelled(32) = " ";
  862. * Example: spelled('\n') = "\\n";
  863. */
  864. public static String spelled(char character) {
  865. switch(character) {
  866. case '\"':
  867. return "\\\"";
  868. case '\t':
  869. return "\\t";
  870. case '\f':
  871. return "\\f";
  872. case '\013':
  873. return "\\v";
  874. case '\r':
  875. return "\\r";
  876. case '\n':
  877. return "\\n";
  878. case '\0':
  879. return "\\0";
  880. case '\'':
  881. return "\\'";
  882. case '\\':
  883. return "\\\\";
  884. default:
  885. return Character.toString(character);
  886. }
  887. }
  888. public static void spelled(char character, Appendable output) throws IOException {
  889. switch(character) {
  890. case '\"':
  891. output.append("\""); return;
  892. case '\t':
  893. output.append("\\t"); return;
  894. case '\f':
  895. output.append("\\f"); return;
  896. case '\b':
  897. output.append("\\b"); return;
  898. case '\013':
  899. output.append("\\v"); return;
  900. case '\r':
  901. output.append("\\r"); return;
  902. case '\n':
  903. output.append("\\n"); return;
  904. case '\0':
  905. output.append("\\0"); return;
  906. case '\'':
  907. output.append("\\'"); return;
  908. case '\\':
  909. output.append("\\"); return;
  910. default:
  911. output.append(character); return;
  912. }
  913. }
  914. /**
  915. * Return a String representation of a String, with spelled
  916. * out representations of newlines, tabs, etc.
  917. * Example: spelled(32) = " ";
  918. * Example: spelled('\n') = "\\n";
  919. */
  920. public static String spelled(String str) {
  921. StringBuilder output = new StringBuilder();
  922. try {
  923. spelled(str, output);
  924. } catch (IOException e) {
  925. throw new Error(e);
  926. }
  927. return output.toString();
  928. }
  929. public static void spelled(String str, Appendable output) throws IOException {
  930. spelled(str, output, false);
  931. }
  932. public static void spelled(String str, Appendable output, boolean doBackslashes) throws IOException {
  933. int length = str.length();
  934. for(int i = 0; i < length; i++) {
  935. char c = str.charAt(i);
  936. if(doBackslashes && c == '\\') {
  937. output.append("\\\\");
  938. } else {
  939. spelled(c, output);
  940. }
  941. }
  942. }
  943. /**
  944. * Return the String containing the whole content this SourceReader is reading from.
  945. */
  946. public char[] getContent() {
  947. return content;
  948. }
  949. /**
  950. * Put the current index in token.start and return true.
  951. * Intended to be used like this:
  952. * <code>
  953. * static Token token = new Token();
  954. * void parse(SourceReader read) {
  955. * if(reader.startToken(token) && reader.matches("myKeyword", true) && reader.endToken(token)) {
  956. * // Add a copy of the Token to the token list.
  957. * }
  958. * }
  959. * </code>
  960. * @param token
  961. * @return
  962. */
  963. public boolean startToken(Token token) {
  964. token.start = index;
  965. return true;
  966. }
  967. /**
  968. * Put the current index in token.end and return true.
  969. * Intended to be used like this:
  970. * <code>
  971. * static Token token = new Token();
  972. * void parse(SourceReader read) {
  973. * if(reader.startToken(token) && reader.matches("myKeyword", true) && reader.endToken(token)) {
  974. * // Add a copy of the Token to the token list.
  975. * }
  976. * }
  977. * </code>
  978. * @param token
  979. * @return
  980. */
  981. public boolean endToken(Token token) {
  982. token.length = index - token.start;
  983. return true;
  984. }
  985. @SuppressWarnings("boxing")
  986. public String getLine(int lineNumber) throws IOException {
  987. int mark = mark();
  988. if(newlineIndices.size() > lineNumber) {
  989. reset(newlineIndices.get(lineNumber));
  990. } else {
  991. reset(0);
  992. for(int i = 1; i < lineNumber; i++) {
  993. readLine();
  994. }
  995. }
  996. String line = readLine();
  997. reset(mark);
  998. return line;
  999. }
  1000. /**
  1001. * Get a slice of the source, specifying the start position
  1002. * and the length of the slice.
  1003. * @param start
  1004. * @param length
  1005. * @return
  1006. */
  1007. public String getSlice(int start, int length) {
  1008. return new String(content, start, length);
  1009. }
  1010. /**
  1011. * Reads an exponent, such as in a number literal (for example: 8E5 or 1.5e+24).
  1012. * If no exponent is read, the position is reset
  1013. * @return true if an exponent could be read, otherwise false
  1014. */
  1015. public boolean readExponent() throws IOException {
  1016. if (peek() == 'e' || peek() == 'E') {
  1017. int expMark = mark();
  1018. skip(1);
  1019. if (peek() == '+' || peek() == '-') {
  1020. skip(1);
  1021. }
  1022. if (Character.isDigit(peek())) {
  1023. skip(1);
  1024. readMany("0123456789", "_", true);
  1025. return true;
  1026. }
  1027. reset(expMark);
  1028. }
  1029. return false;
  1030. }
  1031. }