PageRenderTime 31ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/ojc-core/encodersl/encoder-coco/src/com/sun/encoder/coco/model/CocoLexer.java

https://bitbucket.org/pymma/openesb-components
Java | 629 lines | 349 code | 51 blank | 229 comment | 80 complexity | ec950c0d1b26424343e1113f483c0a85 MD5 | raw file
  1. /*
  2. * BEGIN_HEADER - DO NOT EDIT
  3. *
  4. * The contents of this file are subject to the terms
  5. * of the Common Development and Distribution License
  6. * (the "License"). You may not use this file except
  7. * in compliance with the License.
  8. *
  9. * You can obtain a copy of the license at
  10. * https://open-jbi-components.dev.java.net/public/CDDLv1.0.html.
  11. * See the License for the specific language governing
  12. * permissions and limitations under the License.
  13. *
  14. * When distributing Covered Code, include this CDDL
  15. * HEADER in each file and include the License file at
  16. * https://open-jbi-components.dev.java.net/public/CDDLv1.0.html.
  17. * If applicable add the following below this CDDL HEADER,
  18. * with the fields enclosed by brackets "[]" replaced with
  19. * your own identifying information: Portions Copyright
  20. * [year] [name of copyright owner]
  21. */
  22. /*
  23. * @(#)CocoLexer.java
  24. *
  25. * Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved.
  26. *
  27. * END_HEADER - DO NOT EDIT
  28. */
  29. package com.sun.encoder.coco.model;
  30. import java.io.File;
  31. import java.io.FileInputStream;
  32. import java.io.FileNotFoundException;
  33. import java.io.IOException;
  34. import java.io.InputStreamReader;
  35. import java.io.PushbackReader;
  36. import java.io.UnsupportedEncodingException;
  37. import java.util.ArrayList;
  38. /**
  39. * Tokenizer for Cobol Copybook input.
  40. *
  41. * @author Noel Ang
  42. *
  43. */
  44. public final class CocoLexer {
  45. private int mRecordRow;
  46. private int mRecordCol;
  47. private ArrayList mUndoTokens;
  48. private final PushbackReader mInputReader;
  49. private boolean mIsDisable72ColumnLimit;
  50. @Override
  51. public String toString() {
  52. StringBuilder sb = new StringBuilder();
  53. // sb.append("CocoLexer@").append(Integer.toHexString(hashCode()));
  54. sb.append("row=").append(mRecordRow);
  55. sb.append(" col=").append(mRecordCol);
  56. if (mIsDisable72ColumnLimit)
  57. sb.append(" Col>72OK");
  58. else
  59. sb.append(" Col<=72");
  60. if (mUndoTokens != null && mUndoTokens.size() > 0) {
  61. sb.append(" undoTokens=").append(mUndoTokens);
  62. }
  63. return sb.toString();
  64. }
  65. /**
  66. * Create a Cobol Copybook tokenizer for an input source.
  67. *
  68. * @param file The input source
  69. *
  70. * @throws FileNotFoundException if file does not point to an existing,
  71. * readable file
  72. */
  73. public CocoLexer(File file) throws FileNotFoundException {
  74. mInputReader = new PushbackReader(
  75. new InputStreamReader(new FileInputStream(file)),
  76. 10);
  77. mRecordRow = 1;
  78. mRecordCol = 1;
  79. mUndoTokens = new ArrayList();
  80. }
  81. /**
  82. * Create a Cobol Copybook tokenizer for an input source.
  83. *
  84. * @param file The input source
  85. * @param encoding Encoding of the input source
  86. *
  87. * @throws FileNotFoundException if file does not point to an
  88. * existing, readable file
  89. * @throws UnsupportedEncodingException if the specified encoding is not
  90. * supported
  91. */
  92. public CocoLexer(File file, String encoding)
  93. throws FileNotFoundException, UnsupportedEncodingException {
  94. mInputReader = new PushbackReader(
  95. new InputStreamReader(new FileInputStream(file), encoding),
  96. 10);
  97. mRecordRow = 1;
  98. mRecordCol = 1;
  99. mUndoTokens = new ArrayList();
  100. }
  101. /**
  102. * Provide hint to the lexer that it will no longer be used, and thus it
  103. * may release resources. After calling dispose(), undefined behaviour will
  104. * result if the lexer continues to be used.
  105. */
  106. public void dispose() {
  107. mUndoTokens.clear();
  108. try {
  109. mInputReader.close();
  110. } catch (IOException e) {
  111. // do nothing
  112. }
  113. }
  114. /**
  115. * Disable lexer compliance with IBM Cobol constraint of 72-column copybooks.
  116. * When disabled, the lexer allows copybook "Area B" content to extend past
  117. * column 72.
  118. *
  119. * @param b <code>true</code> to disable the 72-column constraint,
  120. * <code>false</code> to (re)enable it.
  121. */
  122. public void setDisable72ColumnLimit(boolean b) {
  123. mIsDisable72ColumnLimit = b;
  124. }
  125. /**
  126. * Indicate whether or not the lexer is configured to enforce the IBM Cobol
  127. * constraint of 72-column copybook content.
  128. *
  129. * @return <code>true</code> if the constraint is enforced, <code>false</code>
  130. * if it is not.
  131. *
  132. * @see #setDisable72ColumnLimit
  133. */
  134. public boolean is72ColumnLimitEnforced() {
  135. return !mIsDisable72ColumnLimit;
  136. }
  137. /**
  138. * Get the next token from the input.
  139. *
  140. * @return Next token, or null if no more tokens available
  141. * @throws java.io.IOException if an I/O error event occurs; note that this is
  142. * distinguishable from an EOF/EOD event!
  143. */
  144. public CocoToken getNextToken() throws IOException {
  145. // check if undoTokens has something
  146. if (mUndoTokens.size() > 0) {
  147. int idx = mUndoTokens.size() - 1;
  148. CocoToken undoToken = (CocoToken) mUndoTokens.remove(idx);
  149. return undoToken;
  150. }
  151. /*
  152. * The Cobol language has characters that serve dual purposes. For
  153. * example, the letter G at the head of sequence could yield a Cobol word
  154. * or a separator (DBCS literal opening delimiter: G"). Therefore it is not
  155. * sufficient to assume exclusive classifications of every scanned
  156. * character ...
  157. *
  158. * Keep this in mind ...
  159. */
  160. CocoToken token = null;
  161. int bytefat4;
  162. try {
  163. bytefat4 = peek();
  164. if (bytefat4 != -1) {
  165. char ch = (char) bytefat4;
  166. if (ch == '\n' || ch == '\r') {
  167. /* Case: newline (CR or CR LF) */
  168. if (ch == '\n') {
  169. token = new CocoToken(CocoLanguage.SPACE,
  170. CocoTokenTypes.SEPARATOR_TOKEN,
  171. mRecordRow,
  172. mRecordCol);
  173. token.setIsEOL(true);
  174. mRecordRow++;
  175. mRecordCol = 1;
  176. read();
  177. } else if (isCharsAvailable(2)) {
  178. char[] peeks = new char[2];
  179. peek(peeks);
  180. char ch2 = peeks[1];
  181. if (ch2 == '\n') {
  182. token = new CocoToken(CocoLanguage.SPACE,
  183. CocoTokenTypes.SEPARATOR_TOKEN,
  184. mRecordRow,
  185. mRecordCol);
  186. token.setIsEOL(true);
  187. mRecordRow++;
  188. mRecordCol = 1;
  189. read();
  190. read();
  191. }
  192. }
  193. } else if (Character.isDigit(ch)) {
  194. /* Case: digit */
  195. token = getNumeric();
  196. } else if (Character.isLetter(ch)) {
  197. /*
  198. * Case: alpha
  199. * ... but some delimiters begin with alphas ...
  200. */
  201. if (isPrettyDamnAnnoyingDelimiterNext()) {
  202. token = getSeparator();
  203. } else {
  204. token = getAlphaNumeric();
  205. }
  206. } else if (!CocoLanguage.isInCobolCharSet(ch)) {
  207. /* Case: character in system's set but not Cobol's set */
  208. read(); // discard
  209. token = new CocoToken(String.valueOf(ch),
  210. CocoTokenTypes.NONCOBOL_TOKEN,
  211. mRecordRow,
  212. mRecordCol);
  213. movePosition(1);
  214. } else {
  215. /* Case: special character or delimiter */
  216. if (isSeparatorNext()) {
  217. token = getSeparator();
  218. } else {
  219. token = getCobolCharacter();
  220. }
  221. }
  222. } else {
  223. // EOF - empty string is used to indicate this special token
  224. token = new CocoToken("EOF",
  225. CocoTokenTypes.EOF_TOKEN,
  226. mRecordRow,
  227. mRecordCol);
  228. }
  229. } catch (IOException ioe) {
  230. bytefat4 = -1;
  231. }
  232. return token;
  233. }
  234. /**
  235. * Put back a token into the token stream. The next call to
  236. * {@link #getNextToken} produces the re-inserted token. The method doesn't
  237. * actually check that the specified token is the same one it emitted in the
  238. * last prior call to getNextToken, so you can cheat, but cheating is bad.
  239. *
  240. * @param token Token to re-insert in "front" of the token stream
  241. * @throws java.lang.IllegalArgumentException if token is null
  242. */
  243. public void ungetToken(CocoToken token)
  244. throws IllegalArgumentException {
  245. if (token == null) {
  246. throw new IllegalArgumentException();
  247. }
  248. mUndoTokens.add(token);
  249. }
  250. /**
  251. * Scan for an alphanumeric lexeme.
  252. *
  253. * @return alphanumeric token, or null if no (or no valid) input left to form one
  254. * @throws java.io.IOException if an I/O error occurs
  255. */
  256. private CocoToken getAlphaNumeric() throws IOException {
  257. StringBuffer buffer = new StringBuffer();
  258. while (isAlphaOrDigitNext()) {
  259. buffer.append((char) read());
  260. }
  261. CocoToken token = null;
  262. if (buffer.length() > 0) {
  263. token = new CocoToken(buffer.toString(),
  264. CocoTokenTypes.ALNUM_TOKEN,
  265. mRecordRow,
  266. mRecordCol);
  267. movePosition(token.getLength());
  268. }
  269. return token;
  270. }
  271. /**
  272. * Scan for a numeric lexeme. If a letter character is encountered, it is
  273. * tolerated, and the method ends up returning an alphanumeric token instead.
  274. *
  275. * @return numeric or alphanumeric token, or null if no (or no valid) input
  276. * left to form one
  277. * @throws java.io.IOException if an I/O error occurs
  278. */
  279. private CocoToken getNumeric() throws IOException {
  280. StringBuffer buffer = new StringBuffer();
  281. /* preliminary guess */
  282. CocoTokenTypes tokenType = CocoTokenTypes.NUM_TOKEN;
  283. while (isAlphaOrDigitNext()) {
  284. char ch = (char) read();
  285. /* Accept alphas, but if I do, return an alphanumeric token instead. */
  286. if (Character.isLetter(ch)) {
  287. tokenType = CocoTokenTypes.ALNUM_TOKEN;
  288. }
  289. buffer.append(ch);
  290. }
  291. CocoToken token = null;
  292. if (buffer.length() > 0) {
  293. token = new CocoToken(buffer.toString(),
  294. tokenType, mRecordRow, mRecordCol);
  295. movePosition(token.getLength());
  296. }
  297. return token;
  298. }
  299. /**
  300. * Scan for a Cobol character.
  301. *
  302. * @return Cobol character token, or null if no (or no valid) input left to
  303. * form one
  304. * @throws java.io.IOException if an I/O error occurs
  305. */
  306. private CocoToken getCobolCharacter() throws IOException {
  307. CocoToken token = null;
  308. char ch = (char) peek();
  309. if (CocoLanguage.isInCobolCharSet(ch)) {
  310. token = new CocoToken(String.valueOf(ch),
  311. CocoTokenTypes.SPECIALCHAR_TOKEN,
  312. mRecordRow,
  313. mRecordCol);
  314. movePosition(1);
  315. read();
  316. }
  317. return token;
  318. }
  319. /**
  320. * Scan for a Cobol separator.
  321. *
  322. * @return Cobol separator token, or null if no (or no valid) input left to
  323. * form one
  324. * @throws java.io.IOException if an I/O error occurs
  325. */
  326. private CocoToken getSeparator() throws IOException {
  327. CocoToken token = null;
  328. while (token == null && isCharsAvailable(1) ) {
  329. int chi = read();
  330. char ch = (char) chi;
  331. // b== is a separator
  332. if (ch == ' ') {
  333. if (isCharsAvailable(2)) {
  334. char[] peeks = new char[2];
  335. peek(peeks);
  336. char ch2 = peeks[0];
  337. char ch3 = peeks[1];
  338. if (ch2 == '=' && ch3 == '=') {
  339. token = new CocoToken("==",
  340. CocoTokenTypes.SEPARATOR_TOKEN,
  341. mRecordRow,
  342. mRecordCol);
  343. movePosition(3);
  344. read();
  345. read();
  346. }
  347. }
  348. if (token == null) {
  349. if (CocoLanguage.isSeparator(ch)) {
  350. token = new CocoToken(String.valueOf(ch),
  351. CocoTokenTypes.SEPARATOR_TOKEN,
  352. mRecordRow,
  353. mRecordCol);
  354. movePosition(1);
  355. } else {
  356. unread(chi);
  357. break;
  358. }
  359. }
  360. }
  361. // Z", X", N", and G" are separators
  362. if (("GNXZ".indexOf(ch) != -1) && isCharsAvailable(1)) {
  363. char ch1 = Character.toUpperCase(ch);
  364. char ch2 = Character.toUpperCase((char) peek());
  365. if (CocoLanguage.isSeparator(ch1, ch2)) {
  366. char[] c = new char[2];
  367. c[0] = ch1;
  368. c[1] = ch2;
  369. token = new CocoToken(new String(c, 0, 2),
  370. CocoTokenTypes.SEPARATOR_TOKEN,
  371. mRecordRow,
  372. mRecordCol);
  373. movePosition(2);
  374. read();
  375. }
  376. }
  377. // covers every other case
  378. if (token == null) {
  379. if (CocoLanguage.isSeparator(ch)) {
  380. token = new CocoToken(String.valueOf(ch),
  381. CocoTokenTypes.SEPARATOR_TOKEN,
  382. mRecordRow,
  383. mRecordCol);
  384. movePosition(1);
  385. } else {
  386. unread(chi);
  387. break;
  388. }
  389. }
  390. }
  391. return token;
  392. }
  393. /**
  394. * Determine if the next input character is a numeric or alpha character.
  395. *
  396. * @return true if the next character is numeric or alpha, else false
  397. * @throws java.io.IOException if an I/O error occurs
  398. */
  399. private boolean isAlphaOrDigitNext() throws IOException {
  400. boolean isIt = false;
  401. int value = peek();
  402. if (value != -1) {
  403. char ch = (char) value;
  404. isIt = Character.isLetterOrDigit(ch);
  405. }
  406. return isIt;
  407. }
  408. /**
  409. * Determine if the next input character is in the Cobol character set.
  410. *
  411. * @return true if the next character is in the set, else false
  412. * @throws java.io.IOException if an I/O error occurs
  413. */
  414. private boolean isCobolCharNext() throws IOException {
  415. boolean isIt = false;
  416. int value = peek();
  417. if (value != -1) {
  418. char ch = (char) value;
  419. isIt = com.sun.encoder.coco.model.CocoLanguage.isInCobolCharSet(ch);
  420. }
  421. return isIt;
  422. }
  423. /**
  424. * Determine if the next input character is a separator.
  425. *
  426. * @return true if the next character is a separator, else false
  427. * @throws java.io.IOException if an I/O error occurs
  428. */
  429. private boolean isSeparatorNext() throws IOException {
  430. boolean isIt = false;
  431. int value = peek();
  432. if (value != -1) {
  433. char ch = (char) value;
  434. isIt = com.sun.encoder.coco.model.CocoLanguage.isSeparator(ch);
  435. }
  436. return isIt;
  437. }
  438. /**
  439. * Determine if the next few input characters in the token stream comprise
  440. * a multi-byte delimiters: X", Z", N", G" or the sequence == preceeded by a
  441. * space
  442. *
  443. * @return true if one of these delimiters have been found, else false
  444. */
  445. private boolean isPrettyDamnAnnoyingDelimiterNext() throws IOException {
  446. boolean isIt = false;
  447. char[] c = new char[3];
  448. int len = peek(c);
  449. if (len >= 2) {
  450. switch (Character.toUpperCase(c[0])) {
  451. case 'X':
  452. case 'Z':
  453. case 'N':
  454. case 'G':
  455. isIt = CocoLanguage.isSeparator(c[0], c[1]);
  456. break;
  457. case ' ':
  458. isIt = (len == 3);
  459. isIt = CocoLanguage.isSeparator(c[1], c[2]);
  460. }
  461. }
  462. return isIt;
  463. }
  464. /**
  465. * Determine if the token input has been exhausted.
  466. *
  467. * @return true if there is no more input, else false
  468. * @throws java.io.IOException if an I/O error occurs
  469. */
  470. private boolean isEod() throws IOException {
  471. boolean isIt = false;
  472. int value = peek();
  473. isIt = (value == -1);
  474. return isIt;
  475. }
  476. /**
  477. * Update row and column counters by the specified displacement.
  478. * The counters are coordinates into a position in a 2-axis view of the lexer's
  479. * input (i.e., view as a Cobol source file). This "file" has a width
  480. * determined by {@link com.sun.encoder.coco.model.CocoParser#SOURCE_LINE_LENGTH}.
  481. *
  482. * <p>When amount is a positive value, the counters are updated as if the
  483. * position they currently represent is displaced "forward" in the file by the
  484. * indicated amount. When amount is negative, the counters update to display
  485. * the position "backward".</p>
  486. *
  487. * <p>So, for example, if the file view has a width of 72, and
  488. * column = 71, and row = 10, and amount = 3, then in moving forward 3 units
  489. * causes the changes: column = 2, row = 11.</p>
  490. *
  491. * @param amount Positive or negative value indicating size of forward or
  492. * backward displacement
  493. */
  494. private void movePosition(int amount) {
  495. mRecordCol += amount;
  496. if (!mIsDisable72ColumnLimit) {
  497. if (mRecordCol > CocoParser.SOURCE_LINE_LENGTH) {
  498. mRecordRow += mRecordCol / CocoParser.SOURCE_LINE_LENGTH;
  499. mRecordCol = mRecordCol % CocoParser.SOURCE_LINE_LENGTH;
  500. }
  501. }
  502. }
  503. /**
  504. * Obtain the next character from the lexer input source without consuming
  505. * the character.
  506. *
  507. * @return Next input character or -1 if EOF/EOD occured
  508. *
  509. * @throws IOException if an I/O error occurs
  510. */
  511. private int peek() throws IOException {
  512. int value = mInputReader.read();
  513. if (value != -1) {
  514. mInputReader.unread(value);
  515. }
  516. return value;
  517. }
  518. /**
  519. * Fill an array with characters from the lexer input source without
  520. * removing the the characters from the input stream.
  521. *
  522. * @param c array to fill
  523. *
  524. * @return Number of elements in c actually filled with input data; 0 if c
  525. * is a zero-size array; -1 if c could not be filled because EOF/EOD
  526. * occured before obtaining any characters
  527. *
  528. * @throws IOException if an I/O error occurs
  529. * @throws NullPointerException if c is null
  530. */
  531. private int peek(char[] c)
  532. throws IOException, NullPointerException {
  533. int count = 0;
  534. int value = 0;
  535. int[] buf = new int[c.length];
  536. while ((value != -1) && (count < c.length)) {
  537. value = mInputReader.read();
  538. buf[count] = value;
  539. if (value != -1) {
  540. c[count++] = (char) value;
  541. }
  542. }
  543. if (count > 0) {
  544. mInputReader.unread(c, 0, count);
  545. }
  546. return count;
  547. }
  548. /**
  549. * Determines whether the next input stream read request for a given number
  550. * of characters will succeed. Success is defined as, not encountering an
  551. * EOD. Stream readiness (as reported by {@link InputStreamReader#ready()})
  552. * is not factored in the decision because blocking is desirable. This call
  553. * may itself block if the stream is not ready.
  554. *
  555. * @param mincount Desired number of ready characters in the stream
  556. *
  557. * @return <code>true</code> if the next read request for max(1,
  558. * <code>mincount</code>) will neither block or encounter EOD
  559. *
  560. * @throws IOException if an I/O error occurs
  561. */
  562. private boolean isCharsAvailable(int mincount)
  563. throws IOException {
  564. int count;
  565. int got;
  566. char[] cr;
  567. count = Math.max(0, mincount);
  568. cr = new char[count];
  569. got = peek(cr);
  570. return (got == count);
  571. }
  572. private int read() throws IOException {
  573. return mInputReader.read();
  574. }
  575. private void unread(int chi) throws IOException {
  576. mInputReader.unread(chi);
  577. }
  578. }