/plugins/Beauty/trunk/src/beauty/parsers/json/json.jj

# · Unknown · 762 lines · 679 code · 83 blank · 0 comment · 0 complexity · 6d4535ddf4d6837b1e0409ec8749302e MD5 · raw file

  1. /**
  2. * Copyright (c) 2010, Dale Anson
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without modification,
  6. * are permitted provided that the following conditions are met:
  7. *
  8. * - Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * - Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  19. * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  20. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  23. * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  24. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /**
  27. * A parser for json files. I borrowed some of the code from CSS3Parser, so
  28. * it may not all be relevant.
  29. */
  options {
      // JSON defines its own \uXXXX escapes inside strings (see the CHAR token).
      // They must reach the token manager untouched: with JAVA_UNICODE_ESCAPE on,
      // the generated char stream decodes them first, so "\u0022" would turn
      // into a bare quote and end the string early, and every other escape
      // would be silently rewritten in the beautified output.
      JAVA_UNICODE_ESCAPE = false;
      UNICODE_INPUT = true;
      // one parser/token manager object per parse, no static generated methods
      STATIC = false;
  }
  35. PARSER_BEGIN(JsonParser)
  36. package beauty.parsers.json;
  37. import java.io.*;
  38. import java.util.*;
  39. public class JsonParser {
  40. Token t;
  41. // shouldn't use this, a specific line separator should be set based on
  42. // buffer settings. Of course, it may be the same as what the buffer
  43. // uses anyway.
  44. String lineSep = System.getProperty("line.separator");
  45. public void setIndentWidth(int i) {
  46. token_source.setIndentWidth(i);
  47. }
  48. public void setTabSize(int size) {
  49. jj_input_stream.setTabSize(size);
  50. }
  51. public int getTabSize() {
  52. // this really isn't necessary for this beautifier. Setting the tab
  53. // size on the input stream makes the token locations more accurate
  54. // is all.
  55. return jj_input_stream.getTabSize(0);
  56. }
  57. public void setUseSoftTabs(boolean b) {
  58. token_source.setUseSoftTabs(b);
  59. }
  60. /**
  61. * @return The beautified text.
  62. */
  63. public String getText() {
  64. return token_source.getText();
  65. }
  66. public void resetTokenSource() {
  67. token_source.reset();
  68. }
  69. private void add(Token t) {
  70. token_source.add(t);
  71. }
  72. private void add(String s) {
  73. token_source.add(s);
  74. }
  75. private void trim() {
  76. token_source.trim();
  77. }
  78. private void trimWhitespace() {
  79. token_source.trimWhitespace();
  80. }
  81. private void write() {
  82. token_source.write();
  83. }
  84. private void writeln() {
  85. token_source.writeln();
  86. }
  87. public void setLineSeparator(String le) {
  88. lineSep = le;
  89. token_source.setLineSeparator(le);
  90. }
  91. public static void main(String args[]) {
  92. JsonParser parser;
  93. if (args.length == 0) {
  94. System.out.println("JSON Parser: Reading from standard input . . .");
  95. parser = new JsonParser(System.in);
  96. } else if (args.length == 1) {
  97. System.out.println("JSON Parser: Reading from file " + args[0] + " . . .");
  98. try {
  99. parser = new JsonParser(new java.io.FileInputStream(args[0]));
  100. } catch (java.io.FileNotFoundException e) {
  101. System.out.println("JSON Parser: File " + args[0] + " not found.");
  102. return;
  103. }
  104. } else {
  105. System.out.println("JSON Parser: Usage is one of:");
  106. System.out.println(" java JsonParser < inputfile");
  107. System.out.println("OR");
  108. System.out.println(" java JsonParser inputfile");
  109. return;
  110. }
  111. try {
  112. parser.enable_tracing();
  113. parser.parse();
  114. System.out.println("JSON Parser: JSON input parsed successfully.");
  115. } catch (ParseException e) {
  116. System.out.println("JSON Parser: Encountered errors during parse.");
  117. System.out.println(e.getMessage());
  118. }
  119. }
  120. }
  121. PARSER_END(JsonParser)
  122. /*******************************************************************************
  123. JSON token descriptions start here
  124. *******************************************************************************/
  125. // white space
  // Characters discarded between tokens: space, tab, line feed, carriage
  // return and form feed.
  SKIP :
  {
      " " | "\t" | "\n" | "\r" | "\f"
  }
  134. // The JSON standard does not allow comments of any variety, although people use
  135. // javascript comments and html comments within json files. Files containing
  136. // comments are not acceptable to many parsers since the standard does not allow
  137. // them. THIS PARSER WILL NOT ACCEPT COMMENTS. However, if you'd rather it just
  138. // silently skipped over comments, uncomment this block. This will cause the
  139. // beautifier to remove comments.
  140. // TODO: add a user setting for this.
  141. /*
  142. SKIP :
  143. {
  144. <SINGLE_LINE_COMMENT: "//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
  145. | <BLOCK_COMMENT: "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/">
  146. | <HTML_COMMENT: "<!--" (~["-"])* "-" ("-" | (~["-",">"] (~["-"])* "-"))* ">">
  147. }
  148. */
  149. // literals
  TOKEN :
  {
      // structural characters
      <LBRACE: "{">
  |   <RBRACE: "}">
  |   <LSQUARE: "[">
  |   <RSQUARE: "]">
  |   <COMMA: ",">
  |   <COLON: ":">
      // literal names
  |   <TRUE: "true">
  |   <FALSE: "false">
  |   <NULL: "null">
  |
      // json only allows base 10 numbers, no octal or hex or binary, at least, not as a number.
      // Unicode values are allowed in char and string. Need to define numbers ahead of characters
      // since numbers can also match as characters.
      // Shape: optional minus, integer part without leading zeros, optional
      // fraction, optional exponent.
      <NUMBER: (["-"])? ((["0"]) | (["1"-"9"] (["0"-"9"])*)) ("." (["0"-"9"])+)? (["e","E"] (["+","-"])? (["0"-"9"])+ )?>
  |
      // a 'char' is any unicode character except " (double quote) or \ (backslash) or
      // control character (unicode range 0000 - 001f). Certain special characters and
      // certain control characters are allowed if escaped with \: ", \, /, b, f, n, r, t.
      // Unicode characters are allowed using the \\u four-hex-digits notation, e.g.
      // \\u04af
      <CHAR:(
          (~["\"", "\\", "\u0000"-"\u001f"])
        | ("\\"
              ( ["u"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"]
              // "/" belongs in this list: \/ is a legal JSON escape
              | ["\"", "\\", "/", "b", "f", "n", "r", "t"]
              )
          )
      )>
  |
      // A string is a collection of zero or more Unicode characters, wrapped in
      // double quotes, using backslash escapes. A character is represented as a
      // single character string.
      // TODO: allow strings not wrapped in double quotes -- make it a user setting.
      <STRING: "\"" (<CHAR>)* "\"">
  }
  195. /*******************************************************************************
  196. JSON grammar starts here
  197. *******************************************************************************/
  // Entry point: the input must be a single object or a single array followed
  // by end of input. Whatever is still in the accumulator is flushed at the end.
  void parse() :
  {
  }
  {
      ( object() | array() ) <EOF>
      {
          write();
      }
  }
  211. /*
  212. Objects are formatted like this:
  213. {
  214. (members())*
  215. }
  216. The left brace triggers an indent level increase, for the object members.
  217. The right brace a corresponding indent level decrease.
  218. */
  void object() :
  {
  }
  {
      <LBRACE>
      {
          // finish the current line, put the brace on a line of its own, and
          // indent everything up to the matching right brace one level deeper
          writeln();
          add("{");
          writeln();
          token_source.level++;
      }
      [ members() ]
      t=<RBRACE>
      {
          // finish the last member's line, then step the indent back out
          // before adding the brace so it lines up with the opening one
          writeln();
          token_source.level--;
          add("}");
          // break the line after the brace only when the following token has
          // already been fetched (t.next is set) and it is not a comma
          boolean nextKnown = t.next != null;
          if (nextKnown && t.next.kind != JsonParserConstants.COMMA) {
              writeln();
          }
      }
  }
  238. /*
  239. Arrays are formatted like this:
  240. [ element(, element...) ]
  241. Note that if an element is an object, it will look like this:
  242. [
  243. {
  244. ...
  245. }
  246. ]
  247. */
  // An array is written inline: "[", the optional elements, "]". No line
  // breaks or indent changes are requested here.
  void array() :
  {
  }
  {
      <LSQUARE> { add("["); }
      [ elements() ]
      <RSQUARE> { add("]"); }
  }
  262. /*
  263. A member is one or more key/value pairs.
  264. Pairs are separated by a comma.
  265. Comma signals a newline to be inserted.
  266. A member is formatted like this:
  267. key: value(, key: value...)
  268. */
  // One or more "key: value" pairs separated by commas. Each comma ends the
  // current output line. Written as a loop rather than by recursing into
  // members(), so the parser stack depth does not grow with the number of
  // pairs in an object.
  void members() :
  {
  }
  {
      key() <COLON> { add(": "); } value()
      (
          <COMMA> { trimWhitespace(); add(", "); writeln(); }
          key() <COLON> { add(": "); } value()
      )*
  }
  275. /*
  276. Elements are members of an array.
  277. Elements are one or more values separated by commas.
  278. Elements do not trigger insertion of newlines nor change of indent level.
  279. */
  // One or more values separated by commas, kept on the current line. Written
  // as a loop rather than by recursing into elements(), so the parser stack
  // depth does not grow with the length of the array.
  void elements() :
  {
  }
  {
      value()
      (
          <COMMA> { add(", "); }
          value()
      )*
  }
  286. /*
  287. A value is the RHS of a pair. It can be pretty much any of the
  288. other types.
  289. */
  // A value is a string, a number, an object, an array, or one of the literal
  // names true / false / null. The literal names are added to the output as
  // they were matched.
  void value() :
  {
  }
  {
      string()
  |   number()
  |   object()
  |   array()
  |   ( t=<TRUE> | t=<FALSE> | t=<NULL> ) { add(t); }
  }
  304. /*
  305. A key is the LHS of a pair. It is a simple string.
  306. */
  // Matches the STRING token that names a pair and hands it to the output.
  void key() :
  {
  }
  {
      t=<STRING> { add(t); }
  }
  315. /*
  316. A string is a string, and it's the same as a key.
  317. */
  // Matches a STRING token used as a value and hands it to the output.
  void string() :
  {
  }
  {
      t=<STRING> { add(t); }
  }
  326. /*
  327. A number is a number.
  328. */
  // Matches a NUMBER token and hands it to the output as written.
  void number() :
  {
  }
  {
      t=<NUMBER> { add(t); }
  }
  337. TOKEN_MGR_DECLS :
  338. {
  339. // line buffer, text is accumulated here, then written to the output stream
  340. // on end of line marker.
  341. static StringBuilder b = new StringBuilder();
  342. // all text is accumulated here. When processing is complete, this buffer
  343. // will contain the final beautified text.
  344. static StringBuilder outputBuffer = new StringBuilder();
  345. // accumulate pieces a token or string at a time. The objects in this array
  346. // will be converted to strings, padded as appropriate, and added to the
  347. // line buffer b. This is the "accumulator".
  348. static ArrayList a = new ArrayList();
  349. // where to write the completely beautified code.
  350. private static PrintWriter out = null;
  351. // level of indentation
  352. static int level = 0;
  353. // width of indent
  354. static int indent_width = 4;
  355. static String indent = " ";
  356. static String double_indent = indent + indent;
  357. // the soft tab setting from jEdit, use soft tabs by default.
  358. static boolean useSoftTabs = true;
  359. // line separator, defaults to system line separator, but can be set to
  360. // a specific separator
  361. static String ls = System.getProperty("line.separator");
  362. static void reset() {
  363. b = new StringBuilder();
  364. outputBuffer = new StringBuilder();
  365. a.clear();
  366. level = 0;
  367. }
  368. static String getText() {
  369. return outputBuffer.toString();
  370. }
  371. static void setLineSeparator(String le) {
  372. ls = le;
  373. }
  374. static void setIndentWidth(int w) {
  375. indent_width = w;
  376. if (indent_width <= 0) {
  377. indent_width = 4;
  378. }
  379. indent = "";
  380. for (int i = 0; i < w; i++) {
  381. indent += " ";
  382. }
  383. double_indent = indent + indent;
  384. }
  385. static void setUseSoftTabs(boolean b) {
  386. useSoftTabs = b;
  387. if (b) {
  388. setIndentWidth(indent_width);
  389. }
  390. else {
  391. indent = "\t";
  392. double_indent = "\t\t";
  393. }
  394. }
  395. // add a token to the accumulator
  396. static void add(Token t) {
  397. if (t != null) {
  398. a.add(t);
  399. }
  400. }
  401. // add a string to the accumulator
  402. static void add(String s) {
  403. if (s != null) {
  404. a.add(s);
  405. }
  406. }
  407. // trim spaces from the last item in the accumulator
  408. static void trim() {
  409. if (a.size() == 0)
  410. return;
  411. Object o = a.get(a.size() - 1);
  412. StringBuilder sb = new StringBuilder();
  413. if (o instanceof Token)
  414. sb.append( ((Token)o).image );
  415. else
  416. sb.append((String)o);
  417. while(sb.length() > 0 && sb.charAt(sb.length() - 1) == ' ')
  418. sb.deleteCharAt(sb.length() - 1);
  419. a.set(a.size() - 1, sb.toString() );
  420. }
  421. // trim a single new line from the end of the output buffer
  422. static void trimNL() {
  423. if(outputBuffer.length() > 0 && outputBuffer.charAt(outputBuffer.length() - 1) == '\n')
  424. outputBuffer.deleteCharAt(outputBuffer.length() - 1);
  425. if(outputBuffer.length() > 0 && outputBuffer.charAt(outputBuffer.length() - 1) == '\r')
  426. outputBuffer.deleteCharAt(outputBuffer.length() - 1);
  427. }
  428. // trim all \n and/or \r from the end of the given string
  429. static void trimNL(String s) {
  430. StringBuilder sb = new StringBuilder(s);
  431. while(sb.length() > 0 && (sb.charAt(sb.length() - 1) == '\r' || sb.charAt(sb.length() - 1) == '\n'))
  432. sb.deleteCharAt(sb.length() - 1);
  433. }
  434. // trim all whitespace (\r, \n, space, \t) from the start of the given string
  435. static String trimStart(String s) {
  436. StringBuilder sb = new StringBuilder(s);
  437. while(sb.length() > 0 && (sb.charAt(0) == '\r'
  438. || sb.charAt(0) == '\n'
  439. || sb.charAt(0) == '\t'
  440. || sb.charAt(0) == ' ')) {
  441. sb.deleteCharAt(0);
  442. }
  443. return sb.toString();
  444. }
  445. // trim up to max whitespace (\r, \n, space, \t) from the start of the given string
  446. static String trimStart(String s, int max) {
  447. StringBuilder sb = new StringBuilder(s);
  448. int trimmed = 0;
  449. while(sb.length() > 0 && Character.isWhitespace(sb.charAt(0)) && trimmed < max) {
  450. sb.deleteCharAt(0);
  451. ++trimmed;
  452. }
  453. return sb.toString();
  454. }
  455. // trims whitespace (\r, \n, space, \t) from the last items in the
  456. // accumulator. If the last item is all whitespace, continues on to the
  457. // previous until a non-whitespace character is encountered. If the
  458. // entire accumulator is whitespace, continues to trim whitespace from the
  459. // outputBuffer.
  460. static void trimWhitespace() {
  461. for (int i = a.size() - 1; i >= 0; i-- ) {
  462. Object o = a.get(i);
  463. StringBuilder sb = new StringBuilder();
  464. if (o instanceof Token)
  465. sb.append( ((Token)o).image );
  466. else
  467. sb.append((String)o);
  468. while(sb.length() > 0 && (sb.charAt(sb.length() - 1) == '\r'
  469. || sb.charAt(sb.length() - 1) == '\n'
  470. || sb.charAt(sb.length() - 1) == '\t'
  471. || sb.charAt(sb.length() - 1) == ' ')) {
  472. sb.deleteCharAt(sb.length() - 1);
  473. }
  474. if (sb.length() == 0) {
  475. a.remove(i);
  476. }
  477. else {
  478. a.set(i, sb.toString());
  479. break;
  480. }
  481. }
  482. if (a.size() == 0) {
  483. while(outputBuffer.length() > 0 && (outputBuffer.charAt(outputBuffer.length() - 1) == '\r'
  484. || outputBuffer.charAt(outputBuffer.length() - 1) == '\n'
  485. || outputBuffer.charAt(outputBuffer.length() - 1) == '\t'
  486. || outputBuffer.charAt(outputBuffer.length() - 1) == ' ')) {
  487. outputBuffer.deleteCharAt(outputBuffer.length() - 1);
  488. }
  489. }
  490. }
  491. // writes the contents of the accumulator to the outputBuffer. The line
  492. // buffer (b) is used to build the line.
  493. static void write() {
  494. try {
  495. b.setLength(0); // clear the line buffer
  496. // this next section builds the output string while protecting
  497. // string literals. All extra spaces are removed from the output
  498. // string, except that string literals are left as is.
  499. ArrayList list = new ArrayList();
  500. String s = new String("");
  501. for (int i = 0; i < a.size(); i++) {
  502. Object o = a.get(i);
  503. if (o instanceof Token) {
  504. Token token = (Token)o;
  505. if (token.kind == JsonParserConstants.STRING) {
  506. s = s.replaceAll("[ ]+", " ");
  507. list.add(s);
  508. s = new String("");
  509. list.add(token.image);
  510. }
  511. else {
  512. s += ((Token)o).image;
  513. s = s.replaceAll("[ ]+", " ");
  514. }
  515. }
  516. else {
  517. s += (String)o;
  518. s = s.replaceAll("[ ]+", " ");
  519. }
  520. }
  521. for (int i = 0; i < list.size(); i++) {
  522. b.append((String)list.get(i));
  523. }
  524. b.append(s);
  525. s = b.toString();
  526. // check for blank line(s)
  527. String maybe_blank = new String(s);
  528. if (maybe_blank.trim().isEmpty()) {
  529. // yep, it's a blank, so just print out a line separator
  530. outputBuffer.append(ls);
  531. a.clear();
  532. return;
  533. }
  534. // indent --
  535. // most lines get indented, but there are a few special cases:
  536. // "else" gets put on the same line as the closing "}" for the "if",
  537. // so don't want to indent. Similarly with "catch" and "finally".
  538. // The "while" at the end of a "do" loop is marked as "^while" to
  539. // differentiate it from a regular "while" block. "else if" is also
  540. // a special case.
  541. if (!s.startsWith(" {")) {
  542. s = s.trim();
  543. for (int i = 0; i < level; i++) {
  544. s = indent + s;
  545. }
  546. }
  547. // check if the output buffer does NOT end with a new line. If it
  548. // doesn't, remove any leading whitespace from this line
  549. if (!endsWith(outputBuffer, "\n") && !endsWith(outputBuffer, "\r")) {
  550. s = trimStart(s);
  551. }
  552. // check that there aren't extra spaces in the buffer already --
  553. // this handles the case where the output buffer ends with a space
  554. // and the new string starts with a space, don't want 2 spaces.
  555. if (s.startsWith(" ") && endsWith(outputBuffer, " ")) {
  556. s = s.substring(1);
  557. }
  558. // check that there is one space between the end of the output
  559. // buffer and this line -- this handles the case where the output
  560. // buffer does not end in a space and the new string does not start
  561. // with a space, want one space in between.
  562. if (!s.startsWith(" ")
  563. && !endsWith(outputBuffer, " ")
  564. && !endsWith(outputBuffer, "\r")
  565. && !endsWith(outputBuffer, "\n")
  566. && outputBuffer.length() > 0) {
  567. outputBuffer.append(" ");
  568. }
  569. // by the Sun standard, there is no situation where '(' is followed
  570. // by a space or ')' is preceded with by a space
  571. s = s.replaceAll("[(][ ]", "(");
  572. s = s.replaceAll("[ ][)]", ")");
  573. // there should be no situation where a comma is preceded by a space,
  574. // although that seems to happen when formatting string arrays.
  575. s = s.replaceAll("\\s+[,]", ",");
  576. // finally! add the string to the output buffer
  577. // check for line length, may need to wrap. Sun says to avoid lines
  578. // longer than 80 characters. This doesn't work well yet, so I've
  579. // commented out the wrapping code. Still need to clean out the
  580. // wrapping markers.
  581. //s = s.replaceAll("[\u001c]", "");
  582. outputBuffer.append(s);
  583. /*
  584. int wrap_sep_count = countWrapSep(s);
  585. if (s.length() - wrap_sep_count > 80) {
  586. String[] lines = wrapLines(s);
  587. if ( lines != null ) {
  588. for (int i = 0; i < lines.length; i++) {
  589. outputBuffer.append(lines[i]).append(ls);
  590. }
  591. }
  592. else {
  593. // whack any remaining \u001c characters
  594. s = s.replaceAll("[\u001c]", "");
  595. outputBuffer.append(s);
  596. }
  597. }
  598. else {
  599. // whack any remaining \u001c characters
  600. s = s.replaceAll("[\u001c]", "");
  601. outputBuffer.append(s);
  602. }
  603. */
  604. // clear the accumulator for the next line
  605. a.clear();
  606. }
  607. catch(Exception e) {
  608. e.printStackTrace();
  609. }
  610. }
  611. static void writeln() {
  612. write();
  613. trimNL();
  614. outputBuffer.append(ls);
  615. }
  616. static int countWrapSep(String s) {
  617. int count = 0;
  618. for (int i = 0; i < s.length(); i++) {
  619. if (s.charAt(i) == '\u001c') {
  620. ++count;
  621. }
  622. }
  623. return count;
  624. }
  625. // needs work, does a wrap, but not per spec
  626. static String[] wrapLines(String s) {
  627. if (s.length() <= 80) {
  628. return new String[]{s};
  629. }
  630. int wc = countWrapSep(s);
  631. if (wc > 0) {
  632. int[] break_points = new int[wc];
  633. int offset = 0;
  634. for (int i = 0; i < wc; i++) {
  635. int index = s.indexOf('\u001c', offset);
  636. break_points[i] = index;
  637. offset = index + 1;
  638. }
  639. int first_break = -1;
  640. for (int i = 0; i < break_points.length; i++) {
  641. int possible = break_points[i];
  642. if (possible > 80) {
  643. break;
  644. }
  645. first_break = possible;
  646. }
  647. if ( first_break == -1 ) {
  648. first_break = s.length();
  649. }
  650. int ws_length = 0;
  651. for (int i = 0; i < s.length(); i++) {
  652. if (s.charAt(i) == ' ')
  653. ++ws_length;
  654. else
  655. break;
  656. }
  657. String leading_ws = s.substring(0, ws_length);
  658. String head = s.substring(0, first_break);
  659. String tail = s.substring(first_break);
  660. //head = head.replaceAll("[\u001c]", "");
  661. //tail = tail.replaceAll("[\u001c]", "");
  662. return new String[]{head, leading_ws + double_indent + tail};
  663. }
  664. return null;
  665. }
  666. // StringBuilder doesn't have an "endsWith" method
  667. static boolean endsWith(StringBuilder sb, String s) {
  668. if (sb == null && s == null)
  669. return true;
  670. if (sb == null && s != null)
  671. return false;
  672. if (s == null)
  673. return false;
  674. if (sb.length() < s.length())
  675. return false;
  676. String end = sb.substring(sb.length() - s.length());
  677. return end.equals(s);
  678. }
  679. }