json.jj | searchcode

/plugins/Beauty/trunk/src/beauty/parsers/json/json.jj

# · Unknown · 762 lines · 679 code · 83 blank · 0 comment · 0 complexity · 6d4535ddf4d6837b1e0409ec8749302e MD5 · raw file

/**
 * Copyright (c) 2010, Dale Anson
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification, 
 * are permitted provided that the following conditions are met:
 * 
 *     - Redistributions of source code must retain the above copyright notice, this 
 *     list of conditions and the following disclaimer.
 *     
 *     - Redistributions in binary form must reproduce the above copyright notice, 
 *     this list of conditions and the following disclaimer in the documentation 
 *     and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * A parser for json files.  I borrowed some of the code from CSS3Parser, so
 * it may not all be relevant.
 */
// Generator options for this grammar.
// NOTE(review): JAVA_UNICODE_ESCAPE was presumably carried over from the
// CSS3Parser grammar this file borrows from.  JSON strings have their own
// backslash-u escapes (see the CHAR token), so confirm that handling such
// escapes in the character stream is really wanted for a beautifier.
// NOTE(review): STATIC is false, yet the TOKEN_MGR_DECLS section of this file
// declares its buffers and settings as static members -- confirm that sharing
// that state between parser instances is intended.
options {
   JAVA_UNICODE_ESCAPE = true;
   UNICODE_INPUT = true;
   STATIC = false;
}

PARSER_BEGIN(JsonParser)
package beauty.parsers.json;

import java.io.*;
import java.util.*;

/**
 * Parser front end for the JSON beautifier.  The grammar productions collect
 * output through the token manager (token_source); the public methods here
 * forward formatting settings to it and hand back the beautified text.
 */
public class JsonParser {
    // Most recently matched token.  The productions assign it and immediately
    // pass it on (or inspect it), so it is only valid right after a match.
    Token t;
    
    // shouldn't use this, a specific line separator should be set based on
    // buffer settings.  Of course, it may be the same as what the buffer
    // uses anyway.
    String lineSep = System.getProperty("line.separator");

    /**
     * Forwards the indent width to the token manager.
     * @param i the indent width
     */
    public void setIndentWidth(int i) {
        token_source.setIndentWidth(i);
    }

    /**
     * Sets the tab size on the input character stream.
     * @param size the tab size
     */
    public void setTabSize(int size) {
        jj_input_stream.setTabSize(size);
    }

    /**
     * @return the tab size reported by the input character stream
     */
    public int getTabSize() {
        // this really isn't necessary for this beautifier.  Setting the tab
        // size on the input stream makes the token locations more accurate
        // is all.
        return jj_input_stream.getTabSize(0);
    }

    /**
     * Forwards the soft tab setting to the token manager.
     * @param b true to indent with spaces, false to indent with tab characters
     */
    public void setUseSoftTabs(boolean b) {
        token_source.setUseSoftTabs(b);
    }
    
    /**
     * @return The beautified text.    
     */
    public String getText() {
        return token_source.getText();
    }

    /**
     * Clears the output accumulated by the token manager and resets its
     * indent level.
     */
    public void resetTokenSource() {
        token_source.reset();
    }

    // the helpers below simply forward to the token manager so that the
    // grammar actions can call them without a qualifier.

    private void add(Token t) {
        token_source.add(t);
    }

    private void add(String s) {
        token_source.add(s);
    }

    private void trim() {
        token_source.trim();
    }

    private void trimWhitespace() {
        token_source.trimWhitespace();
    }

    private void write() {
        token_source.write();
    }

    private void writeln() {
        token_source.writeln();
    }

    /**
     * Sets the line separator used for the output, both here and in the
     * token manager.
     * @param le the line separator
     */
    public void setLineSeparator(String le) {
        lineSep = le;
        token_source.setLineSeparator(le);
    }
    
    /**
     * Command line entry point.  Parses standard input when called without
     * arguments, or the single file named on the command line, with parser
     * tracing enabled, and reports whether the parse succeeded.
     * @param args empty, or exactly one file name
     */
    public static void main(String args[]) {
        JsonParser parser;
        // only set when this method opened a file itself, so that it can be
        // closed again once parsing is finished
        InputStream fileIn = null;
        if (args.length == 0) {
            System.out.println("JSON Parser:  Reading from standard input . . .");
            parser = new JsonParser(System.in);
        } else if (args.length == 1) {
            System.out.println("JSON Parser:  Reading from file " + args[0] + " . . .");
            try {
                fileIn = new java.io.FileInputStream(args[0]);
            } catch (java.io.FileNotFoundException e) {
                System.out.println("JSON Parser:  File " + args[0] + " not found.");
                return;
            }
            parser = new JsonParser(fileIn);
        } else {
            System.out.println("JSON Parser:  Usage is one of:");
            System.out.println("         java JsonParser < inputfile");
            System.out.println("OR");
            System.out.println("         java JsonParser inputfile");
            return;
        }
        try {
            parser.enable_tracing();
            parser.parse();
            System.out.println("JSON Parser:  JSON input parsed successfully.");
        } catch (ParseException e) {
            System.out.println("JSON Parser:  Encountered errors during parse.");
            System.out.println(e.getMessage());
        } finally {
            if (fileIn != null) {
                try {
                    fileIn.close();
                } catch (IOException ignored) {
                    // the parse result has already been reported; a failure
                    // to close the input file changes nothing for the user
                }
            }
        }
    }
}

PARSER_END(JsonParser)

/*******************************************************************************

JSON token descriptions start here

*******************************************************************************/


// white space
// Space, tab, line feed, carriage return and form feed are listed as SKIP
// expressions, so they are matched and dropped without producing a token for
// the grammar productions.
SKIP : 
{
    " "
    | "\t"
    | "\n"
    | "\r"
    | "\f"
}

// The JSON standard does not allow comments of any variety, although people use
// javascript comments and html comments within json files.  Files containing
// comments are not acceptable to many parsers since the standard does not allow
// them.  THIS PARSER WILL NOT ACCEPT COMMENTS. However, if you'd rather it just
// silently skipped over comments, uncomment this block. This will cause the
// beautifier to remove comments.
// TODO: add a user setting for this.
/*
SKIP : 
{
    <SINGLE_LINE_COMMENT: "//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
    | <BLOCK_COMMENT: "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/">
    | <HTML_COMMENT: "<!--" (~["-"])* "-" ("-" | (~["-",">"] (~["-"])* "-"))* ">"> 
}
*/
// literals
// Structural characters, the three literal names, numbers and strings.
TOKEN : 
{
    <LBRACE: "{">
    |
    <RBRACE: "}">
    |
    <LSQUARE: "[">
    |
    <RSQUARE: "]">
    |
    <COMMA: ",">
    |
    <COLON: ":">  
    |
    <TRUE: "true">
    |
    <FALSE: "false">
    |
    <NULL: "null">
    |  
    // json only allows base 10 numbers, no octal or hex or binary, at least, not as a number.
    // Unicode values are allowed in char and string. Need to define numbers ahead of characters
    // since numbers can also match as characters.
    // Shape: optional minus, integer part without leading zeros, optional
    // fraction, optional exponent.
    <NUMBER: (["-"])? ((["0"]) | (["1"-"9"] (["0"-"9"])*)) ("." (["0"-"9"])+)? (["e","E"] (["+","-"])? (["0"-"9"])+ )?>
    |
    // a 'char' is any unicode character except " (double quote) or \ (backslash) or
    // control character (unicode range 0000 - 001f).  Certain special characters and
    // certain control characters are allowed if escaped with \: ", \, /, b, f, n, r, t.
    // Unicode characters are allowed using the \\u four-hex-digits notation, e.g.
    // \\u04af
    // The escaped solidus ("\/") is part of the list above and is accepted
    // here as well, so strings such as "http:\/\/example.com" tokenize.
    <CHAR:(   
        (~["\"", "\\", "\u0000"-"\u001f"])
        | ("\\"
            ( ["u"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"] ["0"-"9","a"-"f", "A"-"F"]
                | ["\"", "\\", "/", "b", "f", "n", "r", "t"]
                )
            )
        )>
    |
    // A string is a collection of zero or more Unicode characters, wrapped in 
    // double quotes, using backslash escapes. A character is represented as a 
    // single character string.
    // TODO: allow strings not wrapped in double quotes -- make it a user setting.
    <STRING: "\"" (<CHAR>)* "\"">
}


/*******************************************************************************

JSON grammar starts here

*******************************************************************************/

/*
    Start production.  The input must be exactly one object or one array,
    followed by the end of the input.  Once it has been matched, whatever is
    still held in the accumulator is written to the output.
*/
void parse() : 
{}
{
    ( object() | array() ) <EOF>
    { write(); }
}

/*
    Objects are formatted like this:
    {
        (members())*
    }
    
    The left brace triggers an indent level increase, for the object members. 
    The right brace a corresponding indent level decrease.
    
    The opening brace is put on a line of its own: the pending output is
    flushed first, then the brace is written, then the indent level is raised.
    The closing brace is handled in the mirrored order.
*/
void object() : 
{
}
{
    <LBRACE> { 
        // finish the line in progress so the brace starts a new one
        writeln(); 
        add("{"); 
        writeln(); 
        ++token_source.level; 
    }
    (members())? 
    t=<RBRACE> { 
        // finish the last member line before dropping back one level
        writeln(); 
        --token_source.level; 
        add("}"); 
        // end the line after the brace unless a comma follows.
        // NOTE(review): this relies on t.next already being filled in when
        // the action runs; if the following token has not been read yet,
        // t.next is null and no line break is written here -- confirm that
        // this branch is reachable.
        if (t.next != null && t.next.kind != JsonParserConstants.COMMA) 
            writeln(); 
    }
}

/*
    An array is an opening square bracket, an optional list of elements and
    a closing square bracket:
    [ element(, element...) ]
    
    The brackets themselves never start a new line.  An element that is an
    object brings its own line breaks, so such an array comes out as:
    [
        {
            ...
        }
    ]
*/
void array() : 
{}
{
    <LSQUARE> { add("["); }
    [ elements() ]
    <RSQUARE> { add("]"); }
}

/*
    Members are one or more key/value pairs, separated by commas.
    Each comma ends the current output line, so every pair is written on a
    line of its own:
    
    key: value,
    key: value
    
    The list is matched with a loop rather than by recursion, so the depth of
    the call stack does not grow with the number of pairs in an object.  The
    actions run in the same order as they would with a recursive rule.
*/
void members() : 
{
}
{
    key() <COLON> { add(": "); } value()
    (
        // drop anything blank left behind by the value, then close the line
        <COMMA> { trimWhitespace(); add(", "); writeln(); }
        key() <COLON> { add(": "); } value()
    )*
}

/*
    Elements are the contents of an array: one or more values separated by
    commas.  Elements do not trigger insertion of newlines nor change of
    indent level.
    
    The list is matched with a loop rather than by recursion, so the depth of
    the call stack does not grow with the number of values in an array.
*/
void elements() : 
{
}
{
    value() ( <COMMA> { add(", "); } value() )*
}

/*
    A value is what stands to the right of a colon or inside an array: a
    string, a number, an object, an array, or one of the literals true,
    false and null.  The literals are added to the output as matched.
*/
void value() : 
{}
{
    string()
    |
    number()
    |
    object()
    |
    array()
    |
    ( t=<TRUE> | t=<FALSE> | t=<NULL> ) { add(t); }
}

/*
    A key is the name part of a pair, to the left of the colon.  It is a
    single STRING token and is added to the output as matched.
*/
void key() :
{}
{
    t=<STRING> { add(t); }
}

/*
    A string value.  Matched exactly like a key: a single STRING token that
    is added to the output as matched.
*/
void string() : 
{}
{
    t=<STRING> { add(t); }
}

/*
    A number value: a single NUMBER token, added to the output as matched.
*/
void number() :
{}
{
    t=<NUMBER> { add(t); }
}


TOKEN_MGR_DECLS :
{

    // NOTE(review): everything below is static, so it is shared by every
    // token manager instance -- confirm that only one parser is used at a time.

    // line buffer, text is accumulated here, then written to the output stream
    // on end of line marker.  write() clears it before building each line.
    static StringBuilder b = new StringBuilder();

    // all text is accumulated here.  When processing is complete, this buffer
    // will contain the final beautified text.
    static StringBuilder outputBuffer = new StringBuilder();

    // accumulate pieces a token or string at a time.  The objects in this array
    // will be converted to strings, padded as appropriate, and added to the
    // line buffer b.  This is the "accumulator".  It holds Token and String
    // objects only.
    static ArrayList a = new ArrayList();

    // where to write the completely beautified code.
    // (not referenced by any of the code visible in this file)
    private static PrintWriter out = null;

    // level of indentation, raised and lowered by the object() production
    static int level = 0;

    // width of indent
    static int indent_width = 4;
    // text prepended once per indentation level
    static String indent = "    ";
    // two levels worth of indent, used by wrapLines() for continuation lines
    static String double_indent = indent + indent;

    // the soft tab setting from jEdit, use soft tabs by default.
    static boolean useSoftTabs = true;

    // line separator, defaults to system line separator, but can be set to
    // a specific separator
    static String ls = System.getProperty("line.separator");

    // Returns the token manager to its initial output state: indent level
    // zero, an empty accumulator, and fresh line and output buffers.  The
    // indent, tab and line separator settings are left as they are.
    static void reset() {
        level = 0;
        a.clear();
        outputBuffer = new StringBuilder();
        b = new StringBuilder();
    }

    // Returns everything written to the output buffer so far.  Items still
    // waiting in the accumulator are not included until write() has run.
    static String getText() {
        return outputBuffer.toString();
    }

    // Sets the line separator appended by write() for blank lines and by
    // writeln() at the end of each line.
    static void setLineSeparator(String le) {
        ls = le;
    }

    // Sets the number of spaces used for one indentation level and rebuilds
    // the indent strings from it.  A width of zero or less falls back to the
    // default of 4; the indent is always built from the width that is
    // actually in effect, so it can never end up empty.
    // NOTE(review): this always produces a space indent, even when soft tabs
    // have been switched off with setUseSoftTabs(false) -- confirm callers
    // set the width before the soft tab setting.
    static void setIndentWidth(int w) {
        indent_width = (w > 0) ? w : 4;
        StringBuilder spaces = new StringBuilder(indent_width);
        for (int i = 0; i < indent_width; i++) {
            spaces.append(' ');
        }
        indent = spaces.toString();
        double_indent = indent + indent;
    }

    // Chooses between space indents (soft tabs) and tab character indents.
    // With soft tabs the indent strings are rebuilt from the current indent
    // width; without, one level is a single tab and the double indent is two.
    // Note that the parameter hides the line buffer b inside this method.
    static void setUseSoftTabs(boolean b) {
        useSoftTabs = b;
        if (!b) {
            indent = "\t";
            double_indent = "\t\t";
            return;
        }
        setIndentWidth(indent_width);
    }

    // appends a token to the accumulator; a null token is ignored
    static void add(Token t) {
        if (t == null) {
            return;
        }
        a.add(t);
    }

    // appends a piece of text to the accumulator; a null string is ignored
    static void add(String s) {
        if (s == null) {
            return;
        }
        a.add(s);
    }

    // trim spaces from the end of the last item in the accumulator.  The item
    // is only replaced when something was actually removed: a Token that has
    // no trailing spaces stays a Token, so write() can still tell a STRING
    // literal from ordinary text and leave its contents alone.
    static void trim() {
        if (a.isEmpty()) {
            return;
        }
        int last = a.size() - 1;
        Object o = a.get(last);
        String text = (o instanceof Token) ? ((Token)o).image : (String)o;
        int end = text.length();
        while (end > 0 && text.charAt(end - 1) == ' ') {
            end--;
        }
        if (end < text.length()) {
            a.set(last, text.substring(0, end));
        }
    }

    // removes one line break from the end of the output buffer: a trailing
    // line feed if there is one, and then a trailing carriage return if there
    // is one.  Either, both or neither may be present.
    static void trimNL() {
        int last = outputBuffer.length() - 1;
        if (last >= 0 && outputBuffer.charAt(last) == '\n') {
            outputBuffer.setLength(last);
            last--;
        }
        if (last >= 0 && outputBuffer.charAt(last) == '\r') {
            outputBuffer.setLength(last);
        }
    }

    // trim all \n and/or \r from the end of the given string and return the
    // result.  Strings are immutable, so the trimmed text has to be handed
    // back to the caller; the argument itself is never changed.
    static String trimNL(String s) {
        int end = s.length();
        while (end > 0 && (s.charAt(end - 1) == '\r' || s.charAt(end - 1) == '\n')) {
            end--;
        }
        return s.substring(0, end);
    }

    // returns the given string without its leading run of carriage returns,
    // line feeds, tabs and spaces.  No other characters are removed.
    static String trimStart(String s) {
        final int length = s.length();
        int start = 0;
        while (start < length) {
            char c = s.charAt(start);
            if (c != '\r' && c != '\n' && c != '\t' && c != ' ') {
                break;
            }
            start++;
        }
        return s.substring(start);
    }

    // returns the given string with at most max leading whitespace characters
    // removed.  Whitespace here is whatever Character.isWhitespace accepts.
    // A max of zero or less removes nothing.
    static String trimStart(String s, int max) {
        int start = 0;
        while (start < s.length()
                && start < max
                && Character.isWhitespace(s.charAt(start))) {
            start++;
        }
        return s.substring(start);
    }

    // trims whitespace (\r, \n, space, \t) from the last items in the
    // accumulator.  If the last item is all whitespace, it is removed and the
    // previous item is looked at, until an item with a non-whitespace
    // character is found.  If the entire accumulator is whitespace (or it was
    // empty to begin with), trailing whitespace is trimmed from the
    // outputBuffer as well.
    //
    // An item is only replaced when something was actually cut off its end.
    // A Token without trailing whitespace therefore stays a Token, so write()
    // can still tell a STRING literal from ordinary text and leave the spaces
    // inside the literal alone.
    static void trimWhitespace() {
        for (int i = a.size() - 1; i >= 0; i--) {
            Object o = a.get(i);
            String text = (o instanceof Token) ? ((Token)o).image : (String)o;
            int end = text.length();
            while (end > 0) {
                char c = text.charAt(end - 1);
                if (c != '\r' && c != '\n' && c != '\t' && c != ' ') {
                    break;
                }
                end--;
            }
            if (end == 0) {
                // nothing but whitespace, drop the item and keep looking
                a.remove(i);
                continue;
            }
            if (end < text.length()) {
                a.set(i, text.substring(0, end));
            }
            break;
        }
        if (a.size() == 0) {
            int end = outputBuffer.length();
            while (end > 0) {
                char c = outputBuffer.charAt(end - 1);
                if (c != '\r' && c != '\n' && c != '\t' && c != ' ') {
                    break;
                }
                end--;
            }
            outputBuffer.setLength(end);
        }
    }

    // writes the contents of the accumulator to the outputBuffer.  The line
    // buffer (b) is used to build the line.
    //
    // The line is cleaned up on the way: runs of spaces are reduced to one
    // space, a space after '(' or before ')' is removed, and whitespace in
    // front of a comma is removed.  All of this applies only to the text
    // outside of double quoted string literals; the literals themselves are
    // copied to the output exactly as they were read, since their contents
    // are data and not layout.
    //
    // Any exception is printed and otherwise ignored, so a formatting problem
    // does not stop the parse.
    static void write() {
        try {
            b.setLength(0); // clear the line buffer

            // put the pieces together as they are ...
            for (int i = 0; i < a.size(); i++) {
                Object o = a.get(i);
                if (o instanceof Token) {
                    b.append(((Token)o).image);
                }
                else {
                    b.append((String)o);
                }
            }

            // ... then remove extra spaces, leaving string literals as is
            String s = replaceOutsideStrings(b.toString(), "[ ]+", " ");

            // check for blank line(s)
            if (s.trim().isEmpty()) {
                // yep, it's a blank, so just print out a line separator
                outputBuffer.append(ls);
                a.clear();
                return;
            }

            // indent --
            // the line is trimmed and gets one indent per level.  The one
            // exception is a line starting with a space and an opening brace,
            // which is left as it is.
            if (!s.startsWith(" {")) {
                s = s.trim();
                for (int i = 0; i < level; i++) {
                    s = indent + s;
                }
            }

            // check if the output buffer does NOT end with a new line.  If it
            // doesn't, this text continues the current output line, so remove
            // any leading whitespace (including the indent just added)
            if (!endsWith(outputBuffer, "\n") && !endsWith(outputBuffer, "\r")) {
                s = trimStart(s);
            }

            // check that there aren't extra spaces in the buffer already --
            // this handles the case where the output buffer ends with a space
            // and the new string starts with a space, don't want 2 spaces.
            if (s.startsWith(" ") && endsWith(outputBuffer, " ")) {
                s = s.substring(1);
            }

            // check that there is one space between the end of the output
            // buffer and this line -- this handles the case where the output
            // buffer does not end in a space and the new string does not start
            // with a space, want one space in between.
            if (!s.startsWith(" ")
                    && !endsWith(outputBuffer, " ")
                    && !endsWith(outputBuffer, "\r")
                    && !endsWith(outputBuffer, "\n")
                    && outputBuffer.length() > 0) {
                outputBuffer.append(" ");
            }

            // no space after '(' and no space before ')'
            s = replaceOutsideStrings(s, "[(][ ]", "(");
            s = replaceOutsideStrings(s, "[ ][)]", ")");

            // there should be no situation where a comma is preceded by a space,
            // although that seems to happen when formatting string arrays.
            s = replaceOutsideStrings(s, "\\s+[,]", ",");

            // finally! add the string to the output buffer.  Long lines are
            // not wrapped; wrapLines() exists but is not used here.
            outputBuffer.append(s);

            // clear the accumulator for the next line
            a.clear();
        }
        catch(Exception e) {
            e.printStackTrace();
        }
    }

    // Replaces every match of regex with replacement in those parts of line
    // that are outside of double quoted string literals.  A literal runs from
    // a double quote to the next double quote that is not preceded by a
    // backslash escape (or to the end of the line) and is copied unchanged.
    private static String replaceOutsideStrings(String line, String regex, String replacement) {
        final int length = line.length();
        StringBuilder result = new StringBuilder(length);
        int plainStart = 0;
        int i = 0;
        while (i < length) {
            if (line.charAt(i) != '"') {
                i++;
                continue;
            }
            // find the end of the literal, stepping over escaped characters
            int end = i + 1;
            while (end < length && line.charAt(end) != '"') {
                end += (line.charAt(end) == '\\') ? 2 : 1;
            }
            end = Math.min(end + 1, length);
            result.append(line.substring(plainStart, i).replaceAll(regex, replacement));
            result.append(line, i, end);
            plainStart = end;
            i = end;
        }
        result.append(line.substring(plainStart).replaceAll(regex, replacement));
        return result.toString();
    }

    // Writes the accumulator to the output buffer and ends the line.  A line
    // break already sitting at the end of the output buffer is removed first
    // (see trimNL), so calling this twice in a row does not pile up breaks.
    static void writeln() {
        write();
        trimNL();
        outputBuffer.append(ls);
    }

    // counts the wrap marker characters (unicode 001c) in the given string
    static int countWrapSep(String s) {
        int count = 0;
        int at = s.indexOf('\u001c');
        while (at >= 0) {
            count++;
            at = s.indexOf('\u001c', at + 1);
        }
        return count;
    }

    // needs work, does a wrap, but not per spec
    //
    // Splits a line of more than 80 characters in two at a wrap marker.
    // - a line of 80 characters or less is returned as the only element
    // - a longer line without any wrap marker gives null
    // - otherwise the line is cut at the last wrap marker whose index is 80
    //   or less (or at the very end, if there is none that early).  The
    //   first element is the text before the cut, the second is the rest,
    //   prefixed with the leading spaces of the line plus a double indent.
    // The wrap markers themselves are not removed.
    static String[] wrapLines(String s) {
        if (s.length() <= 80) {
            return new String[]{s};
        }
        if (s.indexOf('\u001c') < 0) {
            return null;
        }

        // the line is longer than 80 here, so index 80 exists and the
        // backwards search finds the last marker at or before it
        int cut = s.lastIndexOf('\u001c', 80);
        if (cut < 0) {
            cut = s.length();
        }

        int spaces = 0;
        while (spaces < s.length() && s.charAt(spaces) == ' ') {
            spaces++;
        }

        String first = s.substring(0, cut);
        String rest = s.substring(0, spaces) + double_indent + s.substring(cut);
        return new String[]{first, rest};
    }

    // StringBuilder doesn't have an "endsWith" method, so this checks whether
    // the text in sb ends with s.  Two nulls count as a match, a single null
    // on either side does not.  An empty s matches any non-null sb.
    static boolean endsWith(StringBuilder sb, String s) {
        if (sb == null) {
            return s == null;
        }
        if (s == null) {
            return false;
        }
        int offset = sb.length() - s.length();
        if (offset < 0) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            if (sb.charAt(offset + i) != s.charAt(i)) {
                return false;
            }
        }
        return true;
    }
}