jsonparser.cpp - This C++ code implements a JSON parser, pa…

/js/src/jsonparser.cpp

http://github.com/zpao/v8monkey · C++ · 695 lines · 598 code · 31 blank · 66 comment · 100 complexity · d103a7b71a86b6ace93972a05f9e8ef5 MD5 · raw file

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sw=4 et tw=99:
 *
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is SpiderMonkey JSON.
 *
 * The Initial Developer of the Original Code is
 * the Mozilla Foundation.
 * Portions created by the Initial Developer are Copyright (C) 2011
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Jeff Walden <jwalden+code@mit.edu> (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "jsarray.h"
#include "jsnum.h"
#include "jsonparser.h"

#include "jsobjinlines.h"
#include "jsstrinlines.h"

using namespace js;

void
JSONParser::error(const char *msg)
{
    if (errorHandling == RaiseError)
        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE, msg);
}

bool
JSONParser::errorReturn()
{
    return errorHandling == NoError;
}

template<JSONParser::StringType ST>
JSONParser::Token
JSONParser::readString()
{
    JS_ASSERT(current < end);
    JS_ASSERT(*current == '"');

    /*
     * JSONString:
     *   /^"([^\u0000-\u001F"\\]|\\(["/\\bfnrt]|u[0-9a-fA-F]{4}))*"$/
     */

    if (++current == end) {
        error("unterminated string literal");
        return token(Error);
    }

    /*
     * Optimization: if the source contains no escaped characters, create the
     * string directly from the source text.
     */
    RangedPtr<const jschar> start = current;
    for (; current < end; current++) {
        if (*current == '"') {
            size_t length = current - start;
            current++;
            JSFlatString *str = (ST == JSONParser::PropertyName)
                                ? js_AtomizeChars(cx, start.get(), length)
                                : js_NewStringCopyN(cx, start.get(), length);
            if (!str)
                return token(OOM);
            return stringToken(str);
        }

        if (*current == '\\')
            break;

        if (*current <= 0x001F) {
            error("bad control character in string literal");
            return token(Error);
        }
    }

    /*
     * Slow case: string contains escaped characters.  Copy a maximal sequence
     * of unescaped characters into a temporary buffer, then an escaped
     * character, and repeat until the entire string is consumed.
     */
    StringBuffer buffer(cx);
    do {
        if (start < current && !buffer.append(start.get(), current.get()))
            return token(OOM);

        if (current >= end)
            break;

        jschar c = *current++;
        if (c == '"') {
            JSFlatString *str = (ST == JSONParser::PropertyName)
                                ? buffer.finishAtom()
                                : buffer.finishString();
            if (!str)
                return token(OOM);
            return stringToken(str);
        }

        if (c != '\\') {
            error("bad character in string literal");
            return token(Error);
        }

        if (current >= end)
            break;

        switch (*current++) {
          case '"':  c = '"';  break;
          case '/':  c = '/';  break;
          case '\\': c = '\\'; break;
          case 'b':  c = '\b'; break;
          case 'f':  c = '\f'; break;
          case 'n':  c = '\n'; break;
          case 'r':  c = '\r'; break;
          case 't':  c = '\t'; break;

          case 'u':
            if (end - current < 4) {
                error("bad Unicode escape");
                return token(Error);
            }
            if (JS7_ISHEX(current[0]) &&
                JS7_ISHEX(current[1]) &&
                JS7_ISHEX(current[2]) &&
                JS7_ISHEX(current[3]))
            {
                c = (JS7_UNHEX(current[0]) << 12)
                  | (JS7_UNHEX(current[1]) << 8)
                  | (JS7_UNHEX(current[2]) << 4)
                  | (JS7_UNHEX(current[3]));
                current += 4;
                break;
            }
            /* FALL THROUGH */

          default:
            error("bad escaped character");
            return token(Error);
        }
        if (!buffer.append(c))
            return token(OOM);

        start = current;
        for (; current < end; current++) {
            if (*current == '"' || *current == '\\' || *current <= 0x001F)
                break;
        }
    } while (current < end);

    error("unterminated string");
    return token(Error);
}

JSONParser::Token
JSONParser::readNumber()
{
    JS_ASSERT(current < end);
    JS_ASSERT(JS7_ISDEC(*current) || *current == '-');

    /*
     * JSONNumber:
     *   /^-?(0|[1-9][0-9]+)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?$/
     */

    bool negative = *current == '-';

    /* -? */
    if (negative && ++current == end) {
        error("no number after minus sign");
        return token(Error);
    }

    const RangedPtr<const jschar> digitStart = current;

    /* 0|[1-9][0-9]+ */
    if (!JS7_ISDEC(*current)) {
        error("unexpected non-digit");
        return token(Error);
    }
    if (*current++ != '0') {
        for (; current < end; current++) {
            if (!JS7_ISDEC(*current))
                break;
        }
    }

    /* Fast path: no fractional or exponent part. */
    if (current == end || (*current != '.' && *current != 'e' && *current != 'E')) {
        const jschar *dummy;
        jsdouble d;
        if (!GetPrefixInteger(cx, digitStart.get(), current.get(), 10, &dummy, &d))
            return token(OOM);
        JS_ASSERT(current == dummy);
        return numberToken(negative ? -d : d);
    }

    /* (\.[0-9]+)? */
    if (current < end && *current == '.') {
        if (++current == end) {
            error("missing digits after decimal point");
            return token(Error);
        }
        if (!JS7_ISDEC(*current)) {
            error("unterminated fractional number");
            return token(Error);
        }
        while (++current < end) {
            if (!JS7_ISDEC(*current))
                break;
        }
    }

    /* ([eE][\+\-]?[0-9]+)? */
    if (current < end && (*current == 'e' || *current == 'E')) {
        if (++current == end) {
            error("missing digits after exponent indicator");
            return token(Error);
        }
        if (*current == '+' || *current == '-') {
            if (++current == end) {
                error("missing digits after exponent sign");
                return token(Error);
            }
        }
        if (!JS7_ISDEC(*current)) {
            error("exponent part is missing a number");
            return token(Error);
        }
        while (++current < end) {
            if (!JS7_ISDEC(*current))
                break;
        }
    }

    jsdouble d;
    const jschar *finish;
    if (!js_strtod(cx, digitStart.get(), current.get(), &finish, &d))
        return token(OOM);
    JS_ASSERT(current == finish);
    return numberToken(negative ? -d : d);
}

static inline bool
IsJSONWhitespace(jschar c)
{
    return c == '\t' || c == '\r' || c == '\n' || c == ' ';
}

JSONParser::Token
JSONParser::advance()
{
    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("unexpected end of data");
        return token(Error);
    }

    switch (*current) {
      case '"':
        return readString<LiteralValue>();

      case '-':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return readNumber();

      case 't':
        if (end - current < 4 || current[1] != 'r' || current[2] != 'u' || current[3] != 'e') {
            error("unexpected keyword");
            return token(Error);
        }
        current += 4;
        return token(True);

      case 'f':
        if (end - current < 5 ||
            current[1] != 'a' || current[2] != 'l' || current[3] != 's' || current[4] != 'e')
        {
            error("unexpected keyword");
            return token(Error);
        }
        current += 5;
        return token(False);

      case 'n':
        if (end - current < 4 || current[1] != 'u' || current[2] != 'l' || current[3] != 'l') {
            error("unexpected keyword");
            return token(Error);
        }
        current += 4;
        return token(Null);

      case '[':
        current++;
        return token(ArrayOpen);
      case ']':
        current++;
        return token(ArrayClose);

      case '{':
        current++;
        return token(ObjectOpen);
      case '}':
        current++;
        return token(ObjectClose);

      case ',':
        current++;
        return token(Comma);

      case ':':
        current++;
        return token(Colon);

      default:
        error("unexpected character");
        return token(Error);
    }
}

JSONParser::Token
JSONParser::advanceAfterObjectOpen()
{
    JS_ASSERT(current[-1] == '{');

    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("end of data while reading object contents");
        return token(Error);
    }

    if (*current == '"')
        return readString<PropertyName>();

    if (*current == '}') {
        current++;
        return token(ObjectClose);
    }

    error("expected property name or '}'");
    return token(Error);
}

static inline void
AssertPastValue(const RangedPtr<const jschar> current)
{
    /*
     * We're past an arbitrary JSON value, so the previous character is
     * *somewhat* constrained, even if this assertion is pretty broad.  Don't
     * knock it till you tried it: this assertion *did* catch a bug once.
     */
    JS_ASSERT((current[-1] == 'l' &&
               current[-2] == 'l' &&
               current[-3] == 'u' &&
               current[-4] == 'n') ||
              (current[-1] == 'e' &&
               current[-2] == 'u' &&
               current[-3] == 'r' &&
               current[-4] == 't') ||
              (current[-1] == 'e' &&
               current[-2] == 's' &&
               current[-3] == 'l' &&
               current[-4] == 'a' &&
               current[-5] == 'f') ||
              current[-1] == '}' ||
              current[-1] == ']' ||
              current[-1] == '"' ||
              JS7_ISDEC(current[-1]));
}

JSONParser::Token
JSONParser::advanceAfterArrayElement()
{
    AssertPastValue(current);

    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("end of data when ',' or ']' was expected");
        return token(Error);
    }

    if (*current == ',') {
        current++;
        return token(Comma);
    }

    if (*current == ']') {
        current++;
        return token(ArrayClose);
    }

    error("expected ',' or ']' after array element");
    return token(Error);
}

JSONParser::Token
JSONParser::advancePropertyName()
{
    JS_ASSERT(current[-1] == ',');

    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("end of data when property name was expected");
        return token(Error);
    }

    if (*current == '"')
        return readString<PropertyName>();

    if (parsingMode == LegacyJSON && *current == '}') {
        /*
         * Previous JSON parsing accepted trailing commas in non-empty object
         * syntax, and some users depend on this.  (Specifically, Places data
         * serialization in versions of Firefox before 4.0.  We can remove this
         * mode when profile upgrades from 3.6 become unsupported.)  Permit
         * such trailing commas only when legacy parsing is specifically
         * requested.
         */
        current++;
        return token(ObjectClose);
    }

    error("expected double-quoted property name");
    return token(Error);
}

JSONParser::Token
JSONParser::advancePropertyColon()
{
    JS_ASSERT(current[-1] == '"');

    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("end of data after property name when ':' was expected");
        return token(Error);
    }

    if (*current == ':') {
        current++;
        return token(Colon);
    }

    error("expected ':' after property name in object");
    return token(Error);
}

JSONParser::Token
JSONParser::advanceAfterProperty()
{
    AssertPastValue(current);

    while (current < end && IsJSONWhitespace(*current))
        current++;
    if (current >= end) {
        error("end of data after property value in object");
        return token(Error);
    }

    if (*current == ',') {
        current++;
        return token(Comma);
    }

    if (*current == '}') {
        current++;
        return token(ObjectClose);
    }

    error("expected ',' or '}' after property value in object");
    return token(Error);
}

/*
 * This enum is local to JSONParser::parse, below, but ISO C++98 doesn't allow
 * templates to depend on local types.  Boo-urns!
 */
enum ParserState { FinishArrayElement, FinishObjectMember, JSONValue };

bool
JSONParser::parse(Value *vp)
{
    Vector<ParserState> stateStack(cx);
    AutoValueVector valueStack(cx);

    *vp = UndefinedValue();

    Token token;
    ParserState state = JSONValue;
    while (true) {
        switch (state) {
          case FinishObjectMember: {
            Value v = valueStack.popCopy();
            /*
             * NB: Relies on js_DefineNativeProperty performing
             *     js_CheckForStringIndex.
             */
            jsid propid = ATOM_TO_JSID(&valueStack.popCopy().toString()->asAtom());
            if (!DefineNativeProperty(cx, &valueStack.back().toObject(), propid, v,
                                      JS_PropertyStub, JS_StrictPropertyStub, JSPROP_ENUMERATE,
                                      0, 0))
            {
                return false;
            }
            token = advanceAfterProperty();
            if (token == ObjectClose)
                break;
            if (token != Comma) {
                if (token == OOM)
                    return false;
                if (token != Error)
                    error("expected ',' or '}' after property-value pair in object literal");
                return errorReturn();
            }
            token = advancePropertyName();
            /* FALL THROUGH */
          }

          JSONMember:
            if (token == String) {
                if (!valueStack.append(atomValue()))
                    return false;
                token = advancePropertyColon();
                if (token != Colon) {
                    JS_ASSERT(token == Error);
                    return errorReturn();
                }
                if (!stateStack.append(FinishObjectMember))
                    return false;
                goto JSONValue;
            }
            if (token == ObjectClose) {
                JS_ASSERT(state == FinishObjectMember);
                JS_ASSERT(parsingMode == LegacyJSON);
                break;
            }
            if (token == OOM)
                return false;
            if (token != Error)
                error("property names must be double-quoted strings");
            return errorReturn();

          case FinishArrayElement: {
            Value v = valueStack.popCopy();
            if (!js_NewbornArrayPush(cx, &valueStack.back().toObject(), v))
                return false;
            token = advanceAfterArrayElement();
            if (token == Comma) {
                if (!stateStack.append(FinishArrayElement))
                    return false;
                goto JSONValue;
            }
            if (token == ArrayClose)
                break;
            JS_ASSERT(token == Error);
            return errorReturn();
          }

          JSONValue:
          case JSONValue:
            token = advance();
          JSONValueSwitch:
            switch (token) {
              case String:
              case Number:
                if (!valueStack.append(token == String ? stringValue() : numberValue()))
                    return false;
                break;
              case True:
                if (!valueStack.append(BooleanValue(true)))
                    return false;
                break;
              case False:
                if (!valueStack.append(BooleanValue(false)))
                    return false;
                break;
              case Null:
                if (!valueStack.append(NullValue()))
                    return false;
                break;

              case ArrayOpen: {
                JSObject *obj = NewDenseEmptyArray(cx);
                if (!obj || !valueStack.append(ObjectValue(*obj)))
                    return false;
                token = advance();
                if (token == ArrayClose)
                    break;
                if (!stateStack.append(FinishArrayElement))
                    return false;
                goto JSONValueSwitch;
              }

              case ObjectOpen: {
                JSObject *obj = NewBuiltinClassInstance(cx, &ObjectClass);
                if (!obj || !valueStack.append(ObjectValue(*obj)))
                    return false;
                token = advanceAfterObjectOpen();
                if (token == ObjectClose)
                    break;
                goto JSONMember;
              }

              case ArrayClose:
                if (parsingMode == LegacyJSON &&
                    !stateStack.empty() &&
                    stateStack.back() == FinishArrayElement) {
                    /*
                     * Previous JSON parsing accepted trailing commas in
                     * non-empty array syntax, and some users depend on this.
                     * (Specifically, Places data serialization in versions of
                     * Firefox prior to 4.0.  We can remove this mode when
                     * profile upgrades from 3.6 become unsupported.)  Permit
                     * such trailing commas only when specifically
                     * instructed to do so.
                     */
                    stateStack.popBack();
                    break;
                }
                /* FALL THROUGH */

              case ObjectClose:
              case Colon:
              case Comma:
                error("unexpected character");
                return errorReturn();

              case OOM:
                return false;

              case Error:
                return errorReturn();
            }
            break;
        }

        if (stateStack.empty())
            break;
        state = stateStack.popCopy();
    }

    for (; current < end; current++) {
        if (!IsJSONWhitespace(*current)) {
            error("unexpected non-whitespace character after JSON data");
            return errorReturn();
        }
    }

    JS_ASSERT(end == current);
    JS_ASSERT(valueStack.length() == 1);
    *vp = valueStack[0];
    return true;
}
Summary ✨

This C++ code implements a JSON parser, parsing and validating JSON data according to the ECMAScript specification. It recursively breaks down the input string into smaller parts, such as strings, numbers, booleans, arrays, and objects, and checks for syntax errors along the way. The parsed data is stored in a JavaScript object, which can be used further by the application.
Alerts (16)

Complexity hotspot; lines 392 to 407 (total complexity: 16)
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407