/js/src/jsonparser.cpp

http://github.com/zpao/v8monkey · C++ · 695 lines · 598 code · 31 blank · 66 comment · 100 complexity · d103a7b71a86b6ace93972a05f9e8ef5 MD5 · raw file

  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set ts=8 sw=4 et tw=99:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is SpiderMonkey JSON.
  18. *
  19. * The Initial Developer of the Original Code is
  20. * the Mozilla Foundation.
  21. * Portions created by the Initial Developer are Copyright (C) 2011
  22. * the Initial Developer. All Rights Reserved.
  23. *
  24. * Contributor(s):
  25. * Jeff Walden <jwalden+code@mit.edu> (original author)
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. #include "jsarray.h"
  41. #include "jsnum.h"
  42. #include "jsonparser.h"
  43. #include "jsobjinlines.h"
  44. #include "jsstrinlines.h"
  45. using namespace js;
  46. void
  47. JSONParser::error(const char *msg)
  48. {
  49. if (errorHandling == RaiseError)
  50. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE, msg);
  51. }
  52. bool
  53. JSONParser::errorReturn()
  54. {
  55. return errorHandling == NoError;
  56. }
  57. template<JSONParser::StringType ST>
  58. JSONParser::Token
  59. JSONParser::readString()
  60. {
  61. JS_ASSERT(current < end);
  62. JS_ASSERT(*current == '"');
  63. /*
  64. * JSONString:
  65. * /^"([^\u0000-\u001F"\\]|\\(["/\\bfnrt]|u[0-9a-fA-F]{4}))*"$/
  66. */
  67. if (++current == end) {
  68. error("unterminated string literal");
  69. return token(Error);
  70. }
  71. /*
  72. * Optimization: if the source contains no escaped characters, create the
  73. * string directly from the source text.
  74. */
  75. RangedPtr<const jschar> start = current;
  76. for (; current < end; current++) {
  77. if (*current == '"') {
  78. size_t length = current - start;
  79. current++;
  80. JSFlatString *str = (ST == JSONParser::PropertyName)
  81. ? js_AtomizeChars(cx, start.get(), length)
  82. : js_NewStringCopyN(cx, start.get(), length);
  83. if (!str)
  84. return token(OOM);
  85. return stringToken(str);
  86. }
  87. if (*current == '\\')
  88. break;
  89. if (*current <= 0x001F) {
  90. error("bad control character in string literal");
  91. return token(Error);
  92. }
  93. }
  94. /*
  95. * Slow case: string contains escaped characters. Copy a maximal sequence
  96. * of unescaped characters into a temporary buffer, then an escaped
  97. * character, and repeat until the entire string is consumed.
  98. */
  99. StringBuffer buffer(cx);
  100. do {
  101. if (start < current && !buffer.append(start.get(), current.get()))
  102. return token(OOM);
  103. if (current >= end)
  104. break;
  105. jschar c = *current++;
  106. if (c == '"') {
  107. JSFlatString *str = (ST == JSONParser::PropertyName)
  108. ? buffer.finishAtom()
  109. : buffer.finishString();
  110. if (!str)
  111. return token(OOM);
  112. return stringToken(str);
  113. }
  114. if (c != '\\') {
  115. error("bad character in string literal");
  116. return token(Error);
  117. }
  118. if (current >= end)
  119. break;
  120. switch (*current++) {
  121. case '"': c = '"'; break;
  122. case '/': c = '/'; break;
  123. case '\\': c = '\\'; break;
  124. case 'b': c = '\b'; break;
  125. case 'f': c = '\f'; break;
  126. case 'n': c = '\n'; break;
  127. case 'r': c = '\r'; break;
  128. case 't': c = '\t'; break;
  129. case 'u':
  130. if (end - current < 4) {
  131. error("bad Unicode escape");
  132. return token(Error);
  133. }
  134. if (JS7_ISHEX(current[0]) &&
  135. JS7_ISHEX(current[1]) &&
  136. JS7_ISHEX(current[2]) &&
  137. JS7_ISHEX(current[3]))
  138. {
  139. c = (JS7_UNHEX(current[0]) << 12)
  140. | (JS7_UNHEX(current[1]) << 8)
  141. | (JS7_UNHEX(current[2]) << 4)
  142. | (JS7_UNHEX(current[3]));
  143. current += 4;
  144. break;
  145. }
  146. /* FALL THROUGH */
  147. default:
  148. error("bad escaped character");
  149. return token(Error);
  150. }
  151. if (!buffer.append(c))
  152. return token(OOM);
  153. start = current;
  154. for (; current < end; current++) {
  155. if (*current == '"' || *current == '\\' || *current <= 0x001F)
  156. break;
  157. }
  158. } while (current < end);
  159. error("unterminated string");
  160. return token(Error);
  161. }
  162. JSONParser::Token
  163. JSONParser::readNumber()
  164. {
  165. JS_ASSERT(current < end);
  166. JS_ASSERT(JS7_ISDEC(*current) || *current == '-');
  167. /*
  168. * JSONNumber:
  169. * /^-?(0|[1-9][0-9]+)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?$/
  170. */
  171. bool negative = *current == '-';
  172. /* -? */
  173. if (negative && ++current == end) {
  174. error("no number after minus sign");
  175. return token(Error);
  176. }
  177. const RangedPtr<const jschar> digitStart = current;
  178. /* 0|[1-9][0-9]+ */
  179. if (!JS7_ISDEC(*current)) {
  180. error("unexpected non-digit");
  181. return token(Error);
  182. }
  183. if (*current++ != '0') {
  184. for (; current < end; current++) {
  185. if (!JS7_ISDEC(*current))
  186. break;
  187. }
  188. }
  189. /* Fast path: no fractional or exponent part. */
  190. if (current == end || (*current != '.' && *current != 'e' && *current != 'E')) {
  191. const jschar *dummy;
  192. jsdouble d;
  193. if (!GetPrefixInteger(cx, digitStart.get(), current.get(), 10, &dummy, &d))
  194. return token(OOM);
  195. JS_ASSERT(current == dummy);
  196. return numberToken(negative ? -d : d);
  197. }
  198. /* (\.[0-9]+)? */
  199. if (current < end && *current == '.') {
  200. if (++current == end) {
  201. error("missing digits after decimal point");
  202. return token(Error);
  203. }
  204. if (!JS7_ISDEC(*current)) {
  205. error("unterminated fractional number");
  206. return token(Error);
  207. }
  208. while (++current < end) {
  209. if (!JS7_ISDEC(*current))
  210. break;
  211. }
  212. }
  213. /* ([eE][\+\-]?[0-9]+)? */
  214. if (current < end && (*current == 'e' || *current == 'E')) {
  215. if (++current == end) {
  216. error("missing digits after exponent indicator");
  217. return token(Error);
  218. }
  219. if (*current == '+' || *current == '-') {
  220. if (++current == end) {
  221. error("missing digits after exponent sign");
  222. return token(Error);
  223. }
  224. }
  225. if (!JS7_ISDEC(*current)) {
  226. error("exponent part is missing a number");
  227. return token(Error);
  228. }
  229. while (++current < end) {
  230. if (!JS7_ISDEC(*current))
  231. break;
  232. }
  233. }
  234. jsdouble d;
  235. const jschar *finish;
  236. if (!js_strtod(cx, digitStart.get(), current.get(), &finish, &d))
  237. return token(OOM);
  238. JS_ASSERT(current == finish);
  239. return numberToken(negative ? -d : d);
  240. }
  241. static inline bool
  242. IsJSONWhitespace(jschar c)
  243. {
  244. return c == '\t' || c == '\r' || c == '\n' || c == ' ';
  245. }
  246. JSONParser::Token
  247. JSONParser::advance()
  248. {
  249. while (current < end && IsJSONWhitespace(*current))
  250. current++;
  251. if (current >= end) {
  252. error("unexpected end of data");
  253. return token(Error);
  254. }
  255. switch (*current) {
  256. case '"':
  257. return readString<LiteralValue>();
  258. case '-':
  259. case '0':
  260. case '1':
  261. case '2':
  262. case '3':
  263. case '4':
  264. case '5':
  265. case '6':
  266. case '7':
  267. case '8':
  268. case '9':
  269. return readNumber();
  270. case 't':
  271. if (end - current < 4 || current[1] != 'r' || current[2] != 'u' || current[3] != 'e') {
  272. error("unexpected keyword");
  273. return token(Error);
  274. }
  275. current += 4;
  276. return token(True);
  277. case 'f':
  278. if (end - current < 5 ||
  279. current[1] != 'a' || current[2] != 'l' || current[3] != 's' || current[4] != 'e')
  280. {
  281. error("unexpected keyword");
  282. return token(Error);
  283. }
  284. current += 5;
  285. return token(False);
  286. case 'n':
  287. if (end - current < 4 || current[1] != 'u' || current[2] != 'l' || current[3] != 'l') {
  288. error("unexpected keyword");
  289. return token(Error);
  290. }
  291. current += 4;
  292. return token(Null);
  293. case '[':
  294. current++;
  295. return token(ArrayOpen);
  296. case ']':
  297. current++;
  298. return token(ArrayClose);
  299. case '{':
  300. current++;
  301. return token(ObjectOpen);
  302. case '}':
  303. current++;
  304. return token(ObjectClose);
  305. case ',':
  306. current++;
  307. return token(Comma);
  308. case ':':
  309. current++;
  310. return token(Colon);
  311. default:
  312. error("unexpected character");
  313. return token(Error);
  314. }
  315. }
  316. JSONParser::Token
  317. JSONParser::advanceAfterObjectOpen()
  318. {
  319. JS_ASSERT(current[-1] == '{');
  320. while (current < end && IsJSONWhitespace(*current))
  321. current++;
  322. if (current >= end) {
  323. error("end of data while reading object contents");
  324. return token(Error);
  325. }
  326. if (*current == '"')
  327. return readString<PropertyName>();
  328. if (*current == '}') {
  329. current++;
  330. return token(ObjectClose);
  331. }
  332. error("expected property name or '}'");
  333. return token(Error);
  334. }
  335. static inline void
  336. AssertPastValue(const RangedPtr<const jschar> current)
  337. {
  338. /*
  339. * We're past an arbitrary JSON value, so the previous character is
  340. * *somewhat* constrained, even if this assertion is pretty broad. Don't
  341. * knock it till you tried it: this assertion *did* catch a bug once.
  342. */
  343. JS_ASSERT((current[-1] == 'l' &&
  344. current[-2] == 'l' &&
  345. current[-3] == 'u' &&
  346. current[-4] == 'n') ||
  347. (current[-1] == 'e' &&
  348. current[-2] == 'u' &&
  349. current[-3] == 'r' &&
  350. current[-4] == 't') ||
  351. (current[-1] == 'e' &&
  352. current[-2] == 's' &&
  353. current[-3] == 'l' &&
  354. current[-4] == 'a' &&
  355. current[-5] == 'f') ||
  356. current[-1] == '}' ||
  357. current[-1] == ']' ||
  358. current[-1] == '"' ||
  359. JS7_ISDEC(current[-1]));
  360. }
  361. JSONParser::Token
  362. JSONParser::advanceAfterArrayElement()
  363. {
  364. AssertPastValue(current);
  365. while (current < end && IsJSONWhitespace(*current))
  366. current++;
  367. if (current >= end) {
  368. error("end of data when ',' or ']' was expected");
  369. return token(Error);
  370. }
  371. if (*current == ',') {
  372. current++;
  373. return token(Comma);
  374. }
  375. if (*current == ']') {
  376. current++;
  377. return token(ArrayClose);
  378. }
  379. error("expected ',' or ']' after array element");
  380. return token(Error);
  381. }
  382. JSONParser::Token
  383. JSONParser::advancePropertyName()
  384. {
  385. JS_ASSERT(current[-1] == ',');
  386. while (current < end && IsJSONWhitespace(*current))
  387. current++;
  388. if (current >= end) {
  389. error("end of data when property name was expected");
  390. return token(Error);
  391. }
  392. if (*current == '"')
  393. return readString<PropertyName>();
  394. if (parsingMode == LegacyJSON && *current == '}') {
  395. /*
  396. * Previous JSON parsing accepted trailing commas in non-empty object
  397. * syntax, and some users depend on this. (Specifically, Places data
  398. * serialization in versions of Firefox before 4.0. We can remove this
  399. * mode when profile upgrades from 3.6 become unsupported.) Permit
  400. * such trailing commas only when legacy parsing is specifically
  401. * requested.
  402. */
  403. current++;
  404. return token(ObjectClose);
  405. }
  406. error("expected double-quoted property name");
  407. return token(Error);
  408. }
  409. JSONParser::Token
  410. JSONParser::advancePropertyColon()
  411. {
  412. JS_ASSERT(current[-1] == '"');
  413. while (current < end && IsJSONWhitespace(*current))
  414. current++;
  415. if (current >= end) {
  416. error("end of data after property name when ':' was expected");
  417. return token(Error);
  418. }
  419. if (*current == ':') {
  420. current++;
  421. return token(Colon);
  422. }
  423. error("expected ':' after property name in object");
  424. return token(Error);
  425. }
  426. JSONParser::Token
  427. JSONParser::advanceAfterProperty()
  428. {
  429. AssertPastValue(current);
  430. while (current < end && IsJSONWhitespace(*current))
  431. current++;
  432. if (current >= end) {
  433. error("end of data after property value in object");
  434. return token(Error);
  435. }
  436. if (*current == ',') {
  437. current++;
  438. return token(Comma);
  439. }
  440. if (*current == '}') {
  441. current++;
  442. return token(ObjectClose);
  443. }
  444. error("expected ',' or '}' after property value in object");
  445. return token(Error);
  446. }
  447. /*
  448. * This enum is local to JSONParser::parse, below, but ISO C++98 doesn't allow
  449. * templates to depend on local types. Boo-urns!
  450. */
  451. enum ParserState { FinishArrayElement, FinishObjectMember, JSONValue };
  452. bool
  453. JSONParser::parse(Value *vp)
  454. {
  455. Vector<ParserState> stateStack(cx);
  456. AutoValueVector valueStack(cx);
  457. *vp = UndefinedValue();
  458. Token token;
  459. ParserState state = JSONValue;
  460. while (true) {
  461. switch (state) {
  462. case FinishObjectMember: {
  463. Value v = valueStack.popCopy();
  464. /*
  465. * NB: Relies on js_DefineNativeProperty performing
  466. * js_CheckForStringIndex.
  467. */
  468. jsid propid = ATOM_TO_JSID(&valueStack.popCopy().toString()->asAtom());
  469. if (!DefineNativeProperty(cx, &valueStack.back().toObject(), propid, v,
  470. JS_PropertyStub, JS_StrictPropertyStub, JSPROP_ENUMERATE,
  471. 0, 0))
  472. {
  473. return false;
  474. }
  475. token = advanceAfterProperty();
  476. if (token == ObjectClose)
  477. break;
  478. if (token != Comma) {
  479. if (token == OOM)
  480. return false;
  481. if (token != Error)
  482. error("expected ',' or '}' after property-value pair in object literal");
  483. return errorReturn();
  484. }
  485. token = advancePropertyName();
  486. /* FALL THROUGH */
  487. }
  488. JSONMember:
  489. if (token == String) {
  490. if (!valueStack.append(atomValue()))
  491. return false;
  492. token = advancePropertyColon();
  493. if (token != Colon) {
  494. JS_ASSERT(token == Error);
  495. return errorReturn();
  496. }
  497. if (!stateStack.append(FinishObjectMember))
  498. return false;
  499. goto JSONValue;
  500. }
  501. if (token == ObjectClose) {
  502. JS_ASSERT(state == FinishObjectMember);
  503. JS_ASSERT(parsingMode == LegacyJSON);
  504. break;
  505. }
  506. if (token == OOM)
  507. return false;
  508. if (token != Error)
  509. error("property names must be double-quoted strings");
  510. return errorReturn();
  511. case FinishArrayElement: {
  512. Value v = valueStack.popCopy();
  513. if (!js_NewbornArrayPush(cx, &valueStack.back().toObject(), v))
  514. return false;
  515. token = advanceAfterArrayElement();
  516. if (token == Comma) {
  517. if (!stateStack.append(FinishArrayElement))
  518. return false;
  519. goto JSONValue;
  520. }
  521. if (token == ArrayClose)
  522. break;
  523. JS_ASSERT(token == Error);
  524. return errorReturn();
  525. }
  526. JSONValue:
  527. case JSONValue:
  528. token = advance();
  529. JSONValueSwitch:
  530. switch (token) {
  531. case String:
  532. case Number:
  533. if (!valueStack.append(token == String ? stringValue() : numberValue()))
  534. return false;
  535. break;
  536. case True:
  537. if (!valueStack.append(BooleanValue(true)))
  538. return false;
  539. break;
  540. case False:
  541. if (!valueStack.append(BooleanValue(false)))
  542. return false;
  543. break;
  544. case Null:
  545. if (!valueStack.append(NullValue()))
  546. return false;
  547. break;
  548. case ArrayOpen: {
  549. JSObject *obj = NewDenseEmptyArray(cx);
  550. if (!obj || !valueStack.append(ObjectValue(*obj)))
  551. return false;
  552. token = advance();
  553. if (token == ArrayClose)
  554. break;
  555. if (!stateStack.append(FinishArrayElement))
  556. return false;
  557. goto JSONValueSwitch;
  558. }
  559. case ObjectOpen: {
  560. JSObject *obj = NewBuiltinClassInstance(cx, &ObjectClass);
  561. if (!obj || !valueStack.append(ObjectValue(*obj)))
  562. return false;
  563. token = advanceAfterObjectOpen();
  564. if (token == ObjectClose)
  565. break;
  566. goto JSONMember;
  567. }
  568. case ArrayClose:
  569. if (parsingMode == LegacyJSON &&
  570. !stateStack.empty() &&
  571. stateStack.back() == FinishArrayElement) {
  572. /*
  573. * Previous JSON parsing accepted trailing commas in
  574. * non-empty array syntax, and some users depend on this.
  575. * (Specifically, Places data serialization in versions of
  576. * Firefox prior to 4.0. We can remove this mode when
  577. * profile upgrades from 3.6 become unsupported.) Permit
  578. * such trailing commas only when specifically
  579. * instructed to do so.
  580. */
  581. stateStack.popBack();
  582. break;
  583. }
  584. /* FALL THROUGH */
  585. case ObjectClose:
  586. case Colon:
  587. case Comma:
  588. error("unexpected character");
  589. return errorReturn();
  590. case OOM:
  591. return false;
  592. case Error:
  593. return errorReturn();
  594. }
  595. break;
  596. }
  597. if (stateStack.empty())
  598. break;
  599. state = stateStack.popCopy();
  600. }
  601. for (; current < end; current++) {
  602. if (!IsJSONWhitespace(*current)) {
  603. error("unexpected non-whitespace character after JSON data");
  604. return errorReturn();
  605. }
  606. }
  607. JS_ASSERT(end == current);
  608. JS_ASSERT(valueStack.length() == 1);
  609. *vp = valueStack[0];
  610. return true;
  611. }