/deps/mochiweb/src/mochijson2.erl

http://github.com/zotonic/zotonic · Erlang · 849 lines · 678 code · 70 blank · 101 comment · 6 complexity · d855569d94b0924d6ac240f96a392e53 MD5 · raw file

  1. %% @author Bob Ippolito <bob@mochimedia.com>
  2. %% @copyright 2007 Mochi Media, Inc.
  3. %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
  4. %% with binaries as strings, arrays as lists (without an {array, _})
  5. %% wrapper and it only knows how to decode UTF-8 (and ASCII).
  6. %%
  7. %% JSON terms are decoded as follows (javascript -> erlang):
  8. %% <ul>
  9. %% <li>{"key": "value"} ->
  10. %% {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
  11. %% <li>["array", 123, 12.34, true, false, null] ->
  12. %% [&lt;&lt;"array">>, 123, 12.34, true, false, null]
  13. %% </li>
  14. %% </ul>
  15. %% <ul>
  16. %% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
  17. %% <li>Objects decode to {struct, PropList}</li>
  18. %% <li>Numbers decode to integer or float</li>
  19. %% <li>true, false, null decode to their respective terms.</li>
  20. %% </ul>
  21. %% The encoder will accept the same format that the decoder will produce,
  22. %% but will also allow additional cases for leniency:
  23. %% <ul>
  24. %% <li>atoms other than true, false, null will be considered UTF-8
  25. %% strings (even as a proplist key)
  26. %% </li>
  27. %% <li>{json, IoList} will insert IoList directly into the output
  28. %% with no validation
  29. %% </li>
  30. %% <li>{array, Array} will be encoded as Array
  31. %% (legacy mochijson style)
  32. %% </li>
  33. %% <li>A non-empty raw proplist will be encoded as an object as long
  34. %% as the first pair does not have an atom key of json, struct,
  35. %% or array
  36. %% </li>
  37. %% </ul>
  38. -module(mochijson2).
  39. -author('bob@mochimedia.com').
  40. -export([encoder/1, encode/1]).
  41. -export([decoder/1, decode/1]).
  %% The double-quote character, spelled as a macro so that syntax
  %% highlighters are not confused by a bare quote.
  -define(Q, $\").
  %% Move decoder record S forward by N bytes on the current line.
  -define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column,
                                   offset=N+S#decoder.offset}).
  %% Move decoder record S forward by one byte on the current line.
  -define(INC_COL(S), ?ADV_COL(S, 1)).
  %% Move decoder record S forward by one byte onto column 1 of the next line.
  -define(INC_LINE(S), S#decoder{line=1+S#decoder.line,
                                 column=1,
                                 offset=1+S#decoder.offset}).
  %% Consume the single byte C: a newline starts a new line, any other
  %% byte advances the column.
  -define(INC_CHAR(S, C),
          case C of
              $\n -> ?INC_LINE(S);
              _ -> ?INC_COL(S)
          end).
  %% Guard-safe test for space, tab, carriage return and newline.
  -define(IS_WHITESPACE(C),
          (C =:= $\n orelse C =:= $\r orelse C =:= $\t orelse C =:= $\s)).
  63. %% @type iolist() = [char() | binary() | iolist()]
  64. %% @type iodata() = iolist() | binary()
  %% @type json_string() = atom() | binary()
  66. %% @type json_number() = integer() | float()
  67. %% @type json_array() = [json_term()]
  68. %% @type json_object() = {struct, [{json_string(), json_term()}]}
  69. %% @type json_iolist() = {json, iolist()}
  70. %% @type json_term() = json_string() | json_number() | json_array() |
  71. %% json_object() | json_iolist()
  %% Encoder configuration.
  %%   handler - fun applied to terms json_encode/2 cannot encode; null
  %%             means such terms make the encoder exit.
  %%   utf8    - when true, characters >= 16#7f are emitted as raw UTF-8
  %%             instead of \uXXXX escapes.
  -record(encoder, {handler=null,
                    utf8=false}).
  %% Decoder state threaded through the tokenizer.
  %%   object_hook - fun applied to every decoded {struct, _}; null = none.
  %%   offset      - byte offset of the next unread byte in the input.
  %%   line/column - position bookkeeping, updated alongside offset.
  %%   state       - what the parser expects next (null, any, key, comma
  %%                 or trim).
  -record(decoder, {object_hook=null,
                    offset=0,
                    line=1,
                    column=1,
                    state=null}).
  %% @spec encoder([encoder_option()]) -> function()
  %% @doc Build a one-argument encoding fun configured by Options. The
  %% returned fun takes a json_term() and returns its JSON as an iolist.
  %% @type encoder_option() = handler_option() | utf8_option()
  %% @type handler_option() = {handler, function()}. Fallback applied to
  %%       terms that cannot be encoded directly.
  %% @type utf8_option() = {utf8, boolean()}. Emit unicode as utf8
  %%       (default - false)
  encoder(Options) ->
      Config = parse_encoder_options(Options, #encoder{}),
      fun (Term) -> json_encode(Term, Config) end.
  %% @spec encode(json_term()) -> iolist()
  %% @doc Encode the given term as JSON, using the default encoder
  %% settings, and return the result as an iolist.
  encode(Term) ->
      DefaultConfig = #encoder{},
      json_encode(Term, DefaultConfig).
  %% @spec decoder([decoder_option()]) -> function()
  %% @doc Build a one-argument decoding fun configured by Options. The only
  %% option recognised is {object_hook, Fun}, applied to every decoded
  %% object.
  decoder(Options) ->
      Config = parse_decoder_options(Options, #decoder{}),
      fun (Input) -> json_decode(Input, Config) end.
  %% @spec decode(iolist()) -> json_term()
  %% @doc Decode JSON text, given as a binary or an iolist, into Erlang
  %% terms using the default decoder settings.
  decode(Input) ->
      DefaultConfig = #decoder{},
      json_decode(Input, DefaultConfig).
  99. %% Internal API
  %% Fold the encoder option list into an #encoder{} record. Unknown
  %% options are not matched, so they crash with function_clause.
  parse_encoder_options([{utf8, Flag} | More], Config) ->
      parse_encoder_options(More, Config#encoder{utf8=Flag});
  parse_encoder_options([{handler, Fun} | More], Config) ->
      parse_encoder_options(More, Config#encoder{handler=Fun});
  parse_encoder_options([], Config) ->
      Config.
  %% Fold the decoder option list into a #decoder{} record. Only
  %% {object_hook, Fun} is recognised; anything else crashes.
  parse_decoder_options([{object_hook, Fun} | More], Config) ->
      parse_decoder_options(More, Config#decoder{object_hook=Fun});
  parse_decoder_options([], Config) ->
      Config.
  %% Encode one term as JSON iodata. Clause order matters: the JSON
  %% literals must be tried before the generic atom clause, and a raw
  %% proplist before the generic list clause.
  json_encode(true, _Enc) ->
      <<"true">>;
  json_encode(false, _Enc) ->
      <<"false">>;
  json_encode(null, _Enc) ->
      <<"null">>;
  json_encode(Int, _Enc) when is_integer(Int) ->
      integer_to_list(Int);
  json_encode(Float, _Enc) when is_float(Float) ->
      mochinum:digits(Float);
  json_encode(Text, Enc) when is_binary(Text); is_atom(Text) ->
      json_encode_string(Text, Enc);
  %% A list whose first element is a pair is treated as an object, unless
  %% that pair's key is one of the reserved wrapper tags.
  json_encode([{Key, _} | _] = Pairs, Enc)
    when Key =/= struct, Key =/= array, Key =/= json ->
      json_encode_proplist(Pairs, Enc);
  json_encode({struct, Pairs}, Enc) when is_list(Pairs) ->
      json_encode_proplist(Pairs, Enc);
  json_encode(Items, Enc) when is_list(Items) ->
      json_encode_array(Items, Enc);
  json_encode({array, Items}, Enc) when is_list(Items) ->
      json_encode_array(Items, Enc);
  %% Pre-rendered JSON is passed through without validation.
  json_encode({json, Raw}, _Enc) ->
      Raw;
  %% Anything else goes to the user handler, whose result is encoded in
  %% turn; without a handler the encoder exits.
  json_encode(Other, #encoder{handler=Handler} = Enc) ->
      case Handler of
          null ->
              exit({json_encode, {bad_term, Other}});
          _ ->
              json_encode(Handler(Other), Enc)
      end.
  %% Encode a list as a JSON array. Elements are encoded left to right
  %% into a reversed accumulator; the separator pushed after the last
  %% element is popped before the closing bracket is attached.
  json_encode_array([], _Enc) ->
      <<"[]">>;
  json_encode_array(Items, Enc) ->
      Push = fun (Item, Rev) ->
                     Encoded = json_encode(Item, Enc),
                     [$,, Encoded | Rev]
             end,
      [$, | Rev1] = lists:foldl(Push, [$[], Items),
      lists:reverse(Rev1, [$]]).
  %% Encode a list of {Key, Value} pairs as a JSON object. For each pair
  %% the key is encoded (always as a string) before the value.
  json_encode_proplist([], _Enc) ->
      <<"{}">>;
  json_encode_proplist(Pairs, Enc) ->
      Push = fun ({Key, Value}, Rev) ->
                     KeyJson = json_encode_string(Key, Enc),
                     ValueJson = json_encode(Value, Enc),
                     [$,, ValueJson, $:, KeyJson | Rev]
             end,
      [$, | Rev1] = lists:foldl(Push, [${], Pairs),
      lists:reverse(Rev1, [$}]).
  %% Encode an atom, binary, integer or character list as a quoted JSON
  %% string. Input that needs no escaping is wrapped in quotes as is;
  %% anything else is escaped character by character. Atoms and binaries
  %% are read as UTF-8 bytes, lists as code points.
  json_encode_string(Atom, Enc) when is_atom(Atom) ->
      Chars = atom_to_list(Atom),
      case json_string_is_safe(Chars) of
          false ->
              json_encode_string_unicode(xmerl_ucs:from_utf8(Chars), Enc, [?Q]);
          true ->
              [?Q, Chars, ?Q]
      end;
  json_encode_string(Bin, Enc) when is_binary(Bin) ->
      case json_bin_is_safe(Bin) of
          false ->
              json_encode_string_unicode(xmerl_ucs:from_utf8(Bin), Enc, [?Q]);
          true ->
              [?Q, Bin, ?Q]
      end;
  json_encode_string(Int, _Enc) when is_integer(Int) ->
      [?Q, integer_to_list(Int), ?Q];
  json_encode_string(Chars, Enc) when is_list(Chars) ->
      case json_string_is_safe(Chars) of
          false ->
              json_encode_string_unicode(Chars, Enc, [?Q]);
          true ->
              [?Q, Chars, ?Q]
      end.
  %% Return true when every element of the list is a printable ASCII
  %% character (16#20..16#7e) other than the double quote and the
  %% backslash, i.e. when the list can be placed between quotes unchanged.
  %%
  %% Elements must be integers: in Erlang's term order floats and negative
  %% integers also compare below 16#7f, so a bare upper-bound test would
  %% wrongly classify them as safe.
  json_string_is_safe([]) ->
      true;
  json_string_is_safe([C | Rest])
    when is_integer(C), C >= $\s, C < 16#7f, C =/= $\", C =/= $\\ ->
      json_string_is_safe(Rest);
  json_string_is_safe([_ | _]) ->
      false.
  %% Return true when every byte of the binary is printable ASCII
  %% (16#20..16#7e) other than the double quote and the backslash, i.e.
  %% when the binary can be placed between quotes unchanged.
  json_bin_is_safe(<<>>) ->
      true;
  json_bin_is_safe(<<Byte, Tail/binary>>)
    when Byte >= $\s, Byte < 16#7f, Byte =/= $\", Byte =/= $\\ ->
      json_bin_is_safe(Tail);
  json_bin_is_safe(<<_, _/binary>>) ->
      false.
  %% Escape a list of code points into a quoted JSON string. Acc is the
  %% reversed output and already holds the opening quote. Control
  %% characters are written as \uXXXX; characters >= 16#7f are written as
  %% raw UTF-8 when the encoder's utf8 flag is true and as \uXXXX escapes
  %% when it is false. Any element that is not a code point in
  %% 0..16#10FFFF exits with {json_encode, {bad_char, C}}.
  json_encode_string_unicode([], _State, Acc) ->
      lists:reverse([?Q | Acc]);
  json_encode_string_unicode([C | Cs], State, Acc) ->
      Acc1 = case C of
                 ?Q ->
                     [?Q, $\\ | Acc];
                 %% Escaping solidus is only useful when trying to protect
                 %% against "</script>" injection attacks which are only
                 %% possible when JSON is inserted into a HTML document
                 %% in-line. mochijson2 does not protect you from this, so
                 %% if you do insert directly into HTML then you need to
                 %% uncomment the following case or escape the output of encode.
                 %%
                 %% $/ ->
                 %%    [$/, $\\ | Acc];
                 %%
                 $\\ ->
                     [$\\, $\\ | Acc];
                 $\b ->
                     [$b, $\\ | Acc];
                 $\f ->
                     [$f, $\\ | Acc];
                 $\n ->
                     [$n, $\\ | Acc];
                 $\r ->
                     [$r, $\\ | Acc];
                 $\t ->
                     [$t, $\\ | Acc];
                 C when is_integer(C), C >= 0, C < $\s ->
                     [unihex(C) | Acc];
                 C when is_integer(C), C >= 16#7f, C =< 16#10FFFF,
                        State#encoder.utf8 ->
                     [xmerl_ucs:to_utf8(C) | Acc];
                 C when is_integer(C), C >= 16#7f, C =< 16#10FFFF,
                        not State#encoder.utf8 ->
                     [unihex(C) | Acc];
                 %% Require a real printable ASCII code point here: floats
                 %% and negative integers also compare below 16#7f and
                 %% must not be copied into the output.
                 C when is_integer(C), C >= $\s, C < 16#7f ->
                     [C | Acc];
                 _ ->
                     exit({json_encode, {bad_char, C}})
             end,
      json_encode_string_unicode(Cs, State, Acc1).
  %% Map a nibble (integer 0..15) to its lowercase hexadecimal character.
  %% Any other argument is a caller bug and fails with function_clause
  %% instead of yielding a non-hex character.
  hexdigit(C) when is_integer(C), C >= 0, C =< 9 ->
      C + $0;
  hexdigit(C) when is_integer(C), C >= 10, C =< 15 ->
      C + $a - 10.
  %% Render a code point as JSON \uXXXX escape text. Code points below
  %% 16#10000 give one escape; those up to 16#10FFFF are split into a
  %% UTF-16 surrogate pair and give two.
  unihex(C) when C < 16#10000 ->
      Digits = [hexdigit(Nibble) || <<Nibble:4>> <= <<C:16>>],
      [$\\, $u | Digits];
  unihex(C) when C =< 16#10FFFF ->
      Offset = C - 16#10000,
      High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
      Low = 16#dc00 bor (Offset band 16#3ff),
      [unihex(High), unihex(Low)].
  %% Decode a complete JSON document. Lists are flattened to a binary
  %% first. After the value is read, the rest of the input must tokenize
  %% to eof, so only trailing whitespace is allowed.
  json_decode(IoList, St) when is_list(IoList) ->
      json_decode(iolist_to_binary(IoList), St);
  json_decode(Bin, St) ->
      {Value, St1} = decode1(Bin, St),
      {eof, _} = tokenize(Bin, St1#decoder{state=trim}),
      Value.
  %% Decode a single JSON value starting at the decoder's current offset
  %% and return {Value, NewState}.
  decode1(Bin, St=#decoder{state=null}) ->
      case tokenize(Bin, St#decoder{state=any}) of
          {start_object, St1} ->
              decode_object(Bin, St1);
          {start_array, St1} ->
              decode_array(Bin, St1);
          {{const, Value}, St1} ->
              {Value, St1}
      end.
  %% Finish a decoded object: pass it through the configured object_hook,
  %% or return it unchanged when no hook is set.
  make_object(Object, #decoder{object_hook=Hook}) ->
      case Hook of
          null -> Object;
          _ -> Hook(Object)
      end.
  %% Decode the members of an object whose opening brace has already been
  %% consumed. Returns {Object, NewState}.
  decode_object(Bin, St) ->
      decode_object(Bin, St#decoder{state=key}, []).

  %% State `key' expects a member key or the closing brace; state `comma'
  %% expects a separator or the closing brace. Pairs is the reversed
  %% member list collected so far.
  decode_object(Bin, St=#decoder{state=key}, Pairs) ->
      case tokenize(Bin, St) of
          {end_object, St1} ->
              close_object(Pairs, St1);
          {{const, Key}, St1} ->
              {colon, St2} = tokenize(Bin, St1),
              {Value, St3} = decode1(Bin, St2#decoder{state=null}),
              decode_object(Bin, St3#decoder{state=comma}, [{Key, Value} | Pairs])
      end;
  decode_object(Bin, St=#decoder{state=comma}, Pairs) ->
      case tokenize(Bin, St) of
          {end_object, St1} ->
              close_object(Pairs, St1);
          {comma, St1} ->
              decode_object(Bin, St1#decoder{state=key}, Pairs)
      end.

  %% Build the final object from the reversed pairs and reset the state.
  close_object(Pairs, St) ->
      Object = make_object({struct, lists:reverse(Pairs)}, St),
      {Object, St#decoder{state=null}}.
  %% Decode the elements of an array whose opening bracket has already
  %% been consumed. Returns {List, NewState}.
  decode_array(Bin, St) ->
      decode_array(Bin, St#decoder{state=any}, []).

  %% State `any' expects an element or the closing bracket; state `comma'
  %% expects a separator or the closing bracket. Items is the reversed
  %% element list collected so far.
  decode_array(Bin, St=#decoder{state=any}, Items) ->
      case tokenize(Bin, St) of
          {end_array, St1} ->
              {lists:reverse(Items), St1#decoder{state=null}};
          {start_array, St1} ->
              {Nested, St2} = decode_array(Bin, St1),
              decode_array(Bin, St2#decoder{state=comma}, [Nested | Items]);
          {start_object, St1} ->
              {Object, St2} = decode_object(Bin, St1),
              decode_array(Bin, St2#decoder{state=comma}, [Object | Items]);
          {{const, Value}, St1} ->
              decode_array(Bin, St1#decoder{state=comma}, [Value | Items])
      end;
  decode_array(Bin, St=#decoder{state=comma}, Items) ->
      case tokenize(Bin, St) of
          {end_array, St1} ->
              {lists:reverse(Items), St1#decoder{state=null}};
          {comma, St1} ->
              decode_array(Bin, St1#decoder{state=any}, Items)
      end.
  %% Read a string token whose opening quote has already been consumed.
  %% The fast scanner is tried first. If it reaches the closing quote, the
  %% string is sliced straight out of the input. If it meets a backslash,
  %% the bytes scanned so far seed the accumulator of the escape-aware
  %% tokenize_string/3.
  tokenize_string(Bin, St=#decoder{offset=Start}) ->
      case tokenize_string_fast(Bin, Start) of
          {escape, Stop} ->
              Len = Stop - Start,
              Prefix = binary:part(Bin, Start, Len),
              tokenize_string(Bin, ?ADV_COL(St, Len),
                              lists:reverse(binary_to_list(Prefix)));
          Stop ->
              Len = Stop - Start,
              String = binary:part(Bin, Start, Len),
              %% Len + 1 also steps over the closing quote.
              {{const, String}, ?ADV_COL(St, Len + 1)}
      end.
  %% Scan forward from byte offset Pos over well-formed UTF-8. Returns the
  %% offset of the closing quote, or {escape, Offset} at the first
  %% backslash. Throws invalid_utf8 on a malformed sequence or when the
  %% input ends before a quote is found.
  tokenize_string_fast(Bin, Pos) ->
      case Bin of
          <<_:Pos/binary, ?Q, _/binary>> ->
              Pos;
          <<_:Pos/binary, $\\, _/binary>> ->
              {escape, Pos};
          %% one-byte (ASCII) character
          <<_:Pos/binary, B1, _/binary>> when B1 < 16#80 ->
              tokenize_string_fast(Bin, Pos + 1);
          %% two-byte sequence
          <<_:Pos/binary, B1, B2, _/binary>>
            when B1 >= 16#C2, B1 =< 16#DF,
                 B2 >= 16#80, B2 =< 16#BF ->
              tokenize_string_fast(Bin, Pos + 2);
          %% three-byte sequence
          <<_:Pos/binary, B1, B2, B3, _/binary>>
            when B1 >= 16#E0, B1 =< 16#EF,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF ->
              tokenize_string_fast(Bin, Pos + 3);
          %% four-byte sequence
          <<_:Pos/binary, B1, B2, B3, B4, _/binary>>
            when B1 >= 16#F0, B1 =< 16#F4,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF,
                 B4 >= 16#80, B4 =< 16#BF ->
              tokenize_string_fast(Bin, Pos + 4);
          _ ->
              throw(invalid_utf8)
      end.
  %% Escape-aware string scanner. Rev holds the decoded bytes so far in
  %% reverse order. Returns {{const, Binary}, NewState} at the closing
  %% quote and throws invalid_utf8 on malformed UTF-8 or premature end of
  %% input. A backslash that does not start a recognised escape falls
  %% through to the plain ASCII clause and is kept literally.
  tokenize_string(Bin, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, ?Q, _/binary>> ->
              {{const, iolist_to_binary(lists:reverse(Rev))}, ?INC_COL(St)};
          %% two-character escapes: \" \\ \/ \b \f \n \r \t
          <<_:Pos/binary, $\\, Esc, _/binary>>
            when Esc =:= ?Q; Esc =:= $\\; Esc =:= $/; Esc =:= $b;
                 Esc =:= $f; Esc =:= $n; Esc =:= $r; Esc =:= $t ->
              tokenize_string(Bin, ?ADV_COL(St, 2), [simple_escape(Esc) | Rev]);
          <<_:Pos/binary, $\\, $u, H3, H2, H1, H0, Rest/binary>> ->
              Unit = erlang:list_to_integer([H3, H2, H1, H0], 16),
              case Unit >= 16#D800 andalso Unit =< 16#DBFF of
                  true ->
                      %% A high surrogate must be followed by a second
                      %% \uXXXX escape; the pair becomes one code point.
                      <<$\\, $u, L3, L2, L1, L0, _/binary>> = Rest,
                      Low = erlang:list_to_integer([L3, L2, L1, L0], 16),
                      [CodePoint] = xmerl_ucs:from_utf16be(
                                      <<Unit:16/big-unsigned-integer,
                                        Low:16/big-unsigned-integer>>),
                      Rev1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Rev),
                      tokenize_string(Bin, ?ADV_COL(St, 12), Rev1);
                  false ->
                      Rev1 = lists:reverse(xmerl_ucs:to_utf8(Unit), Rev),
                      tokenize_string(Bin, ?ADV_COL(St, 6), Rev1)
              end;
          %% one-byte (ASCII) character; may be a raw newline
          <<_:Pos/binary, B1, _/binary>> when B1 < 16#80 ->
              tokenize_string(Bin, ?INC_CHAR(St, B1), [B1 | Rev]);
          %% two-byte sequence
          <<_:Pos/binary, B1, B2, _/binary>>
            when B1 >= 16#C2, B1 =< 16#DF,
                 B2 >= 16#80, B2 =< 16#BF ->
              tokenize_string(Bin, ?ADV_COL(St, 2), [B2, B1 | Rev]);
          %% three-byte sequence
          <<_:Pos/binary, B1, B2, B3, _/binary>>
            when B1 >= 16#E0, B1 =< 16#EF,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF ->
              tokenize_string(Bin, ?ADV_COL(St, 3), [B3, B2, B1 | Rev]);
          %% four-byte sequence
          <<_:Pos/binary, B1, B2, B3, B4, _/binary>>
            when B1 >= 16#F0, B1 =< 16#F4,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF,
                 B4 >= 16#80, B4 =< 16#BF ->
              tokenize_string(Bin, ?ADV_COL(St, 4), [B4, B3, B2, B1 | Rev]);
          _ ->
              throw(invalid_utf8)
      end.

  %% Character denoted by the letter following a backslash.
  simple_escape(?Q) -> ?Q;
  simple_escape($\\) -> $\\;
  simple_escape($/) -> $/;
  simple_escape($b) -> $\b;
  simple_escape($f) -> $\f;
  simple_escape($n) -> $\n;
  simple_escape($r) -> $\r;
  simple_escape($t) -> $\t.
  %% Read a number token at the current offset and convert the collected
  %% characters to an integer or a float.
  tokenize_number(Bin, St) ->
      case tokenize_number(Bin, sign, St, []) of
          {{float, Digits}, St1} ->
              {{const, list_to_float(Digits)}, St1};
          {{int, Digits}, St1} ->
              {{const, list_to_integer(Digits)}, St1}
      end.
  %% Number scanner written as a state machine. The second argument names
  %% the part of the number expected next: sign, int (first digit), int1
  %% (further digits), frac, frac1 (fraction digits), esign, eint (first
  %% exponent digit), eint1 (further exponent digits). Rev collects the
  %% characters in reverse. Returns {{int | float, Chars}, NewState}.
  tokenize_number(Bin, sign, St=#decoder{offset=Pos}, []) ->
      case Bin of
          <<_:Pos/binary, "-", _/binary>> ->
              tokenize_number(Bin, int, ?INC_COL(St), "-");
          _ ->
              tokenize_number(Bin, int, St, [])
      end;
  tokenize_number(Bin, int, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          %% A leading zero cannot be followed by more integer digits.
          <<_:Pos/binary, "0", _/binary>> ->
              tokenize_number(Bin, frac, ?INC_COL(St), [$0 | Rev]);
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $1, Digit =< $9 ->
              tokenize_number(Bin, int1, ?INC_COL(St), [Digit | Rev])
      end;
  tokenize_number(Bin, int1, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(Bin, int1, ?INC_COL(St), [Digit | Rev]);
          _ ->
              tokenize_number(Bin, frac, St, Rev)
      end;
  tokenize_number(Bin, frac, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, ".", Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(Bin, frac1, ?ADV_COL(St, 2), [Digit, $. | Rev]);
          %% An exponent directly after the integer part: insert ".0" so
          %% that the text is acceptable to list_to_float/1.
          <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
              tokenize_number(Bin, esign, ?INC_COL(St), [$e, $0, $. | Rev]);
          _ ->
              {{int, lists:reverse(Rev)}, St}
      end;
  tokenize_number(Bin, frac1, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(Bin, frac1, ?INC_COL(St), [Digit | Rev]);
          <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
              tokenize_number(Bin, esign, ?INC_COL(St), [$e | Rev]);
          _ ->
              {{float, lists:reverse(Rev)}, St}
      end;
  tokenize_number(Bin, esign, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, Sign, _/binary>> when Sign =:= $-; Sign =:= $+ ->
              tokenize_number(Bin, eint, ?INC_COL(St), [Sign | Rev]);
          _ ->
              tokenize_number(Bin, eint, St, Rev)
      end;
  tokenize_number(Bin, eint, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(Bin, eint1, ?INC_COL(St), [Digit | Rev])
      end;
  tokenize_number(Bin, eint1, St=#decoder{offset=Pos}, Rev) ->
      case Bin of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(Bin, eint1, ?INC_COL(St), [Digit | Rev]);
          _ ->
              {{float, lists:reverse(Rev)}, St}
      end.
  %% Return the next token at the decoder's offset together with the
  %% advanced state. Whitespace is skipped. Tokens are start_object,
  %% end_object, start_array, end_array, comma, colon, {const, Value} and
  %% eof. Reaching the end of input is accepted only in state `trim';
  %% any other state fails the match.
  tokenize(Bin, St=#decoder{offset=Pos}) ->
      case Bin of
          <<_:Pos/binary, Ws, _/binary>> when ?IS_WHITESPACE(Ws) ->
              tokenize(Bin, ?INC_CHAR(St, Ws));
          <<_:Pos/binary, ${, _/binary>> ->
              {start_object, ?INC_COL(St)};
          <<_:Pos/binary, $}, _/binary>> ->
              {end_object, ?INC_COL(St)};
          <<_:Pos/binary, $[, _/binary>> ->
              {start_array, ?INC_COL(St)};
          <<_:Pos/binary, $], _/binary>> ->
              {end_array, ?INC_COL(St)};
          <<_:Pos/binary, $,, _/binary>> ->
              {comma, ?INC_COL(St)};
          <<_:Pos/binary, $:, _/binary>> ->
              {colon, ?INC_COL(St)};
          <<_:Pos/binary, $n, $u, $l, $l, _/binary>> ->
              {{const, null}, ?ADV_COL(St, 4)};
          <<_:Pos/binary, $t, $r, $u, $e, _/binary>> ->
              {{const, true}, ?ADV_COL(St, 4)};
          <<_:Pos/binary, $f, $a, $l, $s, $e, _/binary>> ->
              {{const, false}, ?ADV_COL(St, 5)};
          <<_:Pos/binary, ?Q, _/binary>> ->
              tokenize_string(Bin, ?INC_COL(St));
          <<_:Pos/binary, Ch, _/binary>>
            when (Ch >= $0 andalso Ch =< $9) orelse Ch =:= $- ->
              tokenize_number(Bin, St);
          <<_:Pos/binary>> ->
              trim = St#decoder.state,
              {eof, St}
      end.
  524. %%
  525. %% Tests
  526. %%
  527. -ifdef(TEST).
  528. -include_lib("eunit/include/eunit.hrl").
  %% Testing constructs borrowed from the Yaws JSON implementation.
  %% Build the decoded representation of an empty JSON object.
  obj_new() ->
      NoMembers = [],
      {struct, NoMembers}.
  %% True when every member of the object is a pair with a binary key;
  %% any other member shape crashes inside the predicate fun.
  is_obj({struct, Members}) ->
      lists:all(fun ({Key, _}) when is_binary(Key) -> true end, Members).
  %% Wrap a list of {BinaryKey, Value} pairs as a JSON object, asserting
  %% that the result is well formed.
  obj_from_list(Members) ->
      Object = {struct, Members},
      ?assert(is_obj(Object)),
      Object.
  %% Test two decoded JSON terms for equivalence.
  %% Objects may list their members in a different order and still be
  %% equivalent, so aggregates are compared recursively rather than as
  %% plain Erlang terms. Numbers are compared with ==, so 1 and 1.0 are
  %% equivalent. Terms of different kinds match no clause and crash.
  equiv({struct, MembersA}, {struct, MembersB}) ->
      equiv_object(MembersA, MembersB);
  equiv(ListA, ListB) when is_list(ListA), is_list(ListB) ->
      equiv_list(ListA, ListB);
  equiv(NumA, NumB) when is_number(NumA), is_number(NumB) ->
      NumA == NumB;
  equiv(BinA, BinB) when is_binary(BinA), is_binary(BinB) ->
      BinA == BinB;
  equiv(Atom, Atom) when Atom =:= true; Atom =:= false; Atom =:= null ->
      true.
  %% Member order inside an object is unspecified, so both member lists
  %% are sorted by key and then compared pairwise. Asserts equivalence:
  %% returns true or crashes.
  equiv_object(MembersA, MembersB) ->
      SortedA = lists:keysort(1, MembersA),
      SortedB = lists:keysort(1, MembersB),
      SamePair = fun ({{KeyA, ValA}, {KeyB, ValB}}) ->
                         equiv(KeyA, KeyB) and equiv(ValA, ValB)
                 end,
      true = lists:all(SamePair, lists:zip(SortedA, SortedB)).
  %% Compare two lists element by element for equivalence. Lists of
  %% different length match no clause and crash.
  equiv_list([HeadA | TailA], [HeadB | TailB]) ->
      equiv(HeadA, HeadB) andalso equiv_list(TailA, TailB);
  equiv_list([], []) ->
      true.
  %% Large floats keep their value, and an escaped surrogate pair is
  %% decoded to the four-byte UTF-8 sequence of the code point.
  decode_test() ->
      Numbers = decode(<<"[1199344435545.0,1]">>),
      [1199344435545.0, 1] = Numbers,
      Surrogates = decode([34,"\\ud835","\\udf15",34]),
      <<16#F0,16#9D,16#9C,16#95>> = Surrogates.
  %% Run every Erlang/JSON pair of the test vector through test_one/2.
  e2j_vec_test() ->
      Vector = e2j_test_vec(utf8),
      test_one(Vector, 1).
  %% For each {Erlang, Json} pair check that the JSON decodes to the
  %% Erlang term and that the term survives an encode/decode round trip.
  %% Index is the 1-based position of the pair in the vector.
  test_one([{Erlang, Json} | More], Index) ->
      Decoded = decode(Json),
      true = equiv(Erlang, Decoded),
      RoundTrip = decode(encode(Erlang)),
      true = equiv(Erlang, RoundTrip),
      test_one(More, Index + 1);
  test_one([], _Index) ->
      ok.
  %% Test vector of {ErlangTerm, JsonText} pairs, grouped by kind.
  e2j_test_vec(utf8) ->
      Numbers =
          [{1, "1"},
           %% text representation may truncate, trail zeroes
           {3.1416, "3.14160"},
           {-1, "-1"},
           {-3.1416, "-3.14160"},
           {12.0e10, "1.20000e+11"},
           {1.234E+10, "1.23400e+10"},
           {-1.234E-10, "-1.23400e-10"},
           {10.0, "1.0e+01"},
           {123.456, "1.23456E+2"},
           {10.0, "1e1"}],
      Strings =
          [{<<"foo">>, "\"foo\""},
           {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
           {<<"">>, "\"\""},
           {<<"\n\n\n">>, "\"\\n\\n\\n\""},
           {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}],
      Containers =
          [{obj_new(), "{}"},
           {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
           {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
            "{\"foo\":\"bar\",\"baz\":123}"},
           {[], "[]"},
           {[[]], "[[]]"},
           {[1, <<"foo">>], "[1,\"foo\"]"},
           %% json array in a json object
           {obj_from_list([{<<"foo">>, [123]}]),
            "{\"foo\":[123]}"},
           %% json object in a json object
           {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
            "{\"foo\":{\"bar\":true}}"},
           %% fold evaluation order
           {obj_from_list([{<<"foo">>, []},
                           {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                           {<<"alice">>, <<"bob">>}]),
            "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
           %% json object in a json array
           {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
            "[-123,\"foo\",{\"bar\":[]},null]"}],
      Numbers ++ Strings ++ Containers.
  %% Check both output modes for non-ASCII text.
  encoder_utf8_test() ->
      Input = <<1,"\321\202\320\265\321\201\321\202">>,
      %% default: every non-ASCII character becomes a \uXXXX escape
      Escaped = encode(Input),
      [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = Escaped,
      %% optional: raw utf8 output, control characters still escaped
      Utf8Encoder = mochijson2:encoder([{utf8, true}]),
      Raw = Utf8Encoder(Input),
      [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = Raw.
  %% Well-formed UTF-8 strings must decode to the expected bytes.
  %% Malformed ones must make decode throw invalid_utf8 and make encode
  %% fail.
  input_validation_test() ->
      Valid = [
               {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
               {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
               {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
              ],
      [begin
           Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
           Expect = decode(Utf8)
       end || {CodePoint, Utf8} <- Valid],
      Invalid = [
                 %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
                 <<?Q, 16#80, ?Q>>,
                 %% missing continuations, last byte in each should be 80-BF
                 <<?Q, 16#C2, 16#7F, ?Q>>,
                 <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
                 <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
                 %% we don't support code points > 10FFFF per RFC 3629
                 <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
                 %% escape characters trigger a different code path
                 <<?Q, $\\, $\n, 16#80, ?Q>>
                ],
      [begin
           ok = try decode(Input) catch invalid_utf8 -> ok end,
           %% could be {ucs,{bad_utf8_character_code}} or
           %% {json_encode,{bad_char,_}}
           {'EXIT', _} = (catch encode(Input))
       end || Input <- Invalid],
      ok.
  %% {json, IoList} is copied into the output verbatim, both at top level
  %% and as an object member value.
  inline_json_test() ->
      Inline = {json, [<<"\"iodata">>, " iodata\""]},
      TopLevel = iolist_to_binary(encode(Inline)),
      ?assertEqual(<<"\"iodata iodata\"">>, TopLevel),
      Nested = decode(encode({struct, [{key, Inline}]})),
      ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, Nested),
      ok.
  %% A code point above 16#FFFF is encoded as a surrogate pair and decodes
  %% back to the same UTF-8 bytes.
  big_unicode_test() ->
      UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
      Encoded = iolist_to_binary(encode(UTF8Seq)),
      ?assertEqual(<<"\"\\ud834\\udd20\"">>, Encoded),
      Decoded = decode(iolist_to_binary(encode(UTF8Seq))),
      ?assertEqual(UTF8Seq, Decoded),
      ok.
  %% decoder/1 without options behaves like decode/1; with an object_hook
  %% the hook's result replaces each decoded object.
  custom_decoder_test() ->
      Json = "{\"key\": \"value\"}",
      Plain = decoder([]),
      ?assertEqual({struct, [{<<"key">>, <<"value">>}]}, Plain(Json)),
      Hook = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
      Hooked = decoder([{object_hook, Hook}]),
      ?assertEqual(win, Hooked(Json)),
      ok.
  %% The atoms true, false and null map to the JSON literals; every other
  %% atom is encoded as a string.
  atom_test() ->
      %% JSON native atoms
      CheckLiteral =
          fun (Atom) ->
                  Text = atom_to_list(Atom),
                  ?assertEqual(Atom, decode(Text)),
                  ?assertEqual(iolist_to_binary(Text),
                               iolist_to_binary(encode(Atom)))
          end,
      lists:foreach(CheckLiteral, [true, false, null]),
      %% Atom to string
      ?assertEqual(<<"\"foo\"">>, iolist_to_binary(encode(foo))),
      BigAtom = list_to_atom(xmerl_ucs:to_utf8(16#0001d120)),
      ?assertEqual(<<"\"\\ud834\\udd20\"">>, iolist_to_binary(encode(BigAtom))),
      ok.
  %% Some forms are accepted as object keys that would not be encoded as
  %% strings anywhere else.
  key_encode_test() ->
      FooObject = <<"{\"foo\":1}">>,
      FooInputs = [{struct, [{foo, 1}]},
                   {struct, [{<<"foo">>, 1}]},
                   {struct, [{"foo", 1}]},
                   [{foo, 1}],
                   [{<<"foo">>, 1}],
                   [{"foo", 1}]],
      lists:foreach(
        fun (Input) ->
                ?assertEqual(FooObject, iolist_to_binary(encode(Input)))
        end,
        FooInputs),
      %% a key given as a list of code points
      ?assertEqual(<<"{\"\\ud834\\udd20\":1}">>,
                   iolist_to_binary(encode({struct, [{[16#0001d120], 1}]}))),
      %% an integer key
      ?assertEqual(<<"{\"1\":1}">>,
                   iolist_to_binary(encode({struct, [{1, 1}]}))),
      ok.
  %% Characters that need escaping are reported unsafe by both safety
  %% predicates and survive an encode/decode round trip.
  unsafe_chars_test() ->
      CheckChar =
          fun (C) ->
                  ?assertEqual(false, json_string_is_safe([C])),
                  ?assertEqual(false, json_bin_is_safe(<<C>>)),
                  ?assertEqual(<<C>>, decode(encode(<<C>>)))
          end,
      lists:foreach(CheckChar, "\"\\\b\f\n\r\t"),
      BigCodePoint = 16#0001d120,
      ?assertEqual(false, json_string_is_safe([BigCodePoint])),
      ?assertEqual(false,
                   json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(BigCodePoint)))),
      BigAtom = list_to_atom(xmerl_ucs:to_utf8(BigCodePoint)),
      ?assertEqual([BigCodePoint],
                   xmerl_ucs:from_utf8(binary_to_list(decode(encode(BigAtom))))),
      %% one past the last valid code point
      ?assertEqual(false, json_string_is_safe([16#110000])),
      ?assertEqual(false,
                   json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
      %% solidus can be escaped but isn't unsafe by default
      ?assertEqual(<<"/">>, decode(<<"\"\\/\"">>)),
      ok.
  %% Small non-negative integers decode to themselves.
  int_test() ->
      Cases = [{0, "0"}, {1, "1"}, {11, "11"}],
      lists:foreach(
        fun ({Expected, Json}) ->
                ?assertEqual(Expected, decode(Json))
        end,
        Cases),
      ok.
  %% Integers far beyond machine word size are written out in full.
  large_int_test() ->
      Cases = [{<<"-2147483649214748364921474836492147483649">>,
                -2147483649214748364921474836492147483649},
               {<<"2147483649214748364921474836492147483649">>,
                2147483649214748364921474836492147483649}],
      lists:foreach(
        fun ({Expected, Int}) ->
                ?assertEqual(Expected, iolist_to_binary(encode(Int)))
        end,
        Cases),
      ok.
  %% Floats just outside the 32-bit integer range keep their ".0" suffix.
  float_test() ->
      Cases = [{<<"-2147483649.0">>, -2147483649.0},
               {<<"2147483648.0">>, 2147483648.0}],
      lists:foreach(
        fun ({Expected, Float}) ->
                ?assertEqual(Expected, iolist_to_binary(encode(Float)))
        end,
        Cases),
      ok.
  %% A term the encoder cannot handle makes encode/1 exit with
  %% {json_encode, {bad_term, Term}}. With a handler configured, the
  %% handler's replacement is encoded instead.
  handler_test() ->
      %% ?assertExit checks the exception class as well as the reason,
      %% which an old-style `catch' expression cannot do.
      ?assertExit(
         {json_encode, {bad_term, {}}},
         encode({})),
      F = fun ({}) -> [] end,
      ?assertEqual(
         <<"[]">>,
         iolist_to_binary((encoder([{handler, F}]))({}))),
      ok.
  779. -endif.