PageRenderTime 137ms CodeModel.GetById 17ms app.highlight 111ms RepoModel.GetById 1ms app.codeStats 0ms

/deps/mochiweb/src/mochijson2.erl

http://github.com/zotonic/zotonic
Erlang | 849 lines | 678 code | 70 blank | 101 comment | 6 complexity | d855569d94b0924d6ac240f96a392e53 MD5 | raw file
  1%% @author Bob Ippolito <bob@mochimedia.com>
  2%% @copyright 2007 Mochi Media, Inc.
  3
  4%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
  5%%      with binaries as strings, arrays as lists (without an {array, _})
  6%%      wrapper and it only knows how to decode UTF-8 (and ASCII).
  7%%
  8%%      JSON terms are decoded as follows (javascript -> erlang):
  9%%      <ul>
 10%%          <li>{"key": "value"} ->
 11%%              {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
 12%%          <li>["array", 123, 12.34, true, false, null] ->
 13%%              [&lt;&lt;"array">>, 123, 12.34, true, false, null]
 14%%          </li>
 15%%      </ul>
 16%%      <ul>
 17%%          <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
 18%%          <li>Objects decode to {struct, PropList}</li>
 19%%          <li>Numbers decode to integer or float</li>
 20%%          <li>true, false, null decode to their respective terms.</li>
 21%%      </ul>
 22%%      The encoder will accept the same format that the decoder will produce,
 23%%      but will also allow additional cases for leniency:
 24%%      <ul>
 25%%          <li>atoms other than true, false, null will be considered UTF-8
 26%%              strings (even as a proplist key)
 27%%          </li>
 28%%          <li>{json, IoList} will insert IoList directly into the output
 29%%              with no validation
 30%%          </li>
 31%%          <li>{array, Array} will be encoded as Array
 32%%              (legacy mochijson style)
 33%%          </li>
 34%%          <li>A non-empty raw proplist will be encoded as an object as long
 35%%              as the first pair does not have an atom key of json, struct,
 36%%              or array
 37%%          </li>
 38%%      </ul>
 39
 40-module(mochijson2).
 41-author('bob@mochimedia.com').
 42-export([encoder/1, encode/1]).
 43-export([decoder/1, decode/1]).
 44
 45% This is a macro to placate syntax highlighters..
 46-define(Q, $\").
 47-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
 48                                 column=N+S#decoder.column}).
 49-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
 50                              column=1+S#decoder.column}).
 51-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
 52                               column=1,
 53                               line=1+S#decoder.line}).
 54-define(INC_CHAR(S, C),
 55        case C of
 56            $\n ->
 57                S#decoder{column=1,
 58                          line=1+S#decoder.line,
 59                          offset=1+S#decoder.offset};
 60            _ ->
 61                S#decoder{column=1+S#decoder.column,
 62                          offset=1+S#decoder.offset}
 63        end).
 64-define(IS_WHITESPACE(C),
 65        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
 66
 67%% @type iolist() = [char() | binary() | iolist()]
 68%% @type iodata() = iolist() | binary()
  69%% @type json_string() = atom() | binary()
 70%% @type json_number() = integer() | float()
 71%% @type json_array() = [json_term()]
 72%% @type json_object() = {struct, [{json_string(), json_term()}]}
 73%% @type json_iolist() = {json, iolist()}
 74%% @type json_term() = json_string() | json_number() | json_array() |
 75%%                     json_object() | json_iolist()
 76
 77-record(encoder, {handler=null,
 78                  utf8=false}).
 79
 80-record(decoder, {object_hook=null,
 81                  offset=0,
 82                  line=1,
 83                  column=1,
 84                  state=null}).
 85
 86%% @spec encoder([encoder_option()]) -> function()
 87%% @doc Create an encoder/1 with the given options.
 88%% @type encoder_option() = handler_option() | utf8_option()
 89%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
 90encoder(Options) ->
 91    State = parse_encoder_options(Options, #encoder{}),
 92    fun (O) -> json_encode(O, State) end.
 93
 94%% @spec encode(json_term()) -> iolist()
 95%% @doc Encode the given as JSON to an iolist.
 96encode(Any) ->
 97    json_encode(Any, #encoder{}).
 98
 99%% @spec decoder([decoder_option()]) -> function()
100%% @doc Create a decoder/1 with the given options.
101decoder(Options) ->
102    State = parse_decoder_options(Options, #decoder{}),
103    fun (O) -> json_decode(O, State) end.
104
%% @spec decode(iodata()) -> json_term()
%% @doc Decode JSON text, given as a binary or an iolist, to Erlang terms
%%      using the default decoder settings (no object hook).
decode(Json) ->
    Defaults = #decoder{},
    json_decode(Json, Defaults).
109
110%% Internal API
111
%% Fold a list of {handler, Handler} / {utf8, Switch} options into the
%% given #encoder{} record. Later options override earlier ones; any other
%% option fails with function_clause.
parse_encoder_options(Options, State) ->
    lists:foldl(fun encoder_option/2, State, Options).

%% Apply a single encoder option to the record.
encoder_option({handler, Handler}, State) ->
    State#encoder{handler=Handler};
encoder_option({utf8, Switch}, State) ->
    State#encoder{utf8=Switch}.
118
%% Fold a list of {object_hook, Hook} options into the given #decoder{}
%% record. A later hook replaces an earlier one; any other option fails
%% with function_clause.
parse_decoder_options(Options, State) ->
    lists:foldl(fun decoder_option/2, State, Options).

%% Apply a single decoder option to the record.
decoder_option({object_hook, Hook}, State) ->
    State#decoder{object_hook=Hook}.
123
%% Encode one Erlang term as JSON, dispatching on the shape of the term.
%% Returns iodata. A term that matches none of the shapes below is passed
%% to the configured handler and the handler's result is encoded instead;
%% without a handler the process exits with {json_encode, {bad_term, Term}}.

%% JSON literals.
json_encode(true, _Enc) ->
    <<"true">>;
json_encode(false, _Enc) ->
    <<"false">>;
json_encode(null, _Enc) ->
    <<"null">>;
%% Numbers.
json_encode(Int, _Enc) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _Enc) when is_float(Float) ->
    mochinum:digits(Float);
%% Strings: binaries and every atom other than the three literals above.
json_encode(Str, Enc) when is_atom(Str) orelse is_binary(Str) ->
    json_encode_string(Str, Enc);
%% Tagged wrappers.
json_encode({struct, Members}, Enc) when is_list(Members) ->
    json_encode_proplist(Members, Enc);
json_encode({array, Items}, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
json_encode({json, Raw}, _Enc) ->
    %% pre-rendered JSON is inserted verbatim, without validation
    Raw;
%% A bare list is an object when its first element is a pair whose key is
%% not one of the wrapper tags, and an array otherwise.
json_encode([{Key, _} | _] = Members, Enc) when Key =/= struct,
                                                 Key =/= array,
                                                 Key =/= json ->
    json_encode_proplist(Members, Enc);
json_encode(Items, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
%% Everything else.
json_encode(Other, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Other}});
json_encode(Other, #encoder{handler=Handler} = Enc) ->
    json_encode(Handler(Other), Enc).
152
%% Encode a list as a JSON array. The empty list yields the binary
%% <<"[]">>; otherwise the result is a flat list of the form
%% [$[, E1, $,, E2, ..., $]] where each E is an encoded element.
json_encode_array([], _State) ->
    <<"[]">>;
json_encode_array([First | Rest], State) ->
    [$\[, json_encode(First, State) | json_encode_array_tail(Rest, State)].

%% Emit ",Element" for every remaining element, then the closing bracket.
json_encode_array_tail([], _State) ->
    [$\]];
json_encode_array_tail([Item | Rest], State) ->
    [$,, json_encode(Item, State) | json_encode_array_tail(Rest, State)].
161
%% Encode a list of {Key, Value} pairs as a JSON object. The empty list
%% yields the binary <<"{}">>; otherwise the result is a flat list of the
%% form [${, K1, $:, V1, $,, K2, $:, V2, ..., $}]. Keys go through
%% json_encode_string/2, values through json_encode/2.
json_encode_proplist([], _State) ->
    <<"{}">>;
json_encode_proplist([Pair | Rest], State) ->
    [$\{ | json_encode_member(Pair, Rest, State)].

%% Emit one "key:value" member followed by whatever comes after it.
json_encode_member({K, V}, Rest, State) ->
    KS = json_encode_string(K, State),
    VS = json_encode(V, State),
    [KS, $:, VS | json_encode_member_tail(Rest, State)].

%% Emit either the closing brace or a comma and the next member.
json_encode_member_tail([], _State) ->
    [$\}];
json_encode_member_tail([Pair | Rest], State) ->
    [$, | json_encode_member(Pair, Rest, State)].
172
%% Encode a string-like term as a quoted JSON string.
%%
%% Integers are rendered as their decimal digits in quotes. Binaries,
%% atoms and lists are emitted verbatim between quotes when they contain
%% only characters that need no escaping, and are escaped by
%% json_encode_string_unicode/3 otherwise. Binaries and atom names are
%% decoded as UTF-8 before escaping; lists are passed on as they are.
json_encode_string(I, _State) when is_integer(I) ->
    [?Q, integer_to_list(I), ?Q];
json_encode_string(B, State) when is_binary(B) ->
    case json_bin_is_safe(B) of
        true ->
            [?Q, B, ?Q];
        false ->
            CodePoints = xmerl_ucs:from_utf8(B),
            json_encode_string_unicode(CodePoints, State, [?Q])
    end;
json_encode_string(A, State) when is_atom(A) ->
    Name = atom_to_list(A),
    case json_string_is_safe(Name) of
        true ->
            [?Q, Name, ?Q];
        false ->
            CodePoints = xmerl_ucs:from_utf8(Name),
            json_encode_string_unicode(CodePoints, State, [?Q])
    end;
json_encode_string(L, State) when is_list(L) ->
    case json_string_is_safe(L) of
        true ->
            [?Q, L, ?Q];
        false ->
            json_encode_string_unicode(L, State, [?Q])
    end.
197
%% Return true when every element of the list is a character that can be
%% copied into a JSON string without escaping: an integer in the printable
%% ASCII range 16#20..16#7e other than the double quote and the backslash.
%%
%% Anything else makes the list unsafe. That covers control characters,
%% DEL, code points above ASCII, and elements that are not characters at
%% all (negative integers, floats, atoms, nested lists). Non-character
%% numbers used to be reported as safe and were then spliced unescaped
%% into the output.
json_string_is_safe([]) ->
    true;
json_string_is_safe([C | Rest]) when is_integer(C),
                                     C >= $\s, C < 16#7f,
                                     C =/= $\", C =/= $\\ ->
    json_string_is_safe(Rest);
json_string_is_safe([_ | _]) ->
    false.
223
%% Return true when every byte of the binary can be copied into a JSON
%% string without escaping: a byte in the printable ASCII range
%% 16#20..16#7e other than the double quote and the backslash. Control
%% characters, DEL and every byte of a multi-byte UTF-8 sequence make the
%% binary unsafe.
json_bin_is_safe(<<>>) ->
    true;
json_bin_is_safe(<<C, Rest/binary>>) when C >= $\s, C < 16#7f,
                                          C =/= $\", C =/= $\\ ->
    json_bin_is_safe(Rest);
json_bin_is_safe(<<_, _/binary>>) ->
    false.
247
%% Escape a list of Unicode code points into the body of a JSON string.
%%
%% Acc holds the output so far in reverse order (callers seed it with the
%% opening quote); the closing quote is added and the whole thing reversed
%% once the input is exhausted.
%%
%%   * quote, backslash and the five short-form control characters get
%%     their two-character escapes;
%%   * other control characters are written as \uXXXX;
%%   * printable ASCII is copied through;
%%   * code points from 16#7f to 16#10FFFF are written as raw UTF-8 when
%%     the encoder's utf8 flag is true and as \uXXXX when it is false;
%%   * anything else exits with {json_encode, {bad_char, C}}.
%%
%% Every range test requires an integer. Previously a negative integer or
%% a float below 16#7f was copied straight into the output, and a float in
%% one of the other ranges crashed inside unihex/1 or xmerl_ucs.
json_encode_string_unicode([], _State, Acc) ->
    lists:reverse([?Q | Acc]);
json_encode_string_unicode([C | Cs], State, Acc) ->
    Acc1 = case C of
               ?Q ->
                   [?Q, $\\ | Acc];
               %% Escaping solidus is only useful when trying to protect
               %% against "</script>" injection attacks, which are only
               %% possible when JSON is inserted into an HTML document
               %% in-line. mochijson2 does not protect you from this, so
               %% if you do insert directly into HTML you need to escape
               %% the output of encode, or add a case here that maps
               %% $/ to [$/, $\\ | Acc].
               $\\ ->
                   [$\\, $\\ | Acc];
               $\b ->
                   [$b, $\\ | Acc];
               $\f ->
                   [$f, $\\ | Acc];
               $\n ->
                   [$n, $\\ | Acc];
               $\r ->
                   [$r, $\\ | Acc];
               $\t ->
                   [$t, $\\ | Acc];
               C when is_integer(C), C >= 0, C < $\s ->
                   [unihex(C) | Acc];
               C when is_integer(C), C >= $\s, C < 16#7f ->
                   [C | Acc];
               C when is_integer(C), C >= 16#7f, C =< 16#10FFFF,
                      State#encoder.utf8 ->
                   [xmerl_ucs:to_utf8(C) | Acc];
               C when is_integer(C), C >= 16#7f, C =< 16#10FFFF,
                      not State#encoder.utf8 ->
                   [unihex(C) | Acc];
               _ ->
                   exit({json_encode, {bad_char, C}})
           end,
    json_encode_string_unicode(Cs, State, Acc1).
288
%% Map a nibble (an integer 0..15) to its lowercase hexadecimal digit
%% character. Anything outside that range fails with function_clause;
%% the second clause used to have no lower bound, so a negative argument
%% was silently turned into an unrelated character.
hexdigit(C) when is_integer(C), C >= 0, C =< 9 ->
    C + $0;
hexdigit(C) when is_integer(C), C >= 10, C =< 15 ->
    C + $a - 10.
293
%% Render a code point as JSON \uXXXX escape text.
%%
%% Values below 16#10000 become a single escape: backslash, $u and four
%% lowercase hex digits taken from the low 16 bits. Values up to 16#10FFFF
%% are split into a UTF-16 surrogate pair and rendered as a list of two
%% such escapes.
unihex(C) when C < 16#10000 ->
    [$\\, $u | [hexdigit(Nibble) || <<Nibble:4>> <= <<C:16>>]];
unihex(C) when C =< 16#10FFFF ->
    Offset = C - 16#10000,
    High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
    Low = 16#dc00 bor (Offset band 16#3ff),
    [unihex(High), unihex(Low)].
303
%% Decode one complete JSON document. A list is first flattened to a
%% binary. After the value has been parsed, the rest of the input is
%% tokenized in the trim state, which only succeeds when nothing but
%% whitespace follows; anything else crashes the match on eof.
json_decode(Chars, S) when is_list(Chars) ->
    json_decode(iolist_to_binary(Chars), S);
json_decode(Bin, S) ->
    {Value, After} = decode1(Bin, S),
    {eof, _} = tokenize(Bin, After#decoder{state=trim}),
    Value.
310
%% Decode a single JSON value starting at the decoder's current offset.
%% Requires the decoder to be in the null state. Returns {Value, State}:
%% a constant token is returned directly, an opening bracket or brace
%% hands over to decode_array/2 or decode_object/2. Any other token
%% fails with case_clause.
decode1(Bin, St=#decoder{state=null}) ->
    case tokenize(Bin, St#decoder{state=any}) of
        {{const, Const}, Next} ->
            {Const, Next};
        {start_array, Next} ->
            decode_array(Bin, Next);
        {start_object, Next} ->
            decode_object(Bin, Next)
    end.
320
%% Give the configured object hook, if any, the chance to replace a
%% freshly decoded object. With no hook (null) the value is returned
%% unchanged.
make_object(V, #decoder{object_hook=Hook}) ->
    case Hook of
        null ->
            V;
        _ ->
            Hook(V)
    end.
325
%% Decode the members of a JSON object whose opening brace has already
%% been consumed. Returns {Object, State}, where Object is
%% {struct, Pairs} with the pairs in document order, passed through
%% make_object/2, and State is back in the null state.
decode_object(B, S) ->
    decode_object(B, S#decoder{state=key}, []).

%% State key: expect a member key or the closing brace.
%% State comma: expect a comma or the closing brace.
%% Acc collects the {Key, Value} pairs in reverse order.
decode_object(B, S=#decoder{state=key}, Acc) ->
    case tokenize(B, S) of
        {end_object, S1} ->
            object_done(Acc, S1);
        {{const, Key}, S1} ->
            {colon, S2} = tokenize(B, S1),
            {Value, S3} = decode1(B, S2#decoder{state=null}),
            decode_object(B, S3#decoder{state=comma}, [{Key, Value} | Acc])
    end;
decode_object(B, S=#decoder{state=comma}, Acc) ->
    case tokenize(B, S) of
        {end_object, S1} ->
            object_done(Acc, S1);
        {comma, S1} ->
            decode_object(B, S1#decoder{state=key}, Acc)
    end.

%% Build the finished object from the reversed pairs and reset the state.
object_done(Acc, S) ->
    Object = make_object({struct, lists:reverse(Acc)}, S),
    {Object, S#decoder{state=null}}.
347
%% Decode the elements of a JSON array whose opening bracket has already
%% been consumed. Returns {List, State} with the elements in document
%% order and State back in the null state.
decode_array(B, S) ->
    decode_array(B, S#decoder{state=any}, []).

%% State any: expect an element or the closing bracket.
%% State comma: expect a comma or the closing bracket.
%% Acc collects the elements in reverse order.
decode_array(B, S=#decoder{state=any}, Acc) ->
    case tokenize(B, S) of
        {end_array, S1} ->
            array_done(Acc, S1);
        {start_array, S1} ->
            {Nested, S2} = decode_array(B, S1),
            decode_array(B, S2#decoder{state=comma}, [Nested | Acc]);
        {start_object, S1} ->
            {Object, S2} = decode_object(B, S1),
            decode_array(B, S2#decoder{state=comma}, [Object | Acc]);
        {{const, Const}, S1} ->
            decode_array(B, S1#decoder{state=comma}, [Const | Acc])
    end;
decode_array(B, S=#decoder{state=comma}, Acc) ->
    case tokenize(B, S) of
        {end_array, S1} ->
            array_done(Acc, S1);
        {comma, S1} ->
            decode_array(B, S1#decoder{state=any}, Acc)
    end.

%% Put the collected elements in order and reset the state.
array_done(Acc, S) ->
    {lists:reverse(Acc), S#decoder{state=null}}.
371
%% Tokenize a JSON string whose opening quote has already been consumed.
%%
%% tokenize_string_fast/2 scans ahead without building anything. If it
%% reaches the closing quote, the string is returned as a slice of the
%% input binary. If it meets a backslash first, the bytes scanned so far
%% seed the accumulator of tokenize_string/3, which handles escapes.
tokenize_string(B, S=#decoder{offset=Start}) ->
    case tokenize_string_fast(B, Start) of
        {escape, Stop} ->
            Len = Stop - Start,
            <<_:Start/binary, Plain:Len/binary, _/binary>> = B,
            Seed = lists:reverse(binary_to_list(Plain)),
            tokenize_string(B, ?ADV_COL(S, Len), Seed);
        Stop ->
            Len = Stop - Start,
            <<_:Start/binary, Str:Len/binary, ?Q, _/binary>> = B,
            %% the extra 1 steps over the closing quote
            {{const, Str}, ?ADV_COL(S, Len + 1)}
    end.
384
%% Scan a string body starting at byte offset O without building a result.
%%
%% Returns the offset of the closing double quote, or {escape, Offset} at
%% the first backslash. Throws invalid_utf8 when the bytes are not
%% well-formed UTF-8 or the input ends before either is found.
%%
%% Multi-byte sequences are checked against the RFC 3629 grammar. Besides
%% the lead/continuation byte ranges, this rejects overlong forms
%% (E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF) and values above
%% U+10FFFF (F4 90..BF), all of which used to be accepted.
tokenize_string_fast(B, O) ->
    case B of
        <<_:O/binary, $\", _/binary>> ->
            O;
        <<_:O/binary, $\\, _/binary>> ->
            {escape, O};
        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
            tokenize_string_fast(B, 1 + O);
        <<_:O/binary, C1, C2, _/binary>> when C1 >= 16#C2, C1 =< 16#DF,
                C2 >= 16#80, C2 =< 16#BF ->
            tokenize_string_fast(B, 2 + O);
        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 16#E0, C1 =< 16#EF,
                C2 >= 16#80, C2 =< 16#BF,
                C3 >= 16#80, C3 =< 16#BF,
                %% no overlong three-byte forms
                (C1 =/= 16#E0 orelse C2 >= 16#A0),
                %% no surrogates U+D800..U+DFFF
                (C1 =/= 16#ED orelse C2 =< 16#9F) ->
            tokenize_string_fast(B, 3 + O);
        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 16#F0, C1 =< 16#F4,
                C2 >= 16#80, C2 =< 16#BF,
                C3 >= 16#80, C3 =< 16#BF,
                C4 >= 16#80, C4 =< 16#BF,
                %% no overlong four-byte forms
                (C1 =/= 16#F0 orelse C2 >= 16#90),
                %% nothing above U+10FFFF
                (C1 =/= 16#F4 orelse C2 =< 16#8F) ->
            tokenize_string_fast(B, 4 + O);
        _ ->
            throw(invalid_utf8)
    end.
408
%% Tokenize the rest of a string that contains at least one backslash.
%%
%% Acc holds the decoded bytes so far in reverse order. Returns
%% {{const, Binary}, State} once the closing quote is reached, with State
%% positioned just after it.
%%
%%   * the two-character escapes are replaced by the byte they denote;
%%   * \uXXXX is replaced by the UTF-8 encoding of the code point; a high
%%     surrogate must be followed by a second \uXXXX and the pair is
%%     combined into one code point;
%%   * a backslash that starts none of these is copied through as an
%%     ordinary ASCII byte;
%%   * raw multi-byte sequences are copied through after being checked
%%     against the RFC 3629 grammar.
%%
%% Throws invalid_utf8 for malformed UTF-8 or when the input ends inside
%% the string. The UTF-8 check rejects overlong forms (E0 80..9F,
%% F0 80..8F), UTF-16 surrogates (ED A0..BF) and values above U+10FFFF
%% (F4 90..BF), all of which used to be accepted.
tokenize_string(B, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, ?Q, _/binary>> ->
            {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
        <<_:O/binary, "\\\"", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
        <<_:O/binary, "\\\\", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
        <<_:O/binary, "\\/", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
        <<_:O/binary, "\\b", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
        <<_:O/binary, "\\f", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
        <<_:O/binary, "\\n", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
        <<_:O/binary, "\\r", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
        <<_:O/binary, "\\t", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
        <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
            C = erlang:list_to_integer([C3, C2, C1, C0], 16),
            if C > 16#D7FF, C < 16#DC00 ->
                %% coalesce UTF-16 surrogate pair; the second escape must
                %% follow immediately (12 = two six-character escapes)
                <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
                D = erlang:list_to_integer([D3,D2,D1,D0], 16),
                [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
                    D:16/big-unsigned-integer>>),
                Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
                tokenize_string(B, ?ADV_COL(S, 12), Acc1);
            true ->
                Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
                tokenize_string(B, ?ADV_COL(S, 6), Acc1)
            end;
        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
            tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]);
        <<_:O/binary, C1, C2, _/binary>> when C1 >= 16#C2, C1 =< 16#DF,
                C2 >= 16#80, C2 =< 16#BF ->
            tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]);
        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 16#E0, C1 =< 16#EF,
                C2 >= 16#80, C2 =< 16#BF,
                C3 >= 16#80, C3 =< 16#BF,
                %% no overlong three-byte forms
                (C1 =/= 16#E0 orelse C2 >= 16#A0),
                %% no surrogates U+D800..U+DFFF
                (C1 =/= 16#ED orelse C2 =< 16#9F) ->
            tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]);
        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 16#F0, C1 =< 16#F4,
                C2 >= 16#80, C2 =< 16#BF,
                C3 >= 16#80, C3 =< 16#BF,
                C4 >= 16#80, C4 =< 16#BF,
                %% no overlong four-byte forms
                (C1 =/= 16#F0 orelse C2 >= 16#90),
                %% nothing above U+10FFFF
                (C1 =/= 16#F4 orelse C2 =< 16#8F) ->
            tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]);
        _ ->
            throw(invalid_utf8)
    end.
460
%% Tokenize a JSON number at the decoder's current offset. The state
%% machine in tokenize_number/4 collects the characters and classifies
%% them; this wrapper converts them to an Erlang integer or float and
%% returns {{const, Number}, State}.
tokenize_number(B, S) ->
    {{Kind, Chars}, S1} = tokenize_number(B, sign, S, []),
    case Kind of
        int ->
            {{const, list_to_integer(Chars)}, S1};
        float ->
            {{const, list_to_float(Chars)}, S1}
    end.
468
%% Number state machine. The second argument names what is expected next;
%% Digits holds the accepted characters in reverse order. Returns
%% {{int, Chars}, State} or {{float, Chars}, State}, with Chars in reading
%% order and in a form list_to_integer/1 or list_to_float/1 accepts.
%%
%%   sign  - optional leading minus
%%   int   - first integer digit (a leading 0 ends the integer part)
%%   int1  - further integer digits
%%   frac  - optional fraction or exponent
%%   frac1 - further fraction digits, or an exponent
%%   esign - optional exponent sign
%%   eint  - first exponent digit (required)
%%   eint1 - further exponent digits
tokenize_number(Bin, sign, St=#decoder{offset=Pos}, []) ->
    case Bin of
        <<_:Pos/binary, $-, _/binary>> ->
            tokenize_number(Bin, int, ?INC_COL(St), [$-]);
        _ ->
            tokenize_number(Bin, int, St, [])
    end;
tokenize_number(Bin, int, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, $0, _/binary>> ->
            tokenize_number(Bin, frac, ?INC_COL(St), [$0 | Digits]);
        <<_:Pos/binary, D, _/binary>> when D >= $1, D =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(St), [D | Digits])
    end;
tokenize_number(Bin, int1, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(St), [D | Digits]);
        _ ->
            tokenize_number(Bin, frac, St, Digits)
    end;
tokenize_number(Bin, frac, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, $., D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, frac1, ?ADV_COL(St, 2), [D, $. | Digits]);
        <<_:Pos/binary, E, _/binary>> when E =:= $e; E =:= $E ->
            %% an exponent without a fraction: insert ".0" so that the
            %% text is acceptable to list_to_float/1
            tokenize_number(Bin, esign, ?INC_COL(St), [$e, $0, $. | Digits]);
        _ ->
            {{int, lists:reverse(Digits)}, St}
    end;
tokenize_number(Bin, frac1, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, frac1, ?INC_COL(St), [D | Digits]);
        <<_:Pos/binary, E, _/binary>> when E =:= $e; E =:= $E ->
            tokenize_number(Bin, esign, ?INC_COL(St), [$e | Digits]);
        _ ->
            {{float, lists:reverse(Digits)}, St}
    end;
tokenize_number(Bin, esign, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, Sign, _/binary>> when Sign =:= $-; Sign =:= $+ ->
            tokenize_number(Bin, eint, ?INC_COL(St), [Sign | Digits]);
        _ ->
            tokenize_number(Bin, eint, St, Digits)
    end;
tokenize_number(Bin, eint, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(St), [D | Digits])
    end;
tokenize_number(Bin, eint1, St=#decoder{offset=Pos}, Digits) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(St), [D | Digits]);
        _ ->
            {{float, lists:reverse(Digits)}, St}
    end.
527
%% Read the next token at the decoder's current offset, skipping leading
%% whitespace. Returns {Token, State} where Token is one of
%%   start_object | end_object | start_array | end_array | comma | colon
%%   {const, Value}   for null, true, false, strings and numbers
%%   eof              at the end of input
%% and State is positioned just after the token. End of input is only
%% accepted in the trim state; in any other state the match on trim
%% crashes. Input that starts none of these tokens fails with case_clause.
tokenize(Bin, St=#decoder{offset=Pos}) ->
    case Bin of
        <<_:Pos/binary, Ws, _/binary>> when ?IS_WHITESPACE(Ws) ->
            tokenize(Bin, ?INC_CHAR(St, Ws));
        <<_:Pos/binary, $\{, _/binary>> ->
            {start_object, ?INC_COL(St)};
        <<_:Pos/binary, $\}, _/binary>> ->
            {end_object, ?INC_COL(St)};
        <<_:Pos/binary, $\[, _/binary>> ->
            {start_array, ?INC_COL(St)};
        <<_:Pos/binary, $\], _/binary>> ->
            {end_array, ?INC_COL(St)};
        <<_:Pos/binary, $,, _/binary>> ->
            {comma, ?INC_COL(St)};
        <<_:Pos/binary, $:, _/binary>> ->
            {colon, ?INC_COL(St)};
        <<_:Pos/binary, "null", _/binary>> ->
            {{const, null}, ?ADV_COL(St, 4)};
        <<_:Pos/binary, "true", _/binary>> ->
            {{const, true}, ?ADV_COL(St, 4)};
        <<_:Pos/binary, "false", _/binary>> ->
            {{const, false}, ?ADV_COL(St, 5)};
        <<_:Pos/binary, ?Q, _/binary>> ->
            %% step over the opening quote before reading the string body
            tokenize_string(Bin, ?INC_COL(St));
        <<_:Pos/binary, D, _/binary>> when D =:= $-; D >= $0, D =< $9 ->
            tokenize_number(Bin, St);
        <<_:Pos/binary>> ->
            trim = St#decoder.state,
            {eof, St}
    end.
559%%
560%% Tests
561%%
562-ifdef(TEST).
563-include_lib("eunit/include/eunit.hrl").
564
565
566%% testing constructs borrowed from the Yaws JSON implementation.
567
%% Create an empty object.

obj_new() -> {struct, []}.
572
%% Return true when the term is a decoded JSON object, that is a
%% {struct, Props} tuple in which every element of Props is a pair with a
%% binary key, and false for anything else. The original had no false
%% branch and crashed with function_clause instead.
is_obj({struct, Props}) when is_list(Props) ->
    HasBinaryKey = fun ({K, _}) -> is_binary(K);
                       (_) -> false
                   end,
    lists:all(HasBinaryKey, Props);
is_obj(_) ->
    false.
576
%% Create an object from a list of Key/Value pairs, asserting that the
%% result passes is_obj/1.
obj_from_list(Props) ->
    Wrapped = {struct, Props},
    ?assert(is_obj(Wrapped)),
    Wrapped.
581
%% Test two decoded JSON terms for equivalence.
%% Objects may list the same members in a different order, so they cannot
%% simply be compared as Erlang terms; aggregates (objects and arrays) are
%% compared recursively instead. Numbers are compared with ==, so an
%% integer and a float of the same value are equivalent. Terms of
%% different kinds match no clause and crash the test.

equiv({struct, Members1}, {struct, Members2}) ->
    equiv_object(Members1, Members2);
equiv(Items1, Items2) when is_list(Items1), is_list(Items2) ->
    equiv_list(Items1, Items2);
equiv(Num1, Num2) when is_number(Num1), is_number(Num2) ->
    Num1 == Num2;
equiv(Bin1, Bin2) when is_binary(Bin1), is_binary(Bin2) ->
    Bin1 == Bin2;
equiv(Atom, Atom) when Atom =:= true; Atom =:= false; Atom =:= null ->
    true.
594
%% Compare the member lists of two objects.
%% Member order is not significant, so both lists are sorted by key and
%% then compared pair by pair. Returns true, or crashes if any pair
%% differs or the lists have different lengths.

equiv_object(Props1, Props2) ->
    Sorted1 = lists:keysort(1, Props1),
    Sorted2 = lists:keysort(1, Props2),
    SamePair = fun ({{K1, V1}, {K2, V2}}) ->
                       equiv(K1, K2) and equiv(V1, V2)
               end,
    true = lists:all(SamePair, lists:zip(Sorted1, Sorted2)).
605
%% Compare two lists element by element, stopping at the first pair that
%% is not equivalent. Lists of different lengths match no clause.

equiv_list([V1 | Rest1], [V2 | Rest2]) ->
    case equiv(V1, V2) of
        true ->
            equiv_list(Rest1, Rest2);
        false ->
            false
    end;
equiv_list([], []) ->
    true.
612
decode_test() ->
    %% a float and an integer side by side in an array
    ?assertEqual([1199344435545.0, 1],
                 decode(<<"[1199344435545.0,1]">>)),
    %% an escaped surrogate pair decodes to one four-byte UTF-8 sequence
    ?assertEqual(<<16#F0,16#9D,16#9C,16#95>>,
                 decode([34,"\\ud835","\\udf15",34])).
616
%% Run every {Erlang, JSON} pair of the utf8 test vector through test_one/2.
e2j_vec_test() ->
    Vector = e2j_test_vec(utf8),
    test_one(Vector, 1).
619
%% Check each {Term, Json} pair in turn: decoding Json must give a term
%% equivalent to Term, and so must encoding Term and decoding the result.
%% N counts the pairs, starting from the caller's value.
test_one([{Term, Json} | Rest], N) ->
    true = equiv(Term, decode(Json)),
    true = equiv(Term, decode(encode(Term))),
    test_one(Rest, N + 1);
test_one([], _N) ->
    ok.
628
%% Test vector of {ErlangTerm, JsonText} pairs used by e2j_vec_test/0.
e2j_test_vec(utf8) ->
    Numbers =
        [{1, "1"},
         %% text representation may truncate, trail zeroes
         {3.1416, "3.14160"},
         {-1, "-1"},
         {-3.1416, "-3.14160"},
         {12.0e10, "1.20000e+11"},
         {1.234E+10, "1.23400e+10"},
         {-1.234E-10, "-1.23400e-10"},
         {10.0, "1.0e+01"},
         {123.456, "1.23456E+2"},
         {10.0, "1e1"}],
    Strings =
        [{<<"foo">>, "\"foo\""},
         {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
         {<<"">>, "\"\""},
         {<<"\n\n\n">>, "\"\\n\\n\\n\""},
         {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}],
    Flat =
        [{obj_new(), "{}"},
         {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
         {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
          "{\"foo\":\"bar\",\"baz\":123}"},
         {[], "[]"},
         {[[]], "[[]]"},
         {[1, <<"foo">>], "[1,\"foo\"]"}],
    Nested =
        [%% json array in a json object
         {obj_from_list([{<<"foo">>, [123]}]),
          "{\"foo\":[123]}"},

         %% json object in a json object
         {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
          "{\"foo\":{\"bar\":true}}"},

         %% fold evaluation order
         {obj_from_list([{<<"foo">>, []},
                         {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                         {<<"alice">>, <<"bob">>}]),
          "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

         %% json object in a json array
         {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
          "[-123,\"foo\",{\"bar\":[]},null]"}],
    Numbers ++ Strings ++ Flat ++ Nested.
672
%% Check both output modes for non-ASCII text: \uXXXX escapes by default,
%% raw UTF-8 bytes when the utf8 option is on. The control character is
%% escaped in both.
encoder_utf8_test() ->
    Input = <<1,"\321\202\320\265\321\201\321\202">>,

    %% safe conversion case (default)
    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
        encode(Input),

    %% raw utf8 output (optional)
    Utf8Encode = mochijson2:encoder([{utf8, true}]),
    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
        Utf8Encode(Input).
683
%% Well-formed multi-byte UTF-8 must decode to itself; malformed byte
%% sequences must make decode throw invalid_utf8 and make encode exit.
input_validation_test() ->
    Good = [
        {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
        {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
        {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
    ],
    [begin
         Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
         Expect = decode(UTF8)
     end || {CodePoint, UTF8} <- Good],

    Bad = [
        %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
        <<?Q, 16#80, ?Q>>,
        %% missing continuations, last byte in each should be 80-BF
        <<?Q, 16#C2, 16#7F, ?Q>>,
        <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
        <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
        %% we don't support code points > 10FFFF per RFC 3629
        <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
        %% escape characters trigger a different code path
        <<?Q, $\\, $\n, 16#80, ?Q>>
    ],
    [begin
         ok = try decode(Input) catch invalid_utf8 -> ok end,
         %% could be {ucs,{bad_utf8_character_code}} or
         %%          {json_encode,{bad_char,_}}
         {'EXIT', _} = (catch encode(Input))
     end || Input <- Bad],
    ok.
714
%% {json, IoData} must be inserted verbatim, both at the top level and as
%% a value inside an object.
inline_json_test() ->
    Raw = {json, [<<"\"iodata">>, " iodata\""]},
    ?assertEqual(<<"\"iodata iodata\"">>,
                 iolist_to_binary(encode(Raw))),
    Wrapped = {struct, [{key, Raw}]},
    ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
                 decode(encode(Wrapped))),
    ok.
724
%% A code point outside the Basic Multilingual Plane must be encoded as a
%% surrogate pair and must survive an encode/decode round trip.
big_unicode_test() ->
    UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
    Encoded = iolist_to_binary(encode(UTF8Seq)),
    ?assertEqual(<<"\"\\ud834\\udd20\"">>, Encoded),
    ?assertEqual(UTF8Seq, decode(Encoded)),
    ok.
734
%% decoder/1 without options behaves like decode/1; with an object_hook
%% the hook's return value replaces the decoded object.
custom_decoder_test() ->
    Json = "{\"key\": \"value\"}",
    Plain = decoder([]),
    ?assertEqual({struct, [{<<"key">>, <<"value">>}]}, Plain(Json)),
    Hook = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
    Hooked = decoder([{object_hook, Hook}]),
    ?assertEqual(win, Hooked(Json)),
    ok.
744
%% true, false and null map to the JSON literals in both directions; any
%% other atom is encoded as a string.
atom_test() ->
    %% JSON native atoms
    lists:foreach(
      fun (Literal) ->
              Text = atom_to_list(Literal),
              ?assertEqual(Literal, decode(Text)),
              ?assertEqual(iolist_to_binary(Text),
                           iolist_to_binary(encode(Literal)))
      end,
      [true, false, null]),
    %% Atom to string
    ?assertEqual(<<"\"foo\"">>, iolist_to_binary(encode(foo))),
    Clef = list_to_atom(xmerl_ucs:to_utf8(16#0001d120)),
    ?assertEqual(<<"\"\\ud834\\udd20\"">>, iolist_to_binary(encode(Clef))),
    ok.
760
%% Some forms are accepted as object keys that would not be strings in
%% other positions: atoms, binaries, character lists and integers, both
%% inside {struct, _} and in a bare proplist.
key_encode_test() ->
    Cases =
        [{<<"{\"foo\":1}">>, {struct, [{foo, 1}]}},
         {<<"{\"foo\":1}">>, {struct, [{<<"foo">>, 1}]}},
         {<<"{\"foo\":1}">>, {struct, [{"foo", 1}]}},
         {<<"{\"foo\":1}">>, [{foo, 1}]},
         {<<"{\"foo\":1}">>, [{<<"foo">>, 1}]},
         {<<"{\"foo\":1}">>, [{"foo", 1}]},
         {<<"{\"\\ud834\\udd20\":1}">>, {struct, [{[16#0001d120], 1}]}},
         {<<"{\"1\":1}">>, {struct, [{1, 1}]}}],
    lists:foreach(
      fun ({Expected, Term}) ->
              ?assertEqual(Expected, iolist_to_binary(encode(Term)))
      end,
      Cases),
    ok.
790
%% Characters that need escaping must be reported unsafe by both
%% predicates and must survive an encode/decode round trip.
unsafe_chars_test() ->
    lists:foreach(
      fun (C) ->
              ?assertEqual(false, json_string_is_safe([C])),
              ?assertEqual(false, json_bin_is_safe(<<C>>)),
              ?assertEqual(<<C>>, decode(encode(<<C>>)))
      end,
      "\"\\\b\f\n\r\t"),
    %% a code point outside the Basic Multilingual Plane
    Clef = 16#0001d120,
    ?assertEqual(false, json_string_is_safe([Clef])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(Clef)))),
    ?assertEqual(
       [Clef],
       xmerl_ucs:from_utf8(
         binary_to_list(
           decode(encode(list_to_atom(xmerl_ucs:to_utf8(Clef))))))),
    %% one past the last valid code point
    TooBig = 16#110000,
    ?assertEqual(false, json_string_is_safe([TooBig])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([TooBig])))),
    %% solidus can be escaped but isn't unsafe by default
    ?assertEqual(<<"/">>, decode(<<"\"\\/\"">>)),
    ok.
820
%% Small integers, including a bare zero, decode to Erlang integers.
int_test() ->
    lists:foreach(
      fun ({Expected, Json}) ->
              ?assertEqual(Expected, decode(Json))
      end,
      [{0, "0"}, {1, "1"}, {11, "11"}]),
    ok.
826
%% Integers far beyond 64 bits are encoded digit for digit.
large_int_test() ->
    Cases =
        [{<<"-2147483649214748364921474836492147483649">>,
          -2147483649214748364921474836492147483649},
         {<<"2147483649214748364921474836492147483649">>,
          2147483649214748364921474836492147483649}],
    lists:foreach(
      fun ({Expected, Int}) ->
              ?assertEqual(Expected, iolist_to_binary(encode(Int)))
      end,
      Cases),
    ok.
833
%% Floats just outside the signed 32-bit integer range keep their ".0".
float_test() ->
    Cases =
        [{<<"-2147483649.0">>, -2147483649.0},
         {<<"2147483648.0">>, 2147483648.0}],
    lists:foreach(
      fun ({Expected, Float}) ->
              ?assertEqual(Expected, iolist_to_binary(encode(Float)))
      end,
      Cases),
    ok.
838
%% A term the encoder does not understand makes it exit unless a handler
%% is configured, in which case the handler's result is encoded instead.
handler_test() ->
    ?assertExit({json_encode, {bad_term, {}}}, encode({})),
    EmptyTupleAsList = fun ({}) -> [] end,
    Encode = encoder([{handler, EmptyTupleAsList}]),
    ?assertEqual(<<"[]">>, iolist_to_binary(Encode({}))),
    ok.
848
849-endif.