PageRenderTime 395ms CodeModel.GetById 60ms app.highlight 290ms RepoModel.GetById 32ms app.codeStats 1ms

/src/mochijson2.erl

http://github.com/basho/mochiweb
Erlang | 907 lines | 710 code | 74 blank | 123 comment | 9 complexity | c8b75f616697be8654d1ab377b016ab4 MD5 | raw file
  1%% @author Bob Ippolito <bob@mochimedia.com>
  2%% @copyright 2007 Mochi Media, Inc.
  3%%
  4%% Permission is hereby granted, free of charge, to any person obtaining a
  5%% copy of this software and associated documentation files (the "Software"),
  6%% to deal in the Software without restriction, including without limitation
  7%% the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8%% and/or sell copies of the Software, and to permit persons to whom the
  9%% Software is furnished to do so, subject to the following conditions:
 10%%
 11%% The above copyright notice and this permission notice shall be included in
 12%% all copies or substantial portions of the Software.
 13%%
 14%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 17%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20%% DEALINGS IN THE SOFTWARE.
 21
 22%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
 23%%      with binaries as strings, arrays as lists (without an {array, _})
 24%%      wrapper and it only knows how to decode UTF-8 (and ASCII).
 25%%
 26%%      JSON terms are decoded as follows (javascript -> erlang):
 27%%      <ul>
 28%%          <li>{"key": "value"} ->
 29%%              {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
 30%%          <li>["array", 123, 12.34, true, false, null] ->
 31%%              [&lt;&lt;"array">>, 123, 12.34, true, false, null]
 32%%          </li>
 33%%      </ul>
 34%%      <ul>
 35%%          <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
 36%%          <li>Objects decode to {struct, PropList}</li>
 37%%          <li>Numbers decode to integer or float</li>
 38%%          <li>true, false, null decode to their respective terms.</li>
 39%%      </ul>
 40%%      The encoder will accept the same format that the decoder will produce,
 41%%      but will also allow additional cases for leniency:
 42%%      <ul>
 43%%          <li>atoms other than true, false, null will be considered UTF-8
 44%%              strings (even as a proplist key)
 45%%          </li>
 46%%          <li>{json, IoList} will insert IoList directly into the output
 47%%              with no validation
 48%%          </li>
 49%%          <li>{array, Array} will be encoded as Array
 50%%              (legacy mochijson style)
 51%%          </li>
 52%%          <li>A non-empty raw proplist will be encoded as an object as long
 53%%              as the first pair does not have an atom key of json, struct,
 54%%              or array
 55%%          </li>
 56%%      </ul>
 57
 58-module(mochijson2).
 59-author('bob@mochimedia.com').
 60-export([encoder/1, encode/1]).
 61-export([decoder/1, decode/1, decode/2]).
 62
 63%% This is a macro to placate syntax highlighters..
 64-define(Q, $\").
 65-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
 66                                 column=N+S#decoder.column}).
 67-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
 68                              column=1+S#decoder.column}).
 69-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
 70                               column=1,
 71                               line=1+S#decoder.line}).
 72-define(INC_CHAR(S, C),
 73        case C of
 74            $\n ->
 75                S#decoder{column=1,
 76                          line=1+S#decoder.line,
 77                          offset=1+S#decoder.offset};
 78            _ ->
 79                S#decoder{column=1+S#decoder.column,
 80                          offset=1+S#decoder.offset}
 81        end).
 82-define(IS_WHITESPACE(C),
 83        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
 84
  85%% @type json_string() = atom() | binary()
 86%% @type json_number() = integer() | float()
 87%% @type json_array() = [json_term()]
 88%% @type json_object() = {struct, [{json_string(), json_term()}]}
 89%% @type json_eep18_object() = {[{json_string(), json_term()}]}
 90%% @type json_iolist() = {json, iolist()}
 91%% @type json_term() = json_string() | json_number() | json_array() |
 92%%                     json_object() | json_eep18_object() | json_iolist()
 93
  94-record(encoder, {handler=null,  %% fun/1 applied to terms json_encode/2 cannot encode; null makes it exit instead
  95                  utf8=false}).  %% true: emit code points >= 16#7f as raw UTF-8; false: as \uXXXX escapes
  96
  97-record(decoder, {object_hook=null,  %% null | struct | eep18 | proplist | fun/1, see make_object/2
  98                  offset=0,          %% offset into the input binary of the next byte to read
  99                  line=1,            %% current line, advanced by ?INC_LINE / ?INC_CHAR
 100                  column=1,          %% current column, advanced by the ?*_COL macros
 101                  state=null}).      %% parser state: null | any | key | comma | trim
102
103%% @spec encoder([encoder_option()]) -> function()
104%% @doc Create an encoder/1 with the given options.
105%% @type encoder_option() = handler_option() | utf8_option()
106%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
107encoder(Options) ->
108    State = parse_encoder_options(Options, #encoder{}),
109    fun (O) -> json_encode(O, State) end.
110
111%% @spec encode(json_term()) -> iolist()
112%% @doc Encode the given as JSON to an iolist.
113encode(Any) ->
114    json_encode(Any, #encoder{}).
115
116%% @spec decoder([decoder_option()]) -> function()
117%% @doc Create a decoder/1 with the given options.
118decoder(Options) ->
119    State = parse_decoder_options(Options, #decoder{}),
120    fun (O) -> json_decode(O, State) end.
121
122%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term()
123%% @doc Decode the given iolist to Erlang terms using the given object format
124%%      for decoding, where proplist returns JSON objects as [{binary(), json_term()}]
125%%      proplists, eep18 returns JSON objects as {[binary(), json_term()]}, and struct
126%%      returns them as-is.
127decode(S, Options) ->
128    json_decode(S, parse_decoder_options(Options, #decoder{})).
129
130%% @spec decode(iolist()) -> json_term()
131%% @doc Decode the given iolist to Erlang terms.
132decode(S) ->
133    json_decode(S, #decoder{}).
134
135%% Internal API
136
137parse_encoder_options([], State) ->
138    State;
139parse_encoder_options([{handler, Handler} | Rest], State) ->
140    parse_encoder_options(Rest, State#encoder{handler=Handler});
141parse_encoder_options([{utf8, Switch} | Rest], State) ->
142    parse_encoder_options(Rest, State#encoder{utf8=Switch}).
143
144parse_decoder_options([], State) ->
145    State;
146parse_decoder_options([{object_hook, Hook} | Rest], State) ->
147    parse_decoder_options(Rest, State#decoder{object_hook=Hook});
148parse_decoder_options([{format, Format} | Rest], State)
149  when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist ->
150    parse_decoder_options(Rest, State#decoder{object_hook=Format}).
151
152json_encode(true, _State) ->
153    <<"true">>;
154json_encode(false, _State) ->
155    <<"false">>;
156json_encode(null, _State) ->
157    <<"null">>;
158json_encode(I, _State) when is_integer(I) ->
159    integer_to_list(I);
160json_encode(F, _State) when is_float(F) ->
161    mochinum:digits(F);
162json_encode(S, State) when is_binary(S); is_atom(S) ->
163    json_encode_string(S, State);
164json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso
165                                             K =/= array andalso
166                                             K =/= json) ->
167    json_encode_proplist(Props, State);
168json_encode({struct, Props}, State) when is_list(Props) ->
169    json_encode_proplist(Props, State);
170json_encode({Props}, State) when is_list(Props) ->
171    json_encode_proplist(Props, State);
172json_encode({}, State) ->
173    json_encode_proplist([], State);
174json_encode(Array, State) when is_list(Array) ->
175    json_encode_array(Array, State);
176json_encode({array, Array}, State) when is_list(Array) ->
177    json_encode_array(Array, State);
178json_encode({json, IoList}, _State) ->
179    IoList;
180json_encode(Bad, #encoder{handler=null}) ->
181    exit({json_encode, {bad_term, Bad}});
182json_encode(Bad, State=#encoder{handler=Handler}) ->
183    json_encode(Handler(Bad), State).
184
185json_encode_array([], _State) ->
186    <<"[]">>;
187json_encode_array(L, State) ->
188    F = fun (O, Acc) ->
189                [$,, json_encode(O, State) | Acc]
190        end,
191    [$, | Acc1] = lists:foldl(F, "[", L),
192    lists:reverse([$\] | Acc1]).
193
194json_encode_proplist([], _State) ->
195    <<"{}">>;
196json_encode_proplist(Props, State) ->
197    F = fun ({K, V}, Acc) ->
198                KS = json_encode_string(K, State),
199                VS = json_encode(V, State),
200                [$,, VS, $:, KS | Acc]
201        end,
202    [$, | Acc1] = lists:foldl(F, "{", Props),
203    lists:reverse([$\} | Acc1]).
204
205json_encode_string(A, State) when is_atom(A) ->
206    L = atom_to_list(A),
207    case json_string_is_safe(L) of
208        true ->
209            [?Q, L, ?Q];
210        false ->
211            json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
212    end;
213json_encode_string(B, State) when is_binary(B) ->
214    case json_bin_is_safe(B) of
215        true ->
216            [?Q, B, ?Q];
217        false ->
218            json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
219    end;
220json_encode_string(I, _State) when is_integer(I) ->
221    [?Q, integer_to_list(I), ?Q];
222json_encode_string(L, State) when is_list(L) ->
223    case json_string_is_safe(L) of
224        true ->
225            [?Q, L, ?Q];
226        false ->
227            json_encode_string_unicode(L, State, [?Q])
228    end.
229
230json_string_is_safe([]) ->
231    true;
232json_string_is_safe([C | Rest]) ->
233    case C of
234        ?Q ->
235            false;
236        $\\ ->
237            false;
238        $\b ->
239            false;
240        $\f ->
241            false;
242        $\n ->
243            false;
244        $\r ->
245            false;
246        $\t ->
247            false;
248        C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
249            false;
250        C when C < 16#7f ->
251            json_string_is_safe(Rest);
252        _ ->
253            false
254    end.
255
256json_bin_is_safe(<<>>) ->
257    true;
258json_bin_is_safe(<<C, Rest/binary>>) ->
259    case C of
260        ?Q ->
261            false;
262        $\\ ->
263            false;
264        $\b ->
265            false;
266        $\f ->
267            false;
268        $\n ->
269            false;
270        $\r ->
271            false;
272        $\t ->
273            false;
274        C when C >= 0, C < $\s; C >= 16#7f ->
275            false;
276        C when C < 16#7f ->
277            json_bin_is_safe(Rest)
278    end.
279
280json_encode_string_unicode([], _State, Acc) ->
281    lists:reverse([$\" | Acc]);
282json_encode_string_unicode([C | Cs], State, Acc) ->
283    Acc1 = case C of
284               ?Q ->
285                   [?Q, $\\ | Acc];
286               %% Escaping solidus is only useful when trying to protect
287               %% against "</script>" injection attacks which are only
288               %% possible when JSON is inserted into a HTML document
289               %% in-line. mochijson2 does not protect you from this, so
290               %% if you do insert directly into HTML then you need to
291               %% uncomment the following case or escape the output of encode.
292               %%
293               %% $/ ->
294               %%    [$/, $\\ | Acc];
295               %%
296               $\\ ->
297                   [$\\, $\\ | Acc];
298               $\b ->
299                   [$b, $\\ | Acc];
300               $\f ->
301                   [$f, $\\ | Acc];
302               $\n ->
303                   [$n, $\\ | Acc];
304               $\r ->
305                   [$r, $\\ | Acc];
306               $\t ->
307                   [$t, $\\ | Acc];
308               C when C >= 0, C < $\s ->
309                   [unihex(C) | Acc];
310               C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
311                   [xmerl_ucs:to_utf8(C) | Acc];
312               C when  C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
313                   [unihex(C) | Acc];
314               C when C < 16#7f ->
315                   [C | Acc];
316               _ ->
317                   exit({json_encode, {bad_char, C}})
318           end,
319    json_encode_string_unicode(Cs, State, Acc1).
320
321hexdigit(C) when C >= 0, C =< 9 ->
322    C + $0;
323hexdigit(C) when C =< 15 ->
324    C + $a - 10.
325
326unihex(C) when C < 16#10000 ->
327    <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
328    Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
329    [$\\, $u | Digits];
330unihex(C) when C =< 16#10FFFF ->
331    N = C - 16#10000,
332    S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
333    S2 = 16#dc00 bor (N band 16#3ff),
334    [unihex(S1), unihex(S2)].
335
336json_decode(L, S) when is_list(L) ->
337    json_decode(iolist_to_binary(L), S);
338json_decode(B, S) ->
339    {Res, S1} = decode1(B, S),
340    {eof, _} = tokenize(B, S1#decoder{state=trim}),
341    Res.
342
343decode1(B, S=#decoder{state=null}) ->
344    case tokenize(B, S#decoder{state=any}) of
345        {{const, C}, S1} ->
346            {C, S1};
347        {start_array, S1} ->
348            decode_array(B, S1);
349        {start_object, S1} ->
350            decode_object(B, S1)
351    end.
352
353make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct ->
354    V;
355make_object({struct, P}, #decoder{object_hook=eep18}) ->
356    {P};
357make_object({struct, P}, #decoder{object_hook=proplist}) ->
358    P;
359make_object(V, #decoder{object_hook=Hook}) ->
360    Hook(V).
361
362decode_object(B, S) ->
363    decode_object(B, S#decoder{state=key}, []).
364
365decode_object(B, S=#decoder{state=key}, Acc) ->
366    case tokenize(B, S) of
367        {end_object, S1} ->
368            V = make_object({struct, lists:reverse(Acc)}, S1),
369            {V, S1#decoder{state=null}};
370        {{const, K}, S1} ->
371            {colon, S2} = tokenize(B, S1),
372            {V, S3} = decode1(B, S2#decoder{state=null}),
373            decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
374    end;
375decode_object(B, S=#decoder{state=comma}, Acc) ->
376    case tokenize(B, S) of
377        {end_object, S1} ->
378            V = make_object({struct, lists:reverse(Acc)}, S1),
379            {V, S1#decoder{state=null}};
380        {comma, S1} ->
381            decode_object(B, S1#decoder{state=key}, Acc)
382    end.
383
384decode_array(B, S) ->
385    decode_array(B, S#decoder{state=any}, []).
386
387decode_array(B, S=#decoder{state=any}, Acc) ->
388    case tokenize(B, S) of
389        {end_array, S1} ->
390            {lists:reverse(Acc), S1#decoder{state=null}};
391        {start_array, S1} ->
392            {Array, S2} = decode_array(B, S1),
393            decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
394        {start_object, S1} ->
395            {Array, S2} = decode_object(B, S1),
396            decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
397        {{const, Const}, S1} ->
398            decode_array(B, S1#decoder{state=comma}, [Const | Acc])
399    end;
400decode_array(B, S=#decoder{state=comma}, Acc) ->
401    case tokenize(B, S) of
402        {end_array, S1} ->
403            {lists:reverse(Acc), S1#decoder{state=null}};
404        {comma, S1} ->
405            decode_array(B, S1#decoder{state=any}, Acc)
406    end.
407
408tokenize_string(B, S=#decoder{offset=O}) ->
409    case tokenize_string_fast(B, O) of
410        {escape, O1} ->
411            Length = O1 - O,
412            S1 = ?ADV_COL(S, Length),
413            <<_:O/binary, Head:Length/binary, _/binary>> = B,
414            tokenize_string(B, S1, lists:reverse(binary_to_list(Head)));
415        O1 ->
416            Length = O1 - O,
417            <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B,
418            {{const, String}, ?ADV_COL(S, Length + 1)}
419    end.
420
421tokenize_string_fast(B, O) ->
422    case B of
423        <<_:O/binary, ?Q, _/binary>> ->
424            O;
425        <<_:O/binary, $\\, _/binary>> ->
426            {escape, O};
427        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
428            tokenize_string_fast(B, 1 + O);
429        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
430                C2 >= 128, C2 =< 191 ->
431            tokenize_string_fast(B, 2 + O);
432        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
433                C2 >= 128, C2 =< 191,
434                C3 >= 128, C3 =< 191 ->
435            tokenize_string_fast(B, 3 + O);
436        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
437                C2 >= 128, C2 =< 191,
438                C3 >= 128, C3 =< 191,
439                C4 >= 128, C4 =< 191 ->
440            tokenize_string_fast(B, 4 + O);
441        _ ->
442            throw(invalid_utf8)
443    end.
444
445tokenize_string(B, S=#decoder{offset=O}, Acc) ->
446    case B of
447        <<_:O/binary, ?Q, _/binary>> ->
448            {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
449        <<_:O/binary, "\\\"", _/binary>> ->
450            tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
451        <<_:O/binary, "\\\\", _/binary>> ->
452            tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
453        <<_:O/binary, "\\/", _/binary>> ->
454            tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
455        <<_:O/binary, "\\b", _/binary>> ->
456            tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
457        <<_:O/binary, "\\f", _/binary>> ->
458            tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
459        <<_:O/binary, "\\n", _/binary>> ->
460            tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
461        <<_:O/binary, "\\r", _/binary>> ->
462            tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
463        <<_:O/binary, "\\t", _/binary>> ->
464            tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
465        <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
466            C = erlang:list_to_integer([C3, C2, C1, C0], 16),
467            if C > 16#D7FF, C < 16#DC00 ->
468                %% coalesce UTF-16 surrogate pair
469                <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
470                D = erlang:list_to_integer([D3,D2,D1,D0], 16),
471                [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
472                    D:16/big-unsigned-integer>>),
473                Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
474                tokenize_string(B, ?ADV_COL(S, 12), Acc1);
475            true ->
476                Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
477                tokenize_string(B, ?ADV_COL(S, 6), Acc1)
478            end;
479        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
480            tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]);
481        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
482                C2 >= 128, C2 =< 191 ->
483            tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]);
484        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
485                C2 >= 128, C2 =< 191,
486                C3 >= 128, C3 =< 191 ->
487            tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]);
488        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
489                C2 >= 128, C2 =< 191,
490                C3 >= 128, C3 =< 191,
491                C4 >= 128, C4 =< 191 ->
492            tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]);
493        _ ->
494            throw(invalid_utf8)
495    end.
496
497tokenize_number(B, S) ->
498    case tokenize_number(B, sign, S, []) of
499        {{int, Int}, S1} ->
500            {{const, list_to_integer(Int)}, S1};
501        {{float, Float}, S1} ->
502            {{const, list_to_float(Float)}, S1}
503    end.
504
505tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
506    case B of
507        <<_:O/binary, $-, _/binary>> ->
508            tokenize_number(B, int, ?INC_COL(S), [$-]);
509        _ ->
510            tokenize_number(B, int, S, [])
511    end;
512tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
513    case B of
514        <<_:O/binary, $0, _/binary>> ->
515            tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
516        <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
517            tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
518    end;
519tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
520    case B of
521        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
522            tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
523        _ ->
524            tokenize_number(B, frac, S, Acc)
525    end;
526tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
527    case B of
528        <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
529            tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
530        <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
531            tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
532        _ ->
533            {{int, lists:reverse(Acc)}, S}
534    end;
535tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
536    case B of
537        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
538            tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
539        <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
540            tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
541        _ ->
542            {{float, lists:reverse(Acc)}, S}
543    end;
544tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
545    case B of
546        <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
547            tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
548        _ ->
549            tokenize_number(B, eint, S, Acc)
550    end;
551tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
552    case B of
553        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
554            tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
555    end;
556tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
557    case B of
558        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
559            tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
560        _ ->
561            {{float, lists:reverse(Acc)}, S}
562    end.
563
564tokenize(B, S=#decoder{offset=O}) ->
565    case B of
566        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
567            tokenize(B, ?INC_CHAR(S, C));
568        <<_:O/binary, "{", _/binary>> ->
569            {start_object, ?INC_COL(S)};
570        <<_:O/binary, "}", _/binary>> ->
571            {end_object, ?INC_COL(S)};
572        <<_:O/binary, "[", _/binary>> ->
573            {start_array, ?INC_COL(S)};
574        <<_:O/binary, "]", _/binary>> ->
575            {end_array, ?INC_COL(S)};
576        <<_:O/binary, ",", _/binary>> ->
577            {comma, ?INC_COL(S)};
578        <<_:O/binary, ":", _/binary>> ->
579            {colon, ?INC_COL(S)};
580        <<_:O/binary, "null", _/binary>> ->
581            {{const, null}, ?ADV_COL(S, 4)};
582        <<_:O/binary, "true", _/binary>> ->
583            {{const, true}, ?ADV_COL(S, 4)};
584        <<_:O/binary, "false", _/binary>> ->
585            {{const, false}, ?ADV_COL(S, 5)};
586        <<_:O/binary, "\"", _/binary>> ->
587            tokenize_string(B, ?INC_COL(S));
588        <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
589                                         orelse C =:= $- ->
590            tokenize_number(B, S);
591        <<_:O/binary>> ->
592            trim = S#decoder.state,
593            {eof, S}
594    end.
595%%
596%% Tests
597%%
598-ifdef(TEST).
599-include_lib("eunit/include/eunit.hrl").
600
601
602%% testing constructs borrowed from the Yaws JSON implementation.
603
604%% Create an object from a list of Key/Value pairs.
605
606obj_new() ->
607    {struct, []}.
608
609is_obj({struct, Props}) ->
610    F = fun ({K, _}) when is_binary(K) -> true end,
611    lists:all(F, Props).
612
613obj_from_list(Props) ->
614    Obj = {struct, Props},
615    ?assert(is_obj(Obj)),
616    Obj.
617
618%% Test for equivalence of Erlang terms.
619%% Due to arbitrary order of construction, equivalent objects might
620%% compare unequal as erlang terms, so we need to carefully recurse
621%% through aggregates (tuples and objects).
622
623equiv({struct, Props1}, {struct, Props2}) ->
624    equiv_object(Props1, Props2);
625equiv(L1, L2) when is_list(L1), is_list(L2) ->
626    equiv_list(L1, L2);
627equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
628equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
629equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true.
630
631%% Object representation and traversal order is unknown.
632%% Use the sledgehammer and sort property lists.
633
634equiv_object(Props1, Props2) ->
635    L1 = lists:keysort(1, Props1),
636    L2 = lists:keysort(1, Props2),
637    Pairs = lists:zip(L1, L2),
638    true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
639                             equiv(K1, K2) and equiv(V1, V2)
640                     end, Pairs).
641
642%% Recursively compare tuple elements for equivalence.
643
644equiv_list([], []) ->
645    true;
646equiv_list([V1 | L1], [V2 | L2]) ->
647    equiv(V1, V2) andalso equiv_list(L1, L2).
648
649decode_test() ->
650    [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
651    <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]).
652
653e2j_vec_test() ->
654    test_one(e2j_test_vec(utf8), 1).
655
656test_one([], _N) ->
657    %% io:format("~p tests passed~n", [N-1]),
658    ok;
659test_one([{E, J} | Rest], N) ->
660    %% io:format("[~p] ~p ~p~n", [N, E, J]),
661    true = equiv(E, decode(J)),
662    true = equiv(E, decode(encode(E))),
663    test_one(Rest, 1+N).
664
665e2j_test_vec(utf8) ->
666    [
667     {1, "1"},
668     {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
669     {-1, "-1"},
670     {-3.1416, "-3.14160"},
671     {12.0e10, "1.20000e+11"},
672     {1.234E+10, "1.23400e+10"},
673     {-1.234E-10, "-1.23400e-10"},
674     {10.0, "1.0e+01"},
675     {123.456, "1.23456E+2"},
676     {10.0, "1e1"},
677     {<<"foo">>, "\"foo\""},
678     {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
679     {<<"">>, "\"\""},
680     {<<"\n\n\n">>, "\"\\n\\n\\n\""},
681     {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
682     {obj_new(), "{}"},
683     {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
684     {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
685      "{\"foo\":\"bar\",\"baz\":123}"},
686     {[], "[]"},
687     {[[]], "[[]]"},
688     {[1, <<"foo">>], "[1,\"foo\"]"},
689
690     %% json array in a json object
691     {obj_from_list([{<<"foo">>, [123]}]),
692      "{\"foo\":[123]}"},
693
694     %% json object in a json object
695     {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
696      "{\"foo\":{\"bar\":true}}"},
697
698     %% fold evaluation order
699     {obj_from_list([{<<"foo">>, []},
700                     {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
701                     {<<"alice">>, <<"bob">>}]),
702      "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
703
704     %% json object in a json array
705     {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
706      "[-123,\"foo\",{\"bar\":[]},null]"}
707    ].
708
709%% test utf8 encoding
710encoder_utf8_test() ->
711    %% safe conversion case (default)
712    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
713        encode(<<1,"\321\202\320\265\321\201\321\202">>),
714
715    %% raw utf8 output (optional)
716    Enc = mochijson2:encoder([{utf8, true}]),
717    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
718        Enc(<<1,"\321\202\320\265\321\201\321\202">>).
719
720input_validation_test() ->
721    Good = [
722        {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
723        {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
724        {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
725    ],
726    lists:foreach(fun({CodePoint, UTF8}) ->
727        Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
728        Expect = decode(UTF8)
729    end, Good),
730
731    Bad = [
732        %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
733        <<?Q, 16#80, ?Q>>,
734        %% missing continuations, last byte in each should be 80-BF
735        <<?Q, 16#C2, 16#7F, ?Q>>,
736        <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
737        <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
738        %% we don't support code points > 10FFFF per RFC 3629
739        <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
740        %% escape characters trigger a different code path
741        <<?Q, $\\, $\n, 16#80, ?Q>>
742    ],
743    lists:foreach(
744      fun(X) ->
745              ok = try decode(X) catch invalid_utf8 -> ok end,
746              %% could be {ucs,{bad_utf8_character_code}} or
747              %%          {json_encode,{bad_char,_}}
748              {'EXIT', _} = (catch encode(X))
749      end, Bad).
750
751inline_json_test() ->
752    ?assertEqual(<<"\"iodata iodata\"">>,
753                 iolist_to_binary(
754                   encode({json, [<<"\"iodata">>, " iodata\""]}))),
755    ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
756                 decode(
757                   encode({struct,
758                           [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))),
759    ok.
760
761big_unicode_test() ->
762    UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
763    ?assertEqual(
764       <<"\"\\ud834\\udd20\"">>,
765       iolist_to_binary(encode(UTF8Seq))),
766    ?assertEqual(
767       UTF8Seq,
768       decode(iolist_to_binary(encode(UTF8Seq)))),
769    ok.
770
771custom_decoder_test() ->
772    ?assertEqual(
773       {struct, [{<<"key">>, <<"value">>}]},
774       (decoder([]))("{\"key\": \"value\"}")),
775    F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
776    ?assertEqual(
777       win,
778       (decoder([{object_hook, F}]))("{\"key\": \"value\"}")),
779    ok.
780
781atom_test() ->
782    %% JSON native atoms
783    [begin
784         ?assertEqual(A, decode(atom_to_list(A))),
785         ?assertEqual(iolist_to_binary(atom_to_list(A)),
786                      iolist_to_binary(encode(A)))
787     end || A <- [true, false, null]],
788    %% Atom to string
789    ?assertEqual(
790       <<"\"foo\"">>,
791       iolist_to_binary(encode(foo))),
792    ?assertEqual(
793       <<"\"\\ud834\\udd20\"">>,
794       iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))),
795    ok.
796
797key_encode_test() ->
798    %% Some forms are accepted as keys that would not be strings in other
799    %% cases
800    ?assertEqual(
801       <<"{\"foo\":1}">>,
802       iolist_to_binary(encode({struct, [{foo, 1}]}))),
803    ?assertEqual(
804       <<"{\"foo\":1}">>,
805       iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))),
806    ?assertEqual(
807       <<"{\"foo\":1}">>,
808       iolist_to_binary(encode({struct, [{"foo", 1}]}))),
809	?assertEqual(
810       <<"{\"foo\":1}">>,
811       iolist_to_binary(encode([{foo, 1}]))),
812    ?assertEqual(
813       <<"{\"foo\":1}">>,
814       iolist_to_binary(encode([{<<"foo">>, 1}]))),
815    ?assertEqual(
816       <<"{\"foo\":1}">>,
817       iolist_to_binary(encode([{"foo", 1}]))),
818    ?assertEqual(
819       <<"{\"\\ud834\\udd20\":1}">>,
820       iolist_to_binary(
821         encode({struct, [{[16#0001d120], 1}]}))),
822    ?assertEqual(
823       <<"{\"1\":1}">>,
824       iolist_to_binary(encode({struct, [{1, 1}]}))),
825    ok.
826
827unsafe_chars_test() ->
828    Chars = "\"\\\b\f\n\r\t",
829    [begin
830         ?assertEqual(false, json_string_is_safe([C])),
831         ?assertEqual(false, json_bin_is_safe(<<C>>)),
832         ?assertEqual(<<C>>, decode(encode(<<C>>)))
833     end || C <- Chars],
834    ?assertEqual(
835       false,
836       json_string_is_safe([16#0001d120])),
837    ?assertEqual(
838       false,
839       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))),
840    ?assertEqual(
841       [16#0001d120],
842       xmerl_ucs:from_utf8(
843         binary_to_list(
844           decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))),
845    ?assertEqual(
846       false,
847       json_string_is_safe([16#110000])),
848    ?assertEqual(
849       false,
850       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
851    %% solidus can be escaped but isn't unsafe by default
852    ?assertEqual(
853       <<"/">>,
854       decode(<<"\"\\/\"">>)),
855    ok.
856
857int_test() ->
858    ?assertEqual(0, decode("0")),
859    ?assertEqual(1, decode("1")),
860    ?assertEqual(11, decode("11")),
861    ok.
862
863large_int_test() ->
864    ?assertEqual(<<"-2147483649214748364921474836492147483649">>,
865        iolist_to_binary(encode(-2147483649214748364921474836492147483649))),
866    ?assertEqual(<<"2147483649214748364921474836492147483649">>,
867        iolist_to_binary(encode(2147483649214748364921474836492147483649))),
868    ok.
869
870float_test() ->
871    ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))),
872    ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))),
873    ok.
874
875handler_test() ->
876    ?assertEqual(
877       {'EXIT',{json_encode,{bad_term,{x,y}}}},
878       catch encode({x,y})),
879    F = fun ({x,y}) -> [] end,
880    ?assertEqual(
881       <<"[]">>,
882       iolist_to_binary((encoder([{handler, F}]))({x, y}))),
883    ok.
884
885encode_empty_test_() ->
886    [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))}
887     || {A, B} <- [{"eep18 {}", {}},
888                   {"eep18 {[]}", {[]}},
889                   {"{struct, []}", {struct, []}}]].
890
891encode_test_() ->
892    P = [{<<"k">>, <<"v">>}],
893    JSON = iolist_to_binary(encode({struct, P})),
894    [{atom_to_list(F),
895      ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))}
896     || F <- [struct, eep18, proplist]].
897
898format_test_() ->
899    P = [{<<"k">>, <<"v">>}],
900    JSON = iolist_to_binary(encode({struct, P})),
901    [{atom_to_list(F),
902      ?_assertEqual(A, decode(JSON, [{format, F}]))}
903     || {F, A} <- [{struct, {struct, P}},
904                   {eep18, {P}},
905                   {proplist, P}]].
906
907-endif.