PageRenderTime 379ms CodeModel.GetById 80ms app.highlight 276ms RepoModel.GetById 17ms app.codeStats 0ms

/ucengine/src/lib/mochijson/mochijson.erl

http://github.com/AF83/ucengine
Erlang | 403 lines | 323 code | 35 blank | 45 comment | 10 complexity | eff3ed5b5d4a91d3cf679e8971ecff67 MD5 | raw file
  1%% @author Bob Ippolito <bob@mochimedia.com>
  2%% @copyright 2006 Mochi Media, Inc.
  3
  4%% @doc Yet another JSON (RFC 4627) library for Erlang.
  5-module(mochijson).
  6-author('bob@mochimedia.com').
  7-export([encoder/1, encode/1]).
  8-export([decoder/1, decode/1]).
  9-export([binary_encoder/1, binary_encode/1]).
 10-export([binary_decoder/1, binary_decode/1]).
 11
 12% This is a macro to placate syntax highlighters..
 13-define(Q, $\").
 14-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}).
 15-define(INC_COL(S), S#decoder{column=1+S#decoder.column}).
 16-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}).
 17
 18%% @type iolist() = [char() | binary() | iolist()]
 19%% @type iodata() = iolist() | binary()
%% @type json_string() = atom() | string() | binary()
 21%% @type json_number() = integer() | float()
 22%% @type json_array() = {array, [json_term()]}
 23%% @type json_object() = {struct, [{json_string(), json_term()}]}
 24%% @type json_term() = json_string() | json_number() | json_array() |
 25%%                     json_object()
 26%% @type encoding() = utf8 | unicode
 27%% @type encoder_option() = {input_encoding, encoding()} |
 28%%                          {handler, function()}
 29%% @type decoder_option() = {input_encoding, encoding()} |
 30%%                          {object_hook, function()}
 31%% @type bjson_string() = binary()
 32%% @type bjson_number() = integer() | float()
 33%% @type bjson_array() = [bjson_term()]
 34%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]}
 35%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() |
 36%%                      bjson_object()
 37%% @type binary_encoder_option() = {handler, function()}
 38%% @type binary_decoder_option() = {object_hook, function()}
 39
 40-record(encoder, {input_encoding=unicode,
 41                  handler=null}).
 42
 43-record(decoder, {input_encoding=utf8,
 44                  object_hook=null,
 45                  line=1,
 46                  column=1,
 47                  state=null}).
 48
 49%% @spec encoder([encoder_option()]) -> function()
 50%% @doc Create an encoder/1 with the given options.
 51encoder(Options) ->
 52    State = parse_encoder_options(Options, #encoder{}),
 53    fun (O) -> json_encode(O, State) end.
 54
 55%% @spec encode(json_term()) -> iolist()
 56%% @doc Encode the given as JSON to an iolist.
 57encode(Any) ->
 58    json_encode(Any, #encoder{}).
 59
 60%% @spec decoder([decoder_option()]) -> function()
 61%% @doc Create a decoder/1 with the given options.
 62decoder(Options) ->
 63    State = parse_decoder_options(Options, #decoder{}),
 64    fun (O) -> json_decode(O, State) end.
 65
 66%% @spec decode(iolist()) -> json_term()
 67%% @doc Decode the given iolist to Erlang terms.
 68decode(S) ->
 69    json_decode(S, #decoder{}).
 70
 71%% @spec binary_decoder([binary_decoder_option()]) -> function()
 72%% @doc Create a binary_decoder/1 with the given options.
 73binary_decoder(Options) ->
 74    mochijson2:decoder(Options).
 75
 76%% @spec binary_encoder([binary_encoder_option()]) -> function()
 77%% @doc Create a binary_encoder/1 with the given options.
 78binary_encoder(Options) ->
 79    mochijson2:encoder(Options).
 80
 81%% @spec binary_encode(bjson_term()) -> iolist()
 82%% @doc Encode the given as JSON to an iolist, using lists for arrays and
 83%%      binaries for strings.
 84binary_encode(Any) ->
 85    mochijson2:encode(Any).
 86
 87%% @spec binary_decode(iolist()) -> bjson_term()
 88%% @doc Decode the given iolist to Erlang terms, using lists for arrays and
 89%%      binaries for strings.
 90binary_decode(S) ->
 91    mochijson2:decode(S).
 92
 93%% Internal API
 94
 95parse_encoder_options([], State) ->
 96    State;
 97parse_encoder_options([{input_encoding, Encoding} | Rest], State) ->
 98    parse_encoder_options(Rest, State#encoder{input_encoding=Encoding});
 99parse_encoder_options([{handler, Handler} | Rest], State) ->
100    parse_encoder_options(Rest, State#encoder{handler=Handler}).
101
%% Apply each {input_encoding, _} / {object_hook, _} option, left to right,
%% to the given #decoder{} record and return the updated record. Any other
%% option is not matched and crashes with function_clause.
parse_decoder_options(Options, State) ->
    ApplyOption =
        fun ({input_encoding, Encoding}, Config) ->
                Config#decoder{input_encoding = Encoding};
            ({object_hook, Hook}, Config) ->
                Config#decoder{object_hook = Hook}
        end,
    lists:foldl(ApplyOption, State, Options).
108
%% Encode one term to an iolist of JSON text.
%%   true | false | null   -> the literal of the same name
%%   integer / float       -> decimal text (floats via mochinum:digits/1)
%%   list, binary, atom    -> JSON string
%%   {array, List}         -> JSON array
%%   {struct, Proplist}    -> JSON object
%% Anything else is passed through the configured handler and the result is
%% encoded again; without a handler the process exits with
%% {json_encode, {bad_term, Term}}.
json_encode(Literal, _State)
  when Literal =:= true; Literal =:= false; Literal =:= null ->
    atom_to_list(Literal);
json_encode(Int, _State) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _State) when is_float(Float) ->
    mochinum:digits(Float);
json_encode(Text, State) when is_list(Text); is_binary(Text); is_atom(Text) ->
    json_encode_string(Text, State);
json_encode({array, Items}, State) when is_list(Items) ->
    json_encode_array(Items, State);
json_encode({struct, Members}, State) when is_list(Members) ->
    json_encode_proplist(Members, State);
json_encode(Other, State = #encoder{handler = Handler}) ->
    case Handler of
        null ->
            exit({json_encode, {bad_term, Other}});
        _ ->
            json_encode(Handler(Other), State)
    end.
129
%% Encode a list of terms as a JSON array: "[", the encoded elements
%% separated by commas, "]". Elements are encoded left to right.
json_encode_array([], _State) ->
    "[]";
json_encode_array(Items, State) ->
    WithSeparator = fun (Item) -> [$,, json_encode(Item, State)] end,
    %% Every element is emitted with a leading comma; drop the first one.
    [$, | Body] = lists:flatmap(WithSeparator, Items),
    "[" ++ Body ++ "]".
138
%% Encode a list of {Key, Value} pairs as a JSON object. Keys may be atoms,
%% integers, lists or binaries and are always emitted as JSON strings; the
%% key is encoded before its value, pairs are processed left to right.
json_encode_proplist([], _State) ->
    "{}";
json_encode_proplist(Members, State) ->
    EncodePair =
        fun ({Key, Value}) ->
                EncodedKey =
                    case Key of
                        _ when is_atom(Key) ->
                            json_encode_string_utf8(atom_to_list(Key));
                        _ when is_integer(Key) ->
                            json_encode_string(integer_to_list(Key), State);
                        _ when is_list(Key); is_binary(Key) ->
                            json_encode_string(Key, State)
                    end,
                EncodedValue = json_encode(Value, State),
                [$,, EncodedKey, $:, EncodedValue]
        end,
    %% Every pair is emitted with a leading comma; drop the first one.
    [$, | Body] = lists:flatmap(EncodePair, Members),
    "{" ++ Body ++ "}".
156
%% Encode an atom, binary or list as a quoted JSON string.
%% Atoms and binaries are always decoded with xmerl_ucs:from_utf8/1 first;
%% lists are treated according to the encoder's input_encoding.
json_encode_string(Atom, _State) when is_atom(Atom) ->
    CodePoints = xmerl_ucs:from_utf8(atom_to_list(Atom)),
    json_encode_string_unicode(CodePoints);
json_encode_string(Bin, _State) when is_binary(Bin) ->
    CodePoints = xmerl_ucs:from_utf8(Bin),
    json_encode_string_unicode(CodePoints);
json_encode_string(Bytes, #encoder{input_encoding = utf8}) ->
    json_encode_string_utf8(Bytes);
json_encode_string(CodePoints, #encoder{input_encoding = unicode}) ->
    json_encode_string_unicode(CodePoints).
165
%% Encode a list of UTF-8 bytes as a quoted JSON string. The helper emits
%% the escaped body together with the closing quote; the opening quote is
%% added here.
json_encode_string_utf8(Bytes) ->
    BodyAndClosingQuote = json_encode_string_utf8_1(Bytes),
    [?Q | BodyAndClosingQuote].
168
%% Escape the body of a UTF-8 byte string and append the closing quote.
%% ASCII bytes are escaped one at a time. As soon as a byte >= 16#80 is
%% seen, the whole remainder is decoded from UTF-8 and handed to the
%% unicode escaper, which also emits the closing quote.
json_encode_string_utf8_1([]) ->
    "\"";
json_encode_string_utf8_1([Char | Tail]) when Char >= 0, Char =< 16#7f ->
    Escaped =
        case Char of
            $\b -> "\\b";
            $\t -> "\\t";
            $\n -> "\\n";
            $\f -> "\\f";
            $\r -> "\\r";
            ?Q -> "\\\"";
            $\\ -> "\\\\";
            _ when Char >= $\s, Char < 16#7f -> Char;
            %% Remaining control characters and DEL become \uXXXX.
            _ -> unihex(Char)
        end,
    [Escaped | json_encode_string_utf8_1(Tail)];
json_encode_string_utf8_1(Remainder = [Char | _])
  when Char >= 16#80, Char =< 16#10FFFF ->
    json_encode_string_unicode_1(xmerl_ucs:from_utf8(Remainder)).
188
%% Encode a list of Unicode code points as a quoted JSON string. The helper
%% emits the escaped body together with the closing quote; the opening
%% quote is added here.
json_encode_string_unicode(CodePoints) ->
    BodyAndClosingQuote = json_encode_string_unicode_1(CodePoints),
    [?Q | BodyAndClosingQuote].
191
%% Escape the body of a code-point string and append the closing quote.
%% Printable ASCII passes through, the usual short escapes are used where
%% JSON defines them, and every other code point up to 16#10FFFF is written
%% as \uXXXX. Anything else exits with {json_encode, {bad_char, Char}}.
json_encode_string_unicode_1([]) ->
    "\"";
json_encode_string_unicode_1([Char | Tail]) ->
    Escaped =
        case Char of
            $\b -> "\\b";
            $\t -> "\\t";
            $\n -> "\\n";
            $\f -> "\\f";
            $\r -> "\\r";
            ?Q -> "\\\"";
            $\\ -> "\\\\";
            _ when Char >= $\s, Char < 16#7f -> Char;
            _ when Char >= 0, Char =< 16#10FFFF -> unihex(Char);
            _ -> exit({json_encode, {bad_char, Char}})
        end,
    [Escaped | json_encode_string_unicode_1(Tail)].
208
%% Convert one hexadecimal digit character (0-9, a-f, A-F) to its numeric
%% value 0..15. Any other character is not matched (function_clause).
dehex(Digit) when Digit >= $A, Digit =< $F ->
    10 + (Digit - $A);
dehex(Digit) when Digit >= $a, Digit =< $f ->
    10 + (Digit - $a);
dehex(Digit) when Digit >= $0, Digit =< $9 ->
    Digit - $0.
215
%% Convert a nibble value to its lowercase hexadecimal digit character:
%% 0..9 map to $0..$9, every other value up to 15 maps relative to $a.
hexdigit(Nibble) when Nibble =< 15 ->
    case Nibble >= 0 andalso Nibble =< 9 of
        true -> Nibble + $0;
        false -> Nibble + $a - 10
    end.
220
%% Render a code point as JSON \uXXXX escape text.
%% Code points below 16#10000 become a single escape; code points up to
%% 16#10FFFF are split into a UTF-16 surrogate pair and rendered as two
%% escapes.
unihex(CodePoint) when CodePoint < 16#10000 ->
    <<N3:4, N2:4, N1:4, N0:4>> = <<CodePoint:16>>,
    [$\\, $u, hexdigit(N3), hexdigit(N2), hexdigit(N1), hexdigit(N0)];
unihex(CodePoint) when CodePoint =< 16#10FFFF ->
    Offset = CodePoint - 16#10000,
    High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
    Low = 16#dc00 bor (Offset band 16#3ff),
    [unihex(High), unihex(Low)].
230
%% Decode a complete JSON document. Binaries are converted to a byte list
%% first. After the top-level value has been parsed, the remaining input
%% must tokenize to eof (only trailing whitespace is allowed), otherwise
%% the match fails.
json_decode(Bin, State) when is_binary(Bin) ->
    json_decode(binary_to_list(Bin), State);
json_decode(Input, State) ->
    {Term, Remaining, AfterTerm} = decode1(Input, State),
    {eof, [], _} = tokenize(Remaining, AfterTerm#decoder{state = trim}),
    Term.
237
%% Parse exactly one JSON value from the front of Input. Returns
%% {Value, RestOfInput, State}. Scalars come straight from the tokenizer;
%% arrays and objects are delegated to their dedicated parsers.
decode1(Input, State = #decoder{state = null}) ->
    case tokenize(Input, State#decoder{state = any}) of
        {{const, Value}, Rest, Next} ->
            {Value, Rest, Next};
        {start_array, Rest, Next} ->
            decode_array(Rest, Next#decoder{state = any}, []);
        {start_object, Rest, Next} ->
            decode_object(Rest, Next#decoder{state = key}, [])
    end.
247
%% Post-process a decoded object: return it unchanged when no object_hook
%% is configured, otherwise return whatever the hook makes of it.
make_object(Object, #decoder{object_hook = Hook}) ->
    case Hook of
        null -> Object;
        _ -> Hook(Object)
    end.
252
%% Parse the members of a JSON object after its opening brace.
%% In state `key' a string key (or the closing brace) is expected; in state
%% `comma' a comma or the closing brace is expected. Members are
%% accumulated in reverse and returned as {struct, Members}, passed through
%% make_object/2. Returns {Object, RestOfInput, State}.
decode_object(Input, State = #decoder{state = key}, Members) ->
    case tokenize(Input, State) of
        {end_object, Tail, AfterBrace} ->
            Object = make_object({struct, lists:reverse(Members)}, AfterBrace),
            {Object, Tail, AfterBrace#decoder{state = null}};
        {{const, Key}, Tail, AfterKey} when is_list(Key) ->
            {colon, ValueInput, AfterColon} = tokenize(Tail, AfterKey),
            {Value, Remaining, AfterValue} =
                decode1(ValueInput, AfterColon#decoder{state = null}),
            decode_object(Remaining, AfterValue#decoder{state = comma},
                          [{Key, Value} | Members])
    end;
decode_object(Input, State = #decoder{state = comma}, Members) ->
    case tokenize(Input, State) of
        {end_object, Tail, AfterBrace} ->
            Object = make_object({struct, lists:reverse(Members)}, AfterBrace),
            {Object, Tail, AfterBrace#decoder{state = null}};
        {comma, Tail, AfterComma} ->
            decode_object(Tail, AfterComma#decoder{state = key}, Members)
    end.
271
%% Parse the elements of a JSON array after its opening bracket.
%% In state `any' a value (or the closing bracket) is expected; in state
%% `comma' a comma or the closing bracket is expected. Elements are
%% accumulated in reverse and returned as {array, Elements}.
%% Returns {Array, RestOfInput, State}.
decode_array(Input, State = #decoder{state = any}, Elements) ->
    case tokenize(Input, State) of
        {end_array, Tail, AfterBracket} ->
            {{array, lists:reverse(Elements)}, Tail,
             AfterBracket#decoder{state = null}};
        {start_array, Tail, AfterOpen} ->
            {Nested, Remaining, AfterNested} =
                decode_array(Tail, AfterOpen#decoder{state = any}, []),
            decode_array(Remaining, AfterNested#decoder{state = comma},
                         [Nested | Elements]);
        {start_object, Tail, AfterOpen} ->
            {Object, Remaining, AfterObject} =
                decode_object(Tail, AfterOpen#decoder{state = key}, []),
            decode_array(Remaining, AfterObject#decoder{state = comma},
                         [Object | Elements]);
        {{const, Value}, Tail, AfterValue} ->
            decode_array(Tail, AfterValue#decoder{state = comma},
                         [Value | Elements])
    end;
decode_array(Input, State = #decoder{state = comma}, Elements) ->
    case tokenize(Input, State) of
        {end_array, Tail, AfterBracket} ->
            {{array, lists:reverse(Elements)}, Tail,
             AfterBracket#decoder{state = null}};
        {comma, Tail, AfterComma} ->
            decode_array(Tail, AfterComma#decoder{state = any}, Elements)
    end.
292
%% Scan the body of a JSON string whose opening quote has already been
%% consumed. Returns {Chars, RestOfInput, State}, where Chars is the decoded
%% string as a list of code points and State has its column advanced.
%%
%% In utf8 mode, the first non-ASCII byte (or nested list/binary) causes the
%% whole remaining input to be decoded from UTF-8 once; scanning then
%% continues in unicode mode.
%%
%% \uXXXX escapes that form a UTF-16 surrogate pair (high surrogate
%% immediately followed by an escaped low surrogate) are combined into the
%% single code point they denote, mirroring what unihex/1 emits when
%% encoding. A high surrogate that is not followed by an escaped low
%% surrogate is kept as-is.
tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
tokenize_string("\"" ++ Rest, S, Acc) ->
    {lists:reverse(Acc), Rest, ?INC_COL(S)};
tokenize_string("\\\"" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
tokenize_string("\\\\" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
tokenize_string("\\/" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
tokenize_string("\\b" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
tokenize_string("\\f" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
tokenize_string("\\n" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
tokenize_string("\\r" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
tokenize_string("\\t" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
    C = tokenize_string_hex4(C3, C2, C1, C0),
    tokenize_string_escape(C, Rest, ?ADV_COL(S, 6), Acc);
%% NOTE(review): `;' is OR, so this guard holds for every character and raw
%% control characters are accepted inside strings. Left unchanged on
%% purpose: tightening it would reject input that decodes today.
tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
    tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).

%% Combine four hexadecimal digit characters (most significant first) into
%% the 16-bit value they spell.
tokenize_string_hex4(C3, C2, C1, C0) ->
    dehex(C0) bor
        (dehex(C1) bsl 4) bor
        (dehex(C2) bsl 8) bor
        (dehex(C3) bsl 12).

%% Push the value of a just-consumed \uXXXX escape onto Acc and continue
%% scanning. When the value is a high surrogate and the input continues
%% with an escaped low surrogate, both are consumed and the combined code
%% point is pushed instead.
tokenize_string_escape(High, Input=[$\\, $u, D3, D2, D1, D0 | Rest], S, Acc)
  when High >= 16#D800, High =< 16#DBFF ->
    case tokenize_string_hex4(D3, D2, D1, D0) of
        Low when Low >= 16#DC00, Low =< 16#DFFF ->
            C = 16#10000 + ((High band 16#3FF) bsl 10) + (Low band 16#3FF),
            tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]);
        _NotLowSurrogate ->
            %% Unpaired high surrogate: keep it and let the next escape be
            %% scanned on its own.
            tokenize_string(Input, S, [High | Acc])
    end;
tokenize_string_escape(C, Rest, S, Acc) ->
    tokenize_string(Rest, S, [C | Acc]).
324
%% Scan a JSON number with a small state machine and return
%% {{int, Digits}, Rest, State} or {{float, Digits}, Rest, State}, where
%% Digits is the number's text, ready for list_to_integer/1 or
%% list_to_float/1. The scanned text is accumulated in reverse.
%%
%% Phases: sign -> int -> int1 -> frac -> frac1 -> esign -> eint -> eint1.
%% A number with an exponent but no fraction gets ".0" inserted before the
%% "e" in the accumulated text.
%%
%% In utf8 mode, a non-ASCII byte (or nested list/binary) at the head makes
%% the whole remaining input be decoded from UTF-8 before continuing.
tokenize_number(IoList = [Head | _], Phase,
                State = #decoder{input_encoding = utf8}, Seen)
  when is_list(Head); is_binary(Head); Head >= 16#7f ->
    Decoded = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_number(Decoded, Phase, State#decoder{input_encoding = unicode},
                    Seen);
%% Optional leading minus.
tokenize_number([$- | Tail], sign, State, []) ->
    tokenize_number(Tail, int, ?INC_COL(State), [$-]);
tokenize_number(Input, sign, State, []) ->
    tokenize_number(Input, int, State, []);
%% Integer part: a single 0, or a non-zero digit followed by more digits.
tokenize_number([$0 | Tail], int, State, Seen) ->
    tokenize_number(Tail, frac, ?INC_COL(State), [$0 | Seen]);
tokenize_number([Digit | Tail], int, State, Seen)
  when Digit >= $1, Digit =< $9 ->
    tokenize_number(Tail, int1, ?INC_COL(State), [Digit | Seen]);
tokenize_number([Digit | Tail], int1, State, Seen)
  when Digit >= $0, Digit =< $9 ->
    tokenize_number(Tail, int1, ?INC_COL(State), [Digit | Seen]);
tokenize_number(Input, int1, State, Seen) ->
    tokenize_number(Input, frac, State, Seen);
%% Fraction: "." plus at least one digit, or straight to the exponent, or
%% the number ends here as an integer.
tokenize_number([$., Digit | Tail], frac, State, Seen)
  when Digit >= $0, Digit =< $9 ->
    tokenize_number(Tail, frac1, ?ADV_COL(State, 2), [Digit, $. | Seen]);
tokenize_number([Exp | Tail], frac, State, Seen) when Exp == $e; Exp == $E ->
    tokenize_number(Tail, esign, ?INC_COL(State), [$e, $0, $. | Seen]);
tokenize_number(Input, frac, State, Seen) ->
    {{int, lists:reverse(Seen)}, Input, State};
tokenize_number([Digit | Tail], frac1, State, Seen)
  when Digit >= $0, Digit =< $9 ->
    tokenize_number(Tail, frac1, ?INC_COL(State), [Digit | Seen]);
tokenize_number([Exp | Tail], frac1, State, Seen) when Exp == $e; Exp == $E ->
    tokenize_number(Tail, esign, ?INC_COL(State), [$e | Seen]);
tokenize_number(Input, frac1, State, Seen) ->
    {{float, lists:reverse(Seen)}, Input, State};
%% Exponent: optional sign, then at least one digit.
tokenize_number([Sign | Tail], esign, State, Seen)
  when Sign == $-; Sign == $+ ->
    tokenize_number(Tail, eint, ?INC_COL(State), [Sign | Seen]);
tokenize_number(Input, esign, State, Seen) ->
    tokenize_number(Input, eint, State, Seen);
tokenize_number([Digit | Tail], eint, State, Seen)
  when Digit >= $0, Digit =< $9 ->
    tokenize_number(Tail, eint1, ?INC_COL(State), [Digit | Seen]);
tokenize_number([Digit | Tail], eint1, State, Seen)
  when Digit >= $0, Digit =< $9 ->
    tokenize_number(Tail, eint1, ?INC_COL(State), [Digit | Seen]);
tokenize_number(Input, eint1, State, Seen) ->
    {{float, lists:reverse(Seen)}, Input, State}.
363
%% Return the next token from the input as {Token, RestOfInput, State}.
%%
%% Tokens: start_object | end_object | start_array | end_array | comma |
%% colon | {const, Value} | eof. Value is null, true, false, a decoded
%% string (list of code points), an integer or a float.
%%
%% Nested lists at the head are spliced into the input and binaries at the
%% head are decoded from UTF-8. Whitespace is skipped while the line and
%% column in State are kept up to date. eof is only produced in state
%% `trim'; empty input in any other state is not matched.
tokenize([], S=#decoder{state=trim}) ->
    {eof, [], S};
tokenize([L | Rest], S) when is_list(L) ->
    tokenize(L ++ Rest, S);
tokenize([B | Rest], S) when is_binary(B) ->
    tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
tokenize("\r\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize("\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
%% A carriage return on its own is valid JSON whitespace too; it must come
%% after the "\r\n" clause so CRLF still counts as a single line break.
tokenize([C | Rest], S) when C == $\s; C == $\t; C == $\r ->
    tokenize(Rest, ?INC_COL(S));
tokenize("{" ++ Rest, S) ->
    {start_object, Rest, ?INC_COL(S)};
tokenize("}" ++ Rest, S) ->
    {end_object, Rest, ?INC_COL(S)};
tokenize("[" ++ Rest, S) ->
    {start_array, Rest, ?INC_COL(S)};
tokenize("]" ++ Rest, S) ->
    {end_array, Rest, ?INC_COL(S)};
tokenize("," ++ Rest, S) ->
    {comma, Rest, ?INC_COL(S)};
tokenize(":" ++ Rest, S) ->
    {colon, Rest, ?INC_COL(S)};
tokenize("null" ++ Rest, S) ->
    {{const, null}, Rest, ?ADV_COL(S, 4)};
tokenize("true" ++ Rest, S) ->
    {{const, true}, Rest, ?ADV_COL(S, 4)};
tokenize("false" ++ Rest, S) ->
    {{const, false}, Rest, ?ADV_COL(S, 5)};
tokenize("\"" ++ Rest, S) ->
    {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
    {{const, String}, Rest1, S1};
tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
    %% tokenize_number/4 returns the number's text; convert it here.
    case tokenize_number(L, sign, S, []) of
        {{int, Int}, Rest, S1} ->
            {{const, list_to_integer(Int)}, Rest, S1};
        {{float, Float}, Rest, S1} ->
            {{const, list_to_float(Float)}, Rest, S1}
    end.