/ucengine/src/lib/mochijson/mochijson.erl
Erlang | 403 lines | 323 code | 35 blank | 45 comment | 10 complexity | eff3ed5b5d4a91d3cf679e8971ecff67 MD5 | raw file
%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2006 Mochi Media, Inc.

%% @doc Yet another JSON (RFC 4627) library for Erlang.
-module(mochijson).
-author('bob@mochimedia.com').
-export([encoder/1, encode/1]).
-export([decoder/1, decode/1]).
-export([binary_encoder/1, binary_encode/1]).
-export([binary_decoder/1, binary_decode/1]).

% This is a macro to placate syntax highlighters..
-define(Q, $\").
% Position bookkeeping on a #decoder{} record: advance the column by N,
% advance it by one, or move to the next line (column reset to 1).
-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}).
-define(INC_COL(S), S#decoder{column=1+S#decoder.column}).
-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}).

%% @type iolist() = [char() | binary() | iolist()]
%% @type iodata() = iolist() | binary()
%% @type json_string() = atom() | string() | binary()
%% @type json_number() = integer() | float()
%% @type json_array() = {array, [json_term()]}
%% @type json_object() = {struct, [{json_string(), json_term()}]}
%% @type json_term() = json_string() | json_number() | json_array() |
%%                     json_object()
%% @type encoding() = utf8 | unicode
%% @type encoder_option() = {input_encoding, encoding()} |
%%                          {handler, function()}
%% @type decoder_option() = {input_encoding, encoding()} |
%%                          {object_hook, function()}
%% @type bjson_string() = binary()
%% @type bjson_number() = integer() | float()
%% @type bjson_array() = [bjson_term()]
%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]}
%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() |
%%                      bjson_object()
%% @type binary_encoder_option() = {handler, function()}
%% @type binary_decoder_option() = {object_hook, function()}

% Encoder settings.
%   input_encoding - how list strings are interpreted (utf8 | unicode).
%   handler        - fun applied to terms json_encode/2 cannot encode;
%                    null means such terms make the encoder exit.
-record(encoder, {input_encoding=unicode,
                  handler=null}).

% Decoder settings and running state.
%   input_encoding - encoding of the input being tokenized.
%   object_hook    - fun applied to every decoded {struct, _}; null
%                    leaves the struct untouched.
%   line, column   - current position, maintained by the ?*_COL/?INC_LINE
%                    macros.
%   state          - what the parser expects next (null, any, key, comma
%                    or trim in this module).
-record(decoder, {input_encoding=utf8,
                  object_hook=null,
                  line=1,
                  column=1,
                  state=null}).

%% @spec encoder([encoder_option()]) -> function()
%% @doc Create an encoder/1 with the given options.
encoder(Options) ->
    % Resolve the option list once; the returned fun closes over the result.
    Config = parse_encoder_options(Options, #encoder{}),
    fun (Term) -> json_encode(Term, Config) end.

%% @spec encode(json_term()) -> iolist()
%% @doc Encode the given term as JSON to an iolist, using the default
%%      encoder settings.
encode(Term) ->
    Defaults = #encoder{},
    json_encode(Term, Defaults).

%% @spec decoder([decoder_option()]) -> function()
%% @doc Create a decoder/1 with the given options.
decoder(Options) ->
    Config = parse_decoder_options(Options, #decoder{}),
    fun (Input) -> json_decode(Input, Config) end.

%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist to Erlang terms, using the default
%%      decoder settings.
decode(Input) ->
    Defaults = #decoder{},
    json_decode(Input, Defaults).

%% @spec binary_decoder([binary_decoder_option()]) -> function()
%% @doc Create a binary_decoder/1 with the given options.
%%      Delegates to mochijson2:decoder/1.
binary_decoder(Options) ->
    mochijson2:decoder(Options).

%% @spec binary_encoder([binary_encoder_option()]) -> function()
%% @doc Create a binary_encoder/1 with the given options.
%%      Delegates to mochijson2:encoder/1.
binary_encoder(Options) ->
    mochijson2:encoder(Options).

%% @spec binary_encode(bjson_term()) -> iolist()
%% @doc Encode the given term as JSON to an iolist, using lists for arrays
%%      and binaries for strings. Delegates to mochijson2:encode/1.
binary_encode(Term) ->
    mochijson2:encode(Term).

%% @spec binary_decode(iolist()) -> bjson_term()
%% @doc Decode the given iolist to Erlang terms, using lists for arrays and
%%      binaries for strings. Delegates to mochijson2:decode/1.
binary_decode(Input) ->
    mochijson2:decode(Input).

%% Internal API

%% Apply each option in the list, left to right, to the #encoder{} record.
%% An unrecognised option is not handled and crashes the caller.
parse_encoder_options([Option | Remaining], State) ->
    parse_encoder_options(Remaining, encoder_option(Option, State));
parse_encoder_options([], State) ->
    State.

%% Store a single encoder option in the record.
encoder_option({input_encoding, Encoding}, State) ->
    State#encoder{input_encoding=Encoding};
encoder_option({handler, Handler}, State) ->
    State#encoder{handler=Handler}.

%% Apply each option in the list, left to right, to the #decoder{} record.
%% An unrecognised option is not handled and crashes the caller.
parse_decoder_options([Option | Remaining], State) ->
    parse_decoder_options(Remaining, decoder_option(Option, State));
parse_decoder_options([], State) ->
    State.

%% Store a single decoder option in the record.
decoder_option({input_encoding, Encoding}, State) ->
    State#decoder{input_encoding=Encoding};
decoder_option({object_hook, Hook}, State) ->
    State#decoder{object_hook=Hook}.

%% Encode one term as JSON text (an iolist).
%% The literals true/false/null are matched before the generic atom clause,
%% so they are emitted bare rather than as quoted strings.
%% A term that fits no clause is passed to the configured handler and the
%% handler's result is encoded instead; without a handler the encoder exits
%% with {json_encode, {bad_term, Term}}.
json_encode(true, _State) ->
    "true";
json_encode(false, _State) ->
    "false";
json_encode(null, _State) ->
    "null";
json_encode(Int, _State) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _State) when is_float(Float) ->
    mochinum:digits(Float);
json_encode(Str, State) when is_list(Str); is_binary(Str); is_atom(Str) ->
    json_encode_string(Str, State);
json_encode({array, Elems}, State) when is_list(Elems) ->
    json_encode_array(Elems, State);
json_encode({struct, Members}, State) when is_list(Members) ->
    json_encode_proplist(Members, State);
json_encode(Bad, State=#encoder{handler=Handler}) ->
    case Handler of
        null ->
            exit({json_encode, {bad_term, Bad}});
        _ ->
            json_encode(Handler(Bad), State)
    end.

%% Encode a list of terms as a JSON array: "[", the encoded elements
%% separated by "," and a closing "]".
json_encode_array([], _State) ->
    "[]";
json_encode_array([First | Others], State) ->
    Encoded = json_encode(First, State),
    [$[, Encoded | encode_array_tail(Others, State)].

%% Remaining array elements, each preceded by a comma, then the closer.
encode_array_tail([], _State) ->
    "]";
encode_array_tail([Elem | Others], State) ->
    Encoded = json_encode(Elem, State),
    [$,, Encoded | encode_array_tail(Others, State)].

%% Encode a list of {Key, Value} pairs as a JSON object.
json_encode_proplist([], _State) ->
    "{}";
json_encode_proplist([Member | Others], State) ->
    {KeyJson, ValueJson} = encode_member(Member, State),
    [$\{, KeyJson, $:, ValueJson | encode_members_tail(Others, State)].

%% Remaining object members, each preceded by a comma, then the closer.
encode_members_tail([], _State) ->
    "}";
encode_members_tail([Member | Others], State) ->
    {KeyJson, ValueJson} = encode_member(Member, State),
    [$,, KeyJson, $:, ValueJson | encode_members_tail(Others, State)].

%% Encode one {Key, Value} pair; the key is encoded before the value.
%% Keys may be atoms, integers, lists or binaries and are always emitted
%% as JSON strings.
encode_member({K, V}, State) ->
    KeyJson = case K of
                  _ when is_atom(K) ->
                      json_encode_string_utf8(atom_to_list(K));
                  _ when is_integer(K) ->
                      json_encode_string(integer_to_list(K), State);
                  _ when is_list(K); is_binary(K) ->
                      json_encode_string(K, State)
              end,
    ValueJson = json_encode(V, State),
    {KeyJson, ValueJson}.

%% Encode an atom, binary or list as a quoted JSON string.
%% Atoms and binaries are decoded with xmerl_ucs:from_utf8/1 first; lists
%% are interpreted according to the encoder's input_encoding.
json_encode_string(Atom, _State) when is_atom(Atom) ->
    CodePoints = xmerl_ucs:from_utf8(atom_to_list(Atom)),
    json_encode_string_unicode(CodePoints);
json_encode_string(Bin, _State) when is_binary(Bin) ->
    CodePoints = xmerl_ucs:from_utf8(Bin),
    json_encode_string_unicode(CodePoints);
json_encode_string(Str, #encoder{input_encoding=utf8}) ->
    json_encode_string_utf8(Str);
json_encode_string(Str, #encoder{input_encoding=unicode}) ->
    json_encode_string_unicode(Str).

%% Quote and escape a list of UTF-8 bytes.
json_encode_string_utf8(Bytes) ->
    [?Q | json_encode_string_utf8_1(Bytes)].

%% ASCII bytes are escaped one at a time. At the first byte >= 16#80 the
%% whole remainder is decoded from UTF-8 and finished by the code point
%% encoder.
json_encode_string_utf8_1([]) ->
    "\"";
json_encode_string_utf8_1([C | Cs]) when C >= 0, C =< 16#7f ->
    Escaped = json_escape_char(C),
    [Escaped | json_encode_string_utf8_1(Cs)];
json_encode_string_utf8_1(All=[C | _]) when C >= 16#80, C =< 16#10FFFF ->
    json_encode_string_unicode_1(xmerl_ucs:from_utf8(All)).

%% Quote and escape a list of Unicode code points.
json_encode_string_unicode(CodePoints) ->
    [?Q | json_encode_string_unicode_1(CodePoints)].

json_encode_string_unicode_1([]) ->
    "\"";
json_encode_string_unicode_1([C | Cs]) ->
    Escaped = json_escape_char(C),
    [Escaped | json_encode_string_unicode_1(Cs)].

%% JSON representation of a single character.
%% Backslash and the double quote get a backslash escape, other characters
%% from space up to (not including) 16#7f are emitted unchanged, the five
%% named control characters use their short escapes, and everything else
%% up to 16#10FFFF is written through unihex/1. Any other value exits with
%% {json_encode, {bad_char, C}}.
json_escape_char($\\) -> "\\\\";
json_escape_char(?Q) -> "\\\"";
json_escape_char(C) when C >= $\s, C < 16#7f -> C;
json_escape_char($\t) -> "\\t";
json_escape_char($\n) -> "\\n";
json_escape_char($\r) -> "\\r";
json_escape_char($\f) -> "\\f";
json_escape_char($\b) -> "\\b";
json_escape_char(C) when C >= 0, C =< 16#10FFFF -> unihex(C);
json_escape_char(C) -> exit({json_encode, {bad_char, C}}).

%% Numeric value of one hexadecimal digit character (either case).
dehex(Digit) when Digit >= $0, Digit =< $9 ->
    Digit - $0;
dehex(Digit) when Digit >= $a, Digit =< $f ->
    Digit - $a + 10;
dehex(Digit) when Digit >= $A, Digit =< $F ->
    Digit - $A + 10.

%% Lower-case hexadecimal digit character for a value in 0..15.
hexdigit(Value) when Value >= 0, Value =< 9 ->
    Value + $0;
hexdigit(Value) when Value =< 15 ->
    Value + $a - 10.

%% JSON \uXXXX escape for a code point.
%% Values below 16#10000 yield one escape; values up to 16#10FFFF are split
%% into a UTF-16 surrogate pair and yield a list of two escapes.
unihex(C) when C < 16#10000 ->
    % Walk the 16-bit value four bits at a time, most significant first.
    [$\\, $u | [hexdigit(Nibble) || <<Nibble:4>> <= <<C:16>>]];
unihex(C) when C =< 16#10FFFF ->
    Offset = C - 16#10000,
    High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
    Low = 16#dc00 bor (Offset band 16#3ff),
    [unihex(High), unihex(Low)].

%% Decode a complete JSON document given as a binary or a list.
%% After the value has been read, the rest of the input must tokenize to
%% eof in the trim state, otherwise the match fails.
json_decode(Bin, State) when is_binary(Bin) ->
    json_decode(binary_to_list(Bin), State);
json_decode(Input, State) ->
    {Value, Remaining, State1} = decode1(Input, State),
    {eof, [], _} = tokenize(Remaining, State1#decoder{state=trim}),
    Value.

%% Decode one JSON value: a constant, an array or an object.
%% Returns {Value, RemainingInput, DecoderState}.
decode1(Input, State=#decoder{state=null}) ->
    case tokenize(Input, State#decoder{state=any}) of
        {{const, Const}, Remaining, State1} ->
            {Const, Remaining, State1};
        {start_array, Remaining, State1} ->
            decode_array(Remaining, State1#decoder{state=any}, []);
        {start_object, Remaining, State1} ->
            decode_object(Remaining, State1#decoder{state=key}, [])
    end.

%% Pass a decoded struct through the object hook, if one is configured.
make_object(Struct, #decoder{object_hook=Hook}) ->
    case Hook of
        null -> Struct;
        _ -> Hook(Struct)
    end.

%% Decode the members of an object whose opening brace has been consumed.
%% In state `key' a string key or the closing brace is expected; in state
%% `comma' a comma or the closing brace is expected. Members are
%% accumulated in reverse order.
decode_object(Input, State=#decoder{state=key}, Acc) ->
    case tokenize(Input, State) of
        {end_object, Remaining, State1} ->
            close_object(Acc, Remaining, State1);
        {{const, Key}, Remaining, State1} when is_list(Key) ->
            {colon, AfterColon, State2} = tokenize(Remaining, State1),
            {Value, AfterValue, State3} =
                decode1(AfterColon, State2#decoder{state=null}),
            decode_object(AfterValue, State3#decoder{state=comma},
                          [{Key, Value} | Acc])
    end;
decode_object(Input, State=#decoder{state=comma}, Acc) ->
    case tokenize(Input, State) of
        {end_object, Remaining, State1} ->
            close_object(Acc, Remaining, State1);
        {comma, Remaining, State1} ->
            decode_object(Remaining, State1#decoder{state=key}, Acc)
    end.

%% Build the finished {struct, Members} term, restoring input order and
%% applying the object hook, and reset the decoder state to null.
close_object(Acc, Remaining, State) ->
    Object = make_object({struct, lists:reverse(Acc)}, State),
    {Object, Remaining, State#decoder{state=null}}.

%% Decode the elements of an array whose opening bracket has been consumed.
%% In state `any' a value or the closing bracket is expected; in state
%% `comma' a comma or the closing bracket is expected. Elements are
%% accumulated in reverse order.
%% Returns {{array, Elements}, RemainingInput, DecoderState}.
decode_array(L, S=#decoder{state=any}, Acc) ->
    case tokenize(L, S) of
        {end_array, Rest, S1} ->
            {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
        {start_array, Rest, S1} ->
            {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []),
            decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
        {start_object, Rest, S1} ->
            {Object, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []),
            decode_array(Rest1, S2#decoder{state=comma}, [Object | Acc]);
        {{const, Const}, Rest, S1} ->
            decode_array(Rest, S1#decoder{state=comma}, [Const | Acc])
    end;
decode_array(L, S=#decoder{state=comma}, Acc) ->
    case tokenize(L, S) of
        {end_array, Rest, S1} ->
            {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
        {comma, Rest, S1} ->
            decode_array(Rest, S1#decoder{state=any}, Acc)
    end.

%% Read the body of a string whose opening quote has been consumed.
%% Returns {Characters, RemainingInput, DecoderState}.
%%
%% While the input encoding is utf8, the first nested list, binary or
%% element >= 16#7f causes all remaining input to be decoded from UTF-8;
%% the decoder is switched to `unicode' from then on.
tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
tokenize_string("\"" ++ Rest, S, Acc) ->
    {lists:reverse(Acc), Rest, ?INC_COL(S)};
tokenize_string("\\\"" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
tokenize_string("\\\\" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
tokenize_string("\\/" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
tokenize_string("\\b" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
tokenize_string("\\f" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
tokenize_string("\\n" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
tokenize_string("\\r" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
tokenize_string("\\t" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
    C = hex4(C3, C2, C1, C0),
    tokenize_unicode_escape(C, Rest, ?ADV_COL(S, 6), Acc);
tokenize_string([C | Rest], S, Acc) ->
    % Deliberately unguarded. The previous guard, `C >= $\s; C < 16#10FFFF',
    % joined its tests with `;' (OR) and therefore accepted every element,
    % including raw control characters. That leniency is kept so input
    % that decoded before still decodes.
    tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).

%% Value of four hexadecimal digit characters, most significant first.
hex4(C3, C2, C1, C0) ->
    dehex(C0) bor
        (dehex(C1) bsl 4) bor
        (dehex(C2) bsl 8) bor
        (dehex(C3) bsl 12).

%% Continue after a \uXXXX escape whose value is C and which has already
%% been counted in the column.
%% A high surrogate (16#D800..16#DBFF) immediately followed by an escaped
%% low surrogate (16#DC00..16#DFFF) is combined into the single code point
%% the pair stands for, mirroring the pairs unihex/1 emits when encoding.
%% Any other value, including an unpaired surrogate, is kept as it is.
tokenize_unicode_escape(Hi, L=[$\\, $u, D3, D2, D1, D0 | Rest], S, Acc)
  when Hi >= 16#D800, Hi =< 16#DBFF ->
    case hex4(D3, D2, D1, D0) of
        Lo when Lo >= 16#DC00, Lo =< 16#DFFF ->
            CodePoint = 16#10000 +
                ((Hi band 16#3FF) bsl 10) +
                (Lo band 16#3FF),
            tokenize_string(Rest, ?ADV_COL(S, 6), [CodePoint | Acc]);
        _NotLowSurrogate ->
            % Leave the following escape for the normal clause to handle.
            tokenize_string(L, S, [Hi | Acc])
    end;
tokenize_unicode_escape(C, Rest, S, Acc) ->
    tokenize_string(Rest, S, [C | Acc]).

%% Scan a number. Mode is the scanner state: sign, int, int1, frac, frac1,
%% esign, eint, eint1. Acc holds the characters read so far in reverse.
%% Returns {{int, Chars} | {float, Chars}, RemainingInput, DecoderState};
%% the characters are converted to a number by tokenize/2.
tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc);
tokenize_number([$- | Rest], sign, S, []) ->
    tokenize_number(Rest, int, ?INC_COL(S), [$-]);
tokenize_number(Rest, sign, S, []) ->
    tokenize_number(Rest, int, S, []);
tokenize_number([$0 | Rest], int, S, Acc) ->
    % A leading zero ends the integer part; only a fraction or an exponent
    % may follow.
    tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]);
tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, int1, S, Acc) ->
    tokenize_number(Rest, frac, S, Acc);
tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E ->
    % An exponent without a fraction: insert ".0" so the collected text is
    % acceptable to list_to_float/1.
    tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
tokenize_number(Rest, frac, S, Acc) ->
    {{int, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]);
tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E ->
    tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]);
tokenize_number(Rest, frac1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ ->
    tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, esign, S, Acc) ->
    tokenize_number(Rest, eint, S, Acc);
tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, eint1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S}.

%% Return the next token as {Token, RemainingInput, DecoderState}.
%% Token is one of eof, start_object, end_object, start_array, end_array,
%% comma, colon or {const, Value}. End of input is only a token in the
%% trim state. Nested lists and binaries at the head of the input are
%% spliced in before scanning, and whitespace is skipped.
tokenize([], S=#decoder{state=trim}) ->
    {eof, [], S};
tokenize([L | Rest], S) when is_list(L) ->
    tokenize(L ++ Rest, S);
tokenize([B | Rest], S) when is_binary(B) ->
    tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
tokenize("\r\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize("\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize([C | Rest], S) when C == $\s; C == $\t; C == $\r ->
    % RFC 4627 lists a bare carriage return as insignificant whitespace;
    % CR LF pairs are taken by the clause above, so only a lone CR gets
    % here.
    tokenize(Rest, ?INC_COL(S));
tokenize("{" ++ Rest, S) ->
    {start_object, Rest, ?INC_COL(S)};
tokenize("}" ++ Rest, S) ->
    {end_object, Rest, ?INC_COL(S)};
tokenize("[" ++ Rest, S) ->
    {start_array, Rest, ?INC_COL(S)};
tokenize("]" ++ Rest, S) ->
    {end_array, Rest, ?INC_COL(S)};
tokenize("," ++ Rest, S) ->
    {comma, Rest, ?INC_COL(S)};
tokenize(":" ++ Rest, S) ->
    {colon, Rest, ?INC_COL(S)};
tokenize("null" ++ Rest, S) ->
    {{const, null}, Rest, ?ADV_COL(S, 4)};
tokenize("true" ++ Rest, S) ->
    {{const, true}, Rest, ?ADV_COL(S, 4)};
tokenize("false" ++ Rest, S) ->
    {{const, false}, Rest, ?ADV_COL(S, 5)};
tokenize("\"" ++ Rest, S) ->
    {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
    {{const, String}, Rest1, S1};
tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
    case tokenize_number(L, sign, S, []) of
        {{int, Int}, Rest, S1} ->
            {{const, list_to_integer(Int)}, Rest, S1};
        {{float, Float}, Rest, S1} ->
            {{const, list_to_float(Float)}, Rest, S1}
    end.