PageRenderTime 125ms CodeModel.GetById 35ms RepoModel.GetById 1ms app.codeStats 1ms

/ucengine/src/lib/mochijson/mochijson.erl

http://github.com/AF83/ucengine
Erlang | 403 lines | 323 code | 35 blank | 45 comment | 10 complexity | eff3ed5b5d4a91d3cf679e8971ecff67 MD5 | raw file
  1. %% @author Bob Ippolito <bob@mochimedia.com>
  2. %% @copyright 2006 Mochi Media, Inc.
  3. %% @doc Yet another JSON (RFC 4627) library for Erlang.
  4. -module(mochijson).
  5. -author('bob@mochimedia.com').
  6. -export([encoder/1, encode/1]).
  7. -export([decoder/1, decode/1]).
  8. -export([binary_encoder/1, binary_encode/1]).
  9. -export([binary_decoder/1, binary_decode/1]).
  %% ?Q is the double-quote character. Spelling it through a macro keeps
  %% syntax highlighters from treating it as the start of a string literal.
  -define(Q, $\").
  %% Position bookkeeping on a #decoder{} record:
  %%   ?ADV_COL(S, N) - S with its column advanced by N
  %%   ?INC_COL(S)    - S with its column advanced by one
  %%   ?INC_LINE(S)   - S moved to column 1 of the following line
  -define(ADV_COL(S, N), S#decoder{column = S#decoder.column + N}).
  -define(INC_COL(S), ?ADV_COL(S, 1)).
  -define(INC_LINE(S), S#decoder{line = S#decoder.line + 1, column = 1}).
  15. %% @type iolist() = [char() | binary() | iolist()]
  16. %% @type iodata() = iolist() | binary()
  17. %% @type json_string() = atom() | string() | binary()
  18. %% @type json_number() = integer() | float()
  19. %% @type json_array() = {array, [json_term()]}
  20. %% @type json_object() = {struct, [{json_string(), json_term()}]}
  21. %% @type json_term() = json_string() | json_number() | json_array() |
  22. %% json_object()
  23. %% @type encoding() = utf8 | unicode
  24. %% @type encoder_option() = {input_encoding, encoding()} |
  25. %% {handler, function()}
  26. %% @type decoder_option() = {input_encoding, encoding()} |
  27. %% {object_hook, function()}
  28. %% @type bjson_string() = binary()
  29. %% @type bjson_number() = integer() | float()
  30. %% @type bjson_array() = [bjson_term()]
  31. %% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]}
  32. %% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() |
  33. %% bjson_object()
  34. %% @type binary_encoder_option() = {handler, function()}
  35. %% @type binary_decoder_option() = {object_hook, function()}
  %% Encoder configuration.
  %%   input_encoding - whether list strings hold UTF-8 bytes (utf8) or
  %%                    code points (unicode)
  %%   handler        - fun applied to terms json_encode/2 does not
  %%                    recognise, or null to exit with bad_term
  -record(encoder, {
      input_encoding = unicode :: utf8 | unicode,
      handler = null :: null | fun((term()) -> term())
  }).
  %% Decoder configuration plus tokenizer position.
  %%   input_encoding - utf8 until the tokenizer converts the input to
  %%                    code points, then unicode
  %%   object_hook    - fun applied to every decoded {struct, _}, or null
  %%   line, column   - position counters maintained by the ?INC_*/?ADV_*
  %%                    macros
  %%   state          - what the parser expects next
  -record(decoder, {
      input_encoding = utf8 :: utf8 | unicode,
      object_hook = null :: null | fun((term()) -> term()),
      line = 1 :: pos_integer(),
      column = 1 :: pos_integer(),
      state = null :: null | any | key | comma | trim
  }).
  %% @spec encoder([encoder_option()]) -> function()
  %% @doc Build a reusable one-argument encoding fun. The options are
  %% folded into an #encoder{} record once, and the returned fun encodes
  %% any term with that configuration.
  encoder(Options) ->
      Config = parse_encoder_options(Options, #encoder{}),
      fun (Term) -> json_encode(Term, Config) end.
  %% @spec encode(json_term()) -> iolist()
  %% @doc Encode the given term as JSON to an iolist, using the default
  %% #encoder{} settings.
  encode(Any) ->
      Defaults = #encoder{},
      json_encode(Any, Defaults).
  %% @spec decoder([decoder_option()]) -> function()
  %% @doc Build a reusable one-argument decoding fun. The options are
  %% folded into a #decoder{} record once, and the returned fun decodes
  %% its argument with that configuration.
  decoder(Options) ->
      Config = parse_decoder_options(Options, #decoder{}),
      fun (Input) -> json_decode(Input, Config) end.
  %% @spec decode(iolist()) -> json_term()
  %% @doc Decode the given iolist to Erlang terms, using the default
  %% #decoder{} settings.
  decode(S) ->
      Defaults = #decoder{},
      json_decode(S, Defaults).
  %% @spec binary_decoder([binary_decoder_option()]) -> function()
  %% @doc Create a binary_decoder/1 with the given options. This is a
  %% direct delegation to mochijson2:decoder/1.
  binary_decoder(Options) ->
      DecodeFun = mochijson2:decoder(Options),
      DecodeFun.
  %% @spec binary_encoder([binary_encoder_option()]) -> function()
  %% @doc Create a binary_encoder/1 with the given options. This is a
  %% direct delegation to mochijson2:encoder/1.
  binary_encoder(Options) ->
      EncodeFun = mochijson2:encoder(Options),
      EncodeFun.
  %% @spec binary_encode(bjson_term()) -> iolist()
  %% @doc Encode the given term as JSON to an iolist, using lists for
  %% arrays and binaries for strings. Delegates to mochijson2:encode/1.
  binary_encode(Any) ->
      Encoded = mochijson2:encode(Any),
      Encoded.
  %% @spec binary_decode(iolist()) -> bjson_term()
  %% @doc Decode the given iolist to Erlang terms, using lists for arrays
  %% and binaries for strings. Delegates to mochijson2:decode/1.
  binary_decode(S) ->
      Decoded = mochijson2:decode(S),
      Decoded.
  79. %% Internal API
  %% Fold a list of encoder_option() tuples into the given #encoder{}.
  %% Options are applied left to right, so a later occurrence overrides an
  %% earlier one. An unrecognised option raises function_clause.
  parse_encoder_options(Options, State) ->
      Apply = fun ({input_encoding, Encoding}, Enc) ->
                      Enc#encoder{input_encoding = Encoding};
                  ({handler, Handler}, Enc) ->
                      Enc#encoder{handler = Handler}
              end,
      lists:foldl(Apply, State, Options).
  %% Fold a list of decoder_option() tuples into the given #decoder{}.
  %% Options are applied left to right, so a later occurrence overrides an
  %% earlier one. An unrecognised option raises function_clause.
  parse_decoder_options(Options, State) ->
      Apply = fun ({input_encoding, Encoding}, Dec) ->
                      Dec#decoder{input_encoding = Encoding};
                  ({object_hook, Hook}, Dec) ->
                      Dec#decoder{object_hook = Hook}
              end,
      lists:foldl(Apply, State, Options).
  %% Encode one term to JSON text (a string or iolist).
  %%   true | false | null -> the literal of the same name
  %%   integer / float     -> decimal text (floats via mochinum:digits/1)
  %%   list, binary, atom  -> a JSON string
  %%   {array, List}       -> a JSON array
  %%   {struct, Proplist}  -> a JSON object
  %% Anything else is passed through the configured handler and the result
  %% is encoded again; with no handler the process exits with
  %% {json_encode, {bad_term, Term}}.
  json_encode(Literal, _State)
    when Literal =:= true; Literal =:= false; Literal =:= null ->
      atom_to_list(Literal);
  json_encode(Int, _State) when is_integer(Int) ->
      integer_to_list(Int);
  json_encode(Float, _State) when is_float(Float) ->
      mochinum:digits(Float);
  json_encode(Text, State) when is_list(Text); is_binary(Text); is_atom(Text) ->
      json_encode_string(Text, State);
  json_encode({array, Items}, State) when is_list(Items) ->
      json_encode_array(Items, State);
  json_encode({struct, Members}, State) when is_list(Members) ->
      json_encode_proplist(Members, State);
  json_encode(Other, State = #encoder{handler = Handler}) ->
      case Handler of
          null -> exit({json_encode, {bad_term, Other}});
          _ -> json_encode(Handler(Other), State)
      end.
  %% Encode a list of terms as a JSON array. The result is the flat list
  %% [$[, E1, $,, E2, ..., $]] where each En is the encoding of one
  %% element. Elements are encoded left to right.
  json_encode_array([], _State) ->
      "[]";
  json_encode_array(Items, State) ->
      Collect = fun (Item, Done) -> [json_encode(Item, State) | Done] end,
      %% The fold yields the encoded elements in reverse order.
      [Last | Earlier] = lists:foldl(Collect, [], Items),
      %% Walking the reversed remainder while prepending restores the
      %% original order and inserts the comma separators.
      Join = fun (Encoded, Tail) -> [Encoded, $, | Tail] end,
      [$[ | lists:foldl(Join, [Last, $\]], Earlier)].
  %% Encode a list of {Key, Value} pairs as a JSON object. Keys may be
  %% atoms, integers, lists or binaries and are always emitted as JSON
  %% strings; any other key raises case_clause. The result is the flat
  %% list [${, K1, $:, V1, $,, K2, $:, V2, ..., $}].
  json_encode_proplist([], _State) ->
      "{}";
  json_encode_proplist(Props, State) ->
      Collect =
          fun ({Key, Value}, Done) ->
                  %% The key is encoded before the value, as pairs arrive.
                  KeyJson =
                      case Key of
                          _ when is_list(Key); is_binary(Key) ->
                              json_encode_string(Key, State);
                          _ when is_atom(Key) ->
                              json_encode_string_utf8(atom_to_list(Key));
                          _ when is_integer(Key) ->
                              json_encode_string(integer_to_list(Key), State)
                      end,
                  ValueJson = json_encode(Value, State),
                  [{KeyJson, ValueJson} | Done]
          end,
      %% The fold yields the encoded members in reverse order.
      [{LastKey, LastValue} | Earlier] = lists:foldl(Collect, [], Props),
      Join = fun ({K, V}, Tail) -> [K, $:, V, $, | Tail] end,
      [${ | lists:foldl(Join, [LastKey, $:, LastValue, $\}], Earlier)].
  %% Encode an atom, binary or list as a quoted JSON string.
  %% Atoms and binaries are always decoded from UTF-8 first. Lists are
  %% interpreted according to the encoder's input_encoding.
  json_encode_string(Atom, _State) when is_atom(Atom) ->
      CodePoints = xmerl_ucs:from_utf8(atom_to_list(Atom)),
      json_encode_string_unicode(CodePoints);
  json_encode_string(Bin, _State) when is_binary(Bin) ->
      CodePoints = xmerl_ucs:from_utf8(Bin),
      json_encode_string_unicode(CodePoints);
  json_encode_string(Chars, #encoder{input_encoding = unicode}) ->
      json_encode_string_unicode(Chars);
  json_encode_string(Bytes, #encoder{input_encoding = utf8}) ->
      json_encode_string_utf8(Bytes).
  %% Encode a list of UTF-8 bytes as a quoted JSON string.
  json_encode_string_utf8(S) ->
      Body = json_encode_string_utf8_1(S),
      [?Q | Body].

  %% Emit the string body followed by the closing quote. ASCII bytes are
  %% escaped one at a time. At the first byte >= 16#80 the whole remainder
  %% is decoded from UTF-8 and handed to the code-point encoder.
  json_encode_string_utf8_1([]) ->
      "\"";
  json_encode_string_utf8_1([Byte | More]) when Byte >= 0, Byte =< 16#7f ->
      Escaped =
          case Byte of
              ?Q -> "\\\"";
              $\\ -> "\\\\";
              $\b -> "\\b";
              $\f -> "\\f";
              $\n -> "\\n";
              $\r -> "\\r";
              $\t -> "\\t";
              _ when Byte >= $\s, Byte < 16#7f -> Byte;
              %% Remaining control characters and DEL become \uXXXX.
              _ -> unihex(Byte)
          end,
      [Escaped | json_encode_string_utf8_1(More)];
  json_encode_string_utf8_1(Remainder = [Byte | _])
    when Byte >= 16#80, Byte =< 16#10FFFF ->
      %% Same body-plus-closing-quote that json_encode_string_unicode/1
      %% produces after its opening quote.
      json_encode_string_unicode_1(xmerl_ucs:from_utf8(Remainder)).
  %% Encode a list of Unicode code points as a quoted JSON string.
  json_encode_string_unicode(S) ->
      Body = json_encode_string_unicode_1(S),
      [?Q | Body].

  %% Emit the string body followed by the closing quote. Printable ASCII
  %% passes through unchanged, the usual short escapes are used where they
  %% exist, and every other code point up to 16#10FFFF is written as a
  %% \uXXXX escape. Anything else exits with
  %% {json_encode, {bad_char, Char}}.
  json_encode_string_unicode_1([]) ->
      "\"";
  json_encode_string_unicode_1([Char | More]) ->
      Escaped =
          case Char of
              ?Q -> "\\\"";
              $\\ -> "\\\\";
              $\b -> "\\b";
              $\f -> "\\f";
              $\n -> "\\n";
              $\r -> "\\r";
              $\t -> "\\t";
              _ when Char >= $\s, Char < 16#7f -> Char;
              _ when Char >= 0, Char =< 16#10FFFF -> unihex(Char);
              _ -> exit({json_encode, {bad_char, Char}})
          end,
      [Escaped | json_encode_string_unicode_1(More)].
  %% Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
  %% 0..15. Any other input raises function_clause.
  dehex(Digit) when $0 =< Digit, Digit =< $9 ->
      Digit - $0;
  dehex(Lower) when $a =< Lower, Lower =< $f ->
      Lower - $a + 10;
  dehex(Upper) when $A =< Upper, Upper =< $F ->
      Upper - $A + 10.
  %% Convert a nibble value to its lowercase hexadecimal digit character.
  %% Callers pass values in 0..15.
  hexdigit(Nibble) when 0 =< Nibble, Nibble =< 9 ->
      $0 + Nibble;
  hexdigit(Nibble) when Nibble =< 15 ->
      $a + Nibble - 10.
  %% Render a code point as JSON \uXXXX escape text.
  %% Values below 16#10000 become a single six-character escape. Values up
  %% to 16#10FFFF are split into a UTF-16 surrogate pair and returned as a
  %% two-element list of escapes.
  unihex(C) when C < 16#10000 ->
      %% Walk the 16-bit value four bits at a time, most significant first.
      [$\\, $u | [hexdigit(Nibble) || <<Nibble:4>> <= <<C:16>>]];
  unihex(C) when C =< 16#10FFFF ->
      Offset = C - 16#10000,
      High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
      Low = 16#dc00 bor (Offset band 16#3ff),
      [unihex(High), unihex(Low)].
  %% Decode a complete JSON document. A binary is first expanded to its
  %% list of bytes. After the single top-level value has been read, only
  %% input that the tokenizer skips in its trim state may remain;
  %% otherwise the match on eof fails with badmatch.
  json_decode(Bin, State) when is_binary(Bin) ->
      json_decode(binary_to_list(Bin), State);
  json_decode(Input, State) ->
      {Value, Trailing, AfterValue} = decode1(Input, State),
      {eof, [], _} = tokenize(Trailing, AfterValue#decoder{state = trim}),
      Value.
  %% Decode exactly one JSON value from the front of the input.
  %% Returns {Value, RemainingInput, State}. A token that cannot start a
  %% value raises case_clause.
  decode1(Input, State = #decoder{state = null}) ->
      case tokenize(Input, State#decoder{state = any}) of
          {start_object, Rest, Next} ->
              decode_object(Rest, Next#decoder{state = key}, []);
          {start_array, Rest, Next} ->
              decode_array(Rest, Next#decoder{state = any}, []);
          {{const, Value}, Rest, Next} ->
              {Value, Rest, Next}
      end.
  %% Pass a freshly decoded object through the configured object_hook, or
  %% return it unchanged when no hook is set.
  make_object(V, #decoder{object_hook = Hook}) ->
      case Hook of
          null -> V;
          _ -> Hook(V)
      end.
  %% Decode the members of a JSON object after its opening brace.
  %% state=key   expects a string key or the closing brace
  %% state=comma expects a comma or the closing brace
  %% Members accumulate in reverse and are reversed on completion.
  %% Returns {Object, RemainingInput, State} with the state reset to null.
  decode_object(Input, State = #decoder{state = key}, Members) ->
      case tokenize(Input, State) of
          {{const, Key}, AfterKey, S1} when is_list(Key) ->
              {colon, AfterColon, S2} = tokenize(AfterKey, S1),
              {Value, AfterValue, S3} =
                  decode1(AfterColon, S2#decoder{state = null}),
              decode_object(AfterValue, S3#decoder{state = comma},
                            [{Key, Value} | Members]);
          {end_object, Remaining, S1} ->
              Object = make_object({struct, lists:reverse(Members)}, S1),
              {Object, Remaining, S1#decoder{state = null}}
      end;
  decode_object(Input, State = #decoder{state = comma}, Members) ->
      case tokenize(Input, State) of
          {comma, Remaining, S1} ->
              decode_object(Remaining, S1#decoder{state = key}, Members);
          {end_object, Remaining, S1} ->
              Object = make_object({struct, lists:reverse(Members)}, S1),
              {Object, Remaining, S1#decoder{state = null}}
      end.
  %% Decode the elements of a JSON array after its opening bracket.
  %% state=any   expects a value or the closing bracket
  %% state=comma expects a comma or the closing bracket
  %% Elements accumulate in reverse and are reversed on completion.
  %% Returns {{array, Elements}, RemainingInput, State} with the state
  %% reset to null.
  decode_array(Input, State = #decoder{state = any}, Elements) ->
      case tokenize(Input, State) of
          {{const, Value}, Remaining, S1} ->
              decode_array(Remaining, S1#decoder{state = comma},
                           [Value | Elements]);
          {start_object, Inner, S1} ->
              {Object, Remaining, S2} =
                  decode_object(Inner, S1#decoder{state = key}, []),
              decode_array(Remaining, S2#decoder{state = comma},
                           [Object | Elements]);
          {start_array, Inner, S1} ->
              {Nested, Remaining, S2} =
                  decode_array(Inner, S1#decoder{state = any}, []),
              decode_array(Remaining, S2#decoder{state = comma},
                           [Nested | Elements]);
          {end_array, Remaining, S1} ->
              {{array, lists:reverse(Elements)}, Remaining,
               S1#decoder{state = null}}
      end;
  decode_array(Input, State = #decoder{state = comma}, Elements) ->
      case tokenize(Input, State) of
          {comma, Remaining, S1} ->
              decode_array(Remaining, S1#decoder{state = any}, Elements);
          {end_array, Remaining, S1} ->
              {{array, lists:reverse(Elements)}, Remaining,
               S1#decoder{state = null}}
      end.
  %% Read the body of a JSON string; the opening quote has already been
  %% consumed. Returns {CodePoints, RemainingInput, State}.
  %%
  %% In utf8 mode, the first element that is a nested list, a binary or a
  %% value >= 16#7f causes the whole remaining input to be flattened and
  %% decoded from UTF-8, and the decoder switches to unicode mode.
  %%
  %% An escaped UTF-16 surrogate pair (\uD800-\uDBFF followed directly by
  %% \uDC00-\uDFFF) is combined into the single code point it denotes, so
  %% the pairs written by unihex/1 decode back to the original character.
  %% A surrogate escape without a valid partner is passed through as is.
  tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
    when is_list(C); is_binary(C); C >= 16#7f ->
      List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
      tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
  tokenize_string("\"" ++ Rest, S, Acc) ->
      {lists:reverse(Acc), Rest, ?INC_COL(S)};
  tokenize_string("\\\"" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
  tokenize_string("\\\\" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
  tokenize_string("\\/" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
  tokenize_string("\\b" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
  tokenize_string("\\f" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
  tokenize_string("\\n" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
  tokenize_string("\\r" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
  tokenize_string("\\t" ++ Rest, S, Acc) ->
      tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
  tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
      case {dehex4(C3, C2, C1, C0), Rest} of
          {High, [$\\, $u, D3, D2, D1, D0 | Rest1]}
            when High >= 16#D800, High =< 16#DBFF ->
              case dehex4(D3, D2, D1, D0) of
                  Low when Low >= 16#DC00, Low =< 16#DFFF ->
                      %% Coalesce the UTF-16 surrogate pair.
                      Pair = 16#10000
                          + ((High - 16#D800) bsl 10)
                          + (Low - 16#DC00),
                      tokenize_string(Rest1, ?ADV_COL(S, 12), [Pair | Acc]);
                  _NotLowSurrogate ->
                      %% Lone high surrogate: keep it and let the next
                      %% iteration handle the following escape by itself.
                      tokenize_string(Rest, ?ADV_COL(S, 6), [High | Acc])
              end;
          {Single, _} ->
              tokenize_string(Rest, ?ADV_COL(S, 6), [Single | Acc])
      end;
  %% NOTE(review): the guard below uses `;' (or), so it holds for every
  %% element and the clause accepts any character, including raw control
  %% characters. It is kept as is so that input accepted today does not
  %% start failing.
  tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
      tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).

  %% Combine four hexadecimal digit characters, most significant first,
  %% into the 16-bit value they spell. A non-hex character raises
  %% function_clause in dehex/1.
  dehex4(H3, H2, H1, H0) ->
      dehex(H0) bor
          (dehex(H1) bsl 4) bor
          (dehex(H2) bsl 8) bor
          (dehex(H3) bsl 12).
  %% Scan a JSON number as a small state machine. The mode walks through
  %% sign -> int -> int1 -> frac -> frac1 -> esign -> eint -> eint1, and
  %% the characters seen so far accumulate in reverse.
  %% Returns {{int, Text}, Rest, State} or {{float, Text}, Rest, State},
  %% where Text is the literal in a form suitable for list_to_integer/1
  %% or list_to_float/1.
  %%
  %% In utf8 mode, a head element that is a nested list, a binary or a
  %% value >= 16#7f causes the remaining input to be flattened and decoded
  %% from UTF-8 before scanning continues.
  tokenize_number(Raw = [Head | _], Mode, St = #decoder{input_encoding = utf8}, Seen)
    when is_list(Head); is_binary(Head); Head >= 16#7f ->
      CodePoints = xmerl_ucs:from_utf8(iolist_to_binary(Raw)),
      tokenize_number(CodePoints, Mode, St#decoder{input_encoding = unicode}, Seen);
  %% sign: optional leading minus
  tokenize_number([$- | Tail], sign, St, []) ->
      tokenize_number(Tail, int, ?INC_COL(St), [$-]);
  tokenize_number(Input, sign, St, []) ->
      tokenize_number(Input, int, St, []);
  %% int: a lone zero, or a non-zero digit that starts a digit run
  tokenize_number([$0 | Tail], int, St, Seen) ->
      tokenize_number(Tail, frac, ?INC_COL(St), [$0 | Seen]);
  tokenize_number([D | Tail], int, St, Seen) when $1 =< D, D =< $9 ->
      tokenize_number(Tail, int1, ?INC_COL(St), [D | Seen]);
  %% int1: remaining integer digits
  tokenize_number([D | Tail], int1, St, Seen) when $0 =< D, D =< $9 ->
      tokenize_number(Tail, int1, ?INC_COL(St), [D | Seen]);
  tokenize_number(Input, int1, St, Seen) ->
      tokenize_number(Input, frac, St, Seen);
  %% frac: either ".<digit>", an exponent marker, or the end of an integer
  tokenize_number([$., D | Tail], frac, St, Seen) when $0 =< D, D =< $9 ->
      tokenize_number(Tail, frac1, ?ADV_COL(St, 2), [D, $. | Seen]);
  tokenize_number([E | Tail], frac, St, Seen) when E == $e; E == $E ->
      %% Insert ".0" before the exponent so list_to_float/1 accepts it.
      tokenize_number(Tail, esign, ?INC_COL(St), "e0." ++ Seen);
  tokenize_number(Input, frac, St, Seen) ->
      {{int, lists:reverse(Seen)}, Input, St};
  %% frac1: remaining fraction digits, then optional exponent
  tokenize_number([D | Tail], frac1, St, Seen) when $0 =< D, D =< $9 ->
      tokenize_number(Tail, frac1, ?INC_COL(St), [D | Seen]);
  tokenize_number([E | Tail], frac1, St, Seen) when E == $e; E == $E ->
      tokenize_number(Tail, esign, ?INC_COL(St), [$e | Seen]);
  tokenize_number(Input, frac1, St, Seen) ->
      {{float, lists:reverse(Seen)}, Input, St};
  %% esign: optional exponent sign
  tokenize_number([Sign | Tail], esign, St, Seen) when Sign == $-; Sign == $+ ->
      tokenize_number(Tail, eint, ?INC_COL(St), [Sign | Seen]);
  tokenize_number(Input, esign, St, Seen) ->
      tokenize_number(Input, eint, St, Seen);
  %% eint: at least one exponent digit is required
  tokenize_number([D | Tail], eint, St, Seen) when $0 =< D, D =< $9 ->
      tokenize_number(Tail, eint1, ?INC_COL(St), [D | Seen]);
  %% eint1: remaining exponent digits
  tokenize_number([D | Tail], eint1, St, Seen) when $0 =< D, D =< $9 ->
      tokenize_number(Tail, eint1, ?INC_COL(St), [D | Seen]);
  tokenize_number(Input, eint1, St, Seen) ->
      {{float, lists:reverse(Seen)}, Input, St}.
  %% Produce the next token from the input.
  %% Returns {Token, RemainingInput, State}, where Token is one of
  %% start_object, end_object, start_array, end_array, comma, colon,
  %% {const, Value} or, at end of input in the trim state only, eof.
  %%
  %% Nested lists are spliced into the input. Binary chunks are expanded
  %% according to the decoder's input_encoding: in utf8 mode they stay raw
  %% bytes, because tokenize_string/3 and tokenize_number/4 do the UTF-8
  %% decoding themselves and decoding here as well would decode the data
  %% twice and fail on non-ASCII text. In unicode mode they are decoded to
  %% code points.
  %%
  %% Whitespace is space, tab, LF, CR LF and a lone CR.
  tokenize([], S=#decoder{state=trim}) ->
      {eof, [], S};
  tokenize([L | Rest], S) when is_list(L) ->
      tokenize(L ++ Rest, S);
  tokenize([B | Rest], S=#decoder{input_encoding=utf8}) when is_binary(B) ->
      tokenize(binary_to_list(B) ++ Rest, S);
  tokenize([B | Rest], S) when is_binary(B) ->
      tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
  tokenize("\r\n" ++ Rest, S) ->
      tokenize(Rest, ?INC_LINE(S));
  tokenize("\r" ++ Rest, S) ->
      %% A lone carriage return is JSON whitespace; count it as a line
      %% break.
      tokenize(Rest, ?INC_LINE(S));
  tokenize("\n" ++ Rest, S) ->
      tokenize(Rest, ?INC_LINE(S));
  tokenize([C | Rest], S) when C == $\s; C == $\t ->
      tokenize(Rest, ?INC_COL(S));
  tokenize("{" ++ Rest, S) ->
      {start_object, Rest, ?INC_COL(S)};
  tokenize("}" ++ Rest, S) ->
      {end_object, Rest, ?INC_COL(S)};
  tokenize("[" ++ Rest, S) ->
      {start_array, Rest, ?INC_COL(S)};
  tokenize("]" ++ Rest, S) ->
      {end_array, Rest, ?INC_COL(S)};
  tokenize("," ++ Rest, S) ->
      {comma, Rest, ?INC_COL(S)};
  tokenize(":" ++ Rest, S) ->
      {colon, Rest, ?INC_COL(S)};
  tokenize("null" ++ Rest, S) ->
      {{const, null}, Rest, ?ADV_COL(S, 4)};
  tokenize("true" ++ Rest, S) ->
      {{const, true}, Rest, ?ADV_COL(S, 4)};
  tokenize("false" ++ Rest, S) ->
      {{const, false}, Rest, ?ADV_COL(S, 5)};
  tokenize("\"" ++ Rest, S) ->
      {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
      {{const, String}, Rest1, S1};
  tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
      %% tokenize_number/4 returns the literal as text; convert it to a
      %% number here.
      case tokenize_number(L, sign, S, []) of
          {{int, Int}, Rest, S1} ->
              {{const, list_to_integer(Int)}, Rest, S1};
          {{float, Float}, Rest, S1} ->
              {{const, list_to_float(Float)}, Rest, S1}
      end.