/src/support/z_parse_multipart.erl

https://code.google.com/p/zotonic/ · Erlang · 349 lines · 258 code · 39 blank · 52 comment · 5 complexity · 243f37001b8a585ff79faa6374447e38 MD5 · raw file

  1. %% @author Bob Ippolito <bob@mochimedia.com>
  2. %% @copyright 2007 Mochi Media, Inc.
  3. %%
  4. %% @author Marc Worrell <marc@worrell.nl>
  5. %% Date: 2009-05-13
  6. %%
  7. %% @doc Parse multipart/form-data request bodies. Uses a callback function to receive the next parts, can call
  8. %% a progress function to report back the progress on receiving the data.
  9. %%
  10. %% Adapted from mochiweb_multipart.erl, integrated with webmachine and zotonic
  11. %% This is the MIT license.
  12. %%
  13. %% Copyright (c) 2007 Mochi Media, Inc.
  14. %%
  15. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  16. %% of this software and associated documentation files (the "Software"), to deal
  17. %% in the Software without restriction, including without limitation the rights
  18. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  19. %% of the Software, and to permit persons to whom the Software is furnished to do
  20. %% so, subject to the following conditions:
  21. %%
  22. %% The above copyright notice and this permission notice shall be included in all
  23. %% copies or substantial portions of the Software.
  24. %%
  25. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  26. %% INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  27. %% PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  28. %% LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  29. %% TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
  30. %% OR OTHER DEALINGS IN THE SOFTWARE.
  31. -module(z_parse_multipart).
  32. -author("Marc Worrell <marc@worrell.nl").
  33. %% interface functions
  34. -export([
  35. recv_parse/1,
  36. recv_parse/2,
  37. find_boundary/2
  38. ]).
  39. -include("zotonic.hrl").
  40. -define(CHUNKSIZE, 4096).
  41. -record(mp, {state, boundary, content_length, length, percentage=0,
  42. buffer, next_chunk, callback, progress, context}).
  43. %% @doc Receive and parse the form data in the request body.
  44. %% The progress function should accept the parameters [Percentage, Context]
  45. %% @spec recv_parse(Context) -> {form(), NewContext}
  46. recv_parse(Context) ->
  47. recv_parse(fun(_Filename, _ContentType, _Size) -> ok end, Context).
  48. %% @spec recv_parse(UploadCheckFun, Context) -> {form(), NewContext}
  49. recv_parse(UploadCheckFun, Context) ->
  50. Callback = fun(N) -> callback(N, #multipart_form{}, UploadCheckFun) end,
  51. {_LengthRemaining, _RestData, Form, ContextParsed} = parse_multipart_request(fun progress/4, Callback, Context),
  52. if Form#multipart_form.file =/= undefined ->
  53. % Premature end
  54. file:close(Form#multipart_form.file);
  55. true ->
  56. nop
  57. end,
  58. {Form, ContextParsed}.
  59. %% @doc Report progress back to the page.
  60. progress(Percentage, ContentLength, _ReceivedLength, Context) when ContentLength > ?CHUNKSIZE*5 ->
  61. case { z_convert:to_bool(z_context:get_q("z_comet", Context)),
  62. z_context:get_q("z_pageid", Context),
  63. z_context:get_q("z_trigger_id", Context)} of
  64. {true, PageId, TriggerId} when PageId /= undefined; TriggerId /= undefined ->
  65. ContextEnsured = z_context:ensure_all(Context),
  66. z_session_page:add_script("z_progress('"
  67. ++z_utils:js_escape(TriggerId)++"',"
  68. ++integer_to_list(Percentage)++");", ContextEnsured);
  69. _ -> nop
  70. end;
  71. progress(_, _, _, _) ->
  72. nop.
  73. %% @doc Callback function collecting all data found in the multipart/form-data body
  74. %% @spec callback(Next, function(), form()) -> function() | form()
  75. callback(Next, Form, UploadCheckFun) ->
  76. case Next of
  77. {headers, Headers} ->
  78. % Find out if it is a file
  79. ContentDisposition = proplists:get_value("content-disposition", Headers),
  80. case ContentDisposition of
  81. {"form-data", [{"name", Name}, {"filename",Filename}]} ->
  82. ContentLength = case proplists:get_value("content-length", Headers) of
  83. undefined -> undefined;
  84. {CL,_} -> z_convert:to_integer(CL)
  85. end,
  86. ContentType = case proplists:get_value("content-type", Headers) of
  87. undefined -> undefined;
  88. {Mime,_} -> Mime
  89. end,
  90. case UploadCheckFun(Filename, ContentType, ContentLength) of
  91. ok ->
  92. NF = Form#multipart_form{name=Name,
  93. filename=Filename,
  94. content_length=ContentLength,
  95. content_type=ContentType,
  96. tmpfile=z_tempfile:new()},
  97. fun(N) -> callback(N, NF, UploadCheckFun) end;
  98. {error, _Reason} = Error ->
  99. throw(Error)
  100. end;
  101. {"form-data",[{"name",Name}]} ->
  102. NF = Form#multipart_form{name=Name, data=[]},
  103. fun(N) -> callback(N, NF, UploadCheckFun) end;
  104. _ ->
  105. fun(N) -> callback(N, Form, UploadCheckFun) end
  106. end;
  107. {body, Data} ->
  108. if Form#multipart_form.filename =/= undefined ->
  109. if Form#multipart_form.file =/= undefined ->
  110. file:write(Form#multipart_form.file, Data),
  111. NewForm = Form;
  112. true ->
  113. case file:open(Form#multipart_form.tmpfile, [raw,write]) of
  114. {ok, File} ->
  115. file:write(File, Data),
  116. NewForm = Form#multipart_form{file=File};
  117. {error, Error} ->
  118. ?ERROR("Couldn't open ~p for writing, error: ~p~n", [Form#multipart_form.tmpfile, Error]),
  119. NewForm = Form,
  120. exit(could_not_open_file_for_writing)
  121. end
  122. end;
  123. true ->
  124. NewForm = Form#multipart_form{data=[binary_to_list(Data), Form#multipart_form.data]}
  125. end,
  126. fun(N) -> callback(N, NewForm, UploadCheckFun) end;
  127. body_end ->
  128. NewForm = if Form#multipart_form.file =/= undefined ->
  129. file:close(Form#multipart_form.file),
  130. Form#multipart_form{
  131. name=undefined,
  132. data=undefined,
  133. file=undefined,
  134. tmpfile=undefined,
  135. filename=undefined,
  136. content_type=undefined,
  137. content_length=undefined,
  138. files=[{Form#multipart_form.name, Form#multipart_form.filename, Form#multipart_form.tmpfile}|Form#multipart_form.files]
  139. };
  140. Form#multipart_form.name =/= undefined ->
  141. Data = lists:flatten(Form#multipart_form.data),
  142. Form#multipart_form{
  143. name=undefined,
  144. data=undefined,
  145. args=[{Form#multipart_form.name, Data} | Form#multipart_form.args]
  146. };
  147. true ->
  148. Form
  149. end,
  150. fun(N) -> callback(N, NewForm, UploadCheckFun) end;
  151. eof ->
  152. Form
  153. end.
  154. %% @doc Parse the multipart request
  155. parse_multipart_request(ProgressFunction, Callback, Context) ->
  156. ReqData = z_context:get_reqdata(Context),
  157. Length = list_to_integer(wrq:get_req_header_lc("content-length", ReqData)),
  158. Boundary = iolist_to_binary(get_boundary(wrq:get_req_header_lc("content-type", ReqData))),
  159. Prefix = <<"\r\n--", Boundary/binary>>,
  160. BS = size(Boundary),
  161. {{Chunk, Next}, ReqData1} = wrq:stream_req_body(ReqData, ?CHUNKSIZE),
  162. Context1 = z_context:set_reqdata(ReqData1, Context),
  163. <<"--", Boundary:BS/binary, "\r\n", Rest/binary>> = Chunk,
  164. feed_mp(headers, #mp{boundary=Prefix,
  165. length=size(Chunk),
  166. content_length=Length,
  167. buffer=Rest,
  168. callback=Callback,
  169. progress=ProgressFunction,
  170. next_chunk=Next,
  171. context=Context1}).
  172. feed_mp(headers, State=#mp{buffer=Buffer, callback=Callback}) ->
  173. {State1, P} = case find_in_binary(<<"\r\n\r\n">>, Buffer) of
  174. {exact, N} ->
  175. {State, N};
  176. _ ->
  177. S1 = read_more(State),
  178. %% Assume headers must be less than ?CHUNKSIZE
  179. {exact, N} = find_in_binary(<<"\r\n\r\n">>, S1#mp.buffer),
  180. {S1, N}
  181. end,
  182. <<Headers:P/binary, "\r\n\r\n", Rest/binary>> = State1#mp.buffer,
  183. NextCallback = Callback({headers, parse_headers(Headers)}),
  184. feed_mp(body, State1#mp{buffer=Rest, callback=NextCallback});
  185. feed_mp(body, State=#mp{boundary=Prefix, buffer=Buffer, callback=Callback}) ->
  186. case find_boundary(Prefix, Buffer) of
  187. {end_boundary, Start, Skip} ->
  188. <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
  189. C1 = Callback({body, Data}),
  190. C2 = C1(body_end),
  191. {State#mp.length, Rest, C2(eof), State#mp.context};
  192. {next_boundary, Start, Skip} ->
  193. <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
  194. C1 = Callback({body, Data}),
  195. feed_mp(headers, State#mp{callback=C1(body_end), buffer=Rest});
  196. {maybe, 0} ->
  197. % Found a boundary, without an ending newline
  198. case read_more(State) of
  199. State -> throw({error, incomplete_end_boundary});
  200. S1 -> feed_mp(body, S1)
  201. end;
  202. {maybe, Start} ->
  203. <<Data:Start/binary, Rest/binary>> = Buffer,
  204. feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), buffer=Rest}));
  205. not_found ->
  206. {Data, Rest} = {Buffer, <<>>},
  207. feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), buffer=Rest}))
  208. end.
  209. %% @doc Read more data for the feed_mp functions.
  210. %% @spec read_more(mp()) -> mp()
  211. read_more(State=#mp{next_chunk=done, content_length=ContentLength, length=Length} = State) when ContentLength =:= Length ->
  212. State;
  213. read_more(State=#mp{next_chunk=done} = State) ->
  214. throw({error, wrong_content_length});
  215. read_more(State=#mp{length=Length, content_length=ContentLength,
  216. percentage=Percentage,
  217. buffer=Buffer, next_chunk=Next, context=Context,
  218. progress=ProgressFunction}) ->
  219. {Data, Next1} = Next(),
  220. Buffer1 = <<Buffer/binary, Data/binary>>,
  221. Length1 = Length + size(Data),
  222. NewPercentage = case ContentLength of
  223. 0 -> 100;
  224. _ -> (Length1 * 100) div ContentLength
  225. end,
  226. case NewPercentage > Percentage of
  227. true ->
  228. case ProgressFunction of
  229. undefined -> nop;
  230. F -> F(NewPercentage, ContentLength, Length1, Context)
  231. end;
  232. _ ->
  233. nop
  234. end,
  235. State#mp{length=Length1, buffer=Buffer1, next_chunk=Next1, percentage=NewPercentage}.
  236. %% @doc Parse the headers of a part in the form data
  237. parse_headers(<<>>) ->
  238. [];
  239. parse_headers(Binary) ->
  240. parse_headers(Binary, []).
  241. parse_headers(Binary, Acc) ->
  242. case find_in_binary(<<"\r\n">>, Binary) of
  243. {exact, N} ->
  244. <<Line:N/binary, "\r\n", Rest/binary>> = Binary,
  245. parse_headers(Rest, [split_header(Line) | Acc]);
  246. not_found ->
  247. lists:reverse([split_header(Binary) | Acc])
  248. end.
  249. split_header(Line) ->
  250. {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end, binary_to_list(Line)),
  251. {string:to_lower(string:strip(Name)), mochiweb_util:parse_header(Value)}.
  252. %% @doc Get the request boundary separating the parts in the request body
  253. get_boundary(ContentType) ->
  254. {"multipart/form-data", Opts} = mochiweb_util:parse_header(ContentType),
  255. case proplists:get_value("boundary", Opts) of
  256. S when is_list(S) ->
  257. S
  258. end.
  259. %% @doc Find the next boundary in the data
  260. find_boundary(Prefix, Data) ->
  261. case find_in_binary(Prefix, Data) of
  262. {exact, Skip} ->
  263. PrefixSkip = Skip + size(Prefix),
  264. case Data of
  265. <<_:PrefixSkip/binary, "\r\n", _/binary>> ->
  266. {next_boundary, Skip, size(Prefix) + 2};
  267. <<_:PrefixSkip/binary, "--\r\n", _/binary>> ->
  268. {end_boundary, Skip, size(Prefix) + 4};
  269. % POSTs by Adobe Flash don't have the ending newline
  270. <<_:PrefixSkip/binary, "--", _/binary>> ->
  271. {end_boundary, Skip, size(Prefix) + 2};
  272. _ when size(Data) < PrefixSkip + 4 ->
  273. %% Underflow
  274. {maybe, Skip};
  275. _ ->
  276. %% False positive
  277. not_found
  278. end;
  279. {partial, Skip, Length} when (Skip + Length) =:= size(Data) ->
  280. %% Underflow
  281. {maybe, Skip};
  282. _ ->
  283. not_found
  284. end.
  285. find_in_binary(B, Data) when size(B) > 0 ->
  286. case size(Data) - size(B) of
  287. Last when Last < 0 ->
  288. partial_find(B, Data, 0, size(Data));
  289. Last ->
  290. find_in_binary(B, size(B), Data, 0, Last)
  291. end.
  292. find_in_binary(B, BS, D, N, Last) when N =< Last->
  293. case D of
  294. <<_:N/binary, B:BS/binary, _/binary>> ->
  295. {exact, N};
  296. _ ->
  297. find_in_binary(B, BS, D, 1 + N, Last)
  298. end;
  299. find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last ->
  300. partial_find(B, D, N, BS - 1).
  301. partial_find(_B, _D, _N, 0) ->
  302. not_found;
  303. partial_find(B, D, N, K) ->
  304. <<B1:K/binary, _/binary>> = B,
  305. case D of
  306. <<_Skip:N/binary, B1:K/binary>> ->
  307. {partial, N, K};
  308. _ ->
  309. partial_find(B, D, 1 + N, K - 1)
  310. end.