PageRenderTime 58ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/deps/gen_smtp/src/mimemail.erl

http://github.com/zotonic/zotonic
Erlang | 1818 lines | 1580 code | 90 blank | 148 comment | 46 complexity | 2bc63ce0c840d1e05c216b867ee1bcae MD5 | raw file
Possible License(s): Apache-2.0, CC-BY-SA-4.0, MIT, LGPL-2.1, BSD-3-Clause
  1. %%% Copyright 2009 Andrew Thompson <andrew@hijacked.us>. All rights reserved.
  2. %%%
  3. %%% Redistribution and use in source and binary forms, with or without
  4. %%% modification, are permitted provided that the following conditions are met:
  5. %%%
  6. %%% 1. Redistributions of source code must retain the above copyright notice,
  7. %%% this list of conditions and the following disclaimer.
  8. %%% 2. Redistributions in binary form must reproduce the above copyright
  9. %%% notice, this list of conditions and the following disclaimer in the
  10. %%% documentation and/or other materials provided with the distribution.
  11. %%%
  12. %%% THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY EXPRESS OR
  13. %%% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  14. %%% MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  15. %%% EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  16. %%% INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17. %%% (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18. %%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19. %%% ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20. %%% (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. %%% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. %% @doc A module for decoding/encoding MIME 1.0 email.
  23. %% The encoder and decoder operate on the same datastructure, which is as follows:
  24. %% A 5-tuple with the following elements: `{Type, SubType, Headers, Parameters, Body}'.
  25. %%
  26. %% `Type' and `SubType' are the MIME type of the email, examples are `text/plain' or
  27. %% `multipart/alternative'. The decoder splits these into 2 fields so you can filter by
  28. %% the main type or by the subtype.
  29. %%
  30. %% `Headers' consists of a list of key/value pairs of binary values eg.
  31. %% `{<<"From">>, <<"Andrew Thompson <andrew@hijacked.us>">>}'. There is no parsing of
  32. %% the header aside from un-wrapping the lines and splitting the header name from the
  33. %% header value.
  34. %%
  35. %% `Parameters' is a list of 3 key/value tuples. The 3 keys are `<<"content-type-params">>',
  36. %% `<<"disposition">>' and `<<"disposition-params">>'.
  37. %% `content-type-params' is a key/value list of parameters on the content-type header, this
  38. %% usually consists of things like charset and the format parameters. `disposition' indicates
  39. %% how the data wants to be displayed, this is usually 'inline'. `disposition-params' is a list of
  40. %% disposition information, eg. the filename this section should be saved as, the modification
  41. %% date the file should be saved with, etc.
  42. %%
  43. %% Finally, `Body' can be one of several different types, depending on the structure of the email.
  44. %% For a simple email, the body will usually be a binary consisting of the message body. In the
  45. %% case of a multipart email, it's a list of these 5-tuple MIME structures. The third possibility,
  46. %% in the case of a message/rfc822 attachment, body can be a single 5-tuple MIME structure.
  47. %%
  48. %% You should see the relevant RFCs (2045, 2046, 2047, etc.) for more information.
  49. -module(mimemail).
  50. -ifdef(TEST).
  51. -include_lib("eunit/include/eunit.hrl").
  52. -endif.
  53. -export([encode/1, decode/2, decode/1, get_header_value/2, get_header_value/3, parse_headers/1]).
  54. -define(DEFAULT_OPTIONS, [
  55. {encoding, get_default_encoding()}, % default encoding is utf-8 if we can find the iconv module
  56. {decode_attachments, true} % should we decode any base64/quoted printable attachments?
  57. ]).
  58. -type(mimetuple() :: {binary(), binary(), [{binary(), binary()}], [{binary(), binary()}], binary() | [{binary(), binary(), [{binary(), binary()}], [{binary(), binary()}], binary() | [tuple()]}] | tuple()}).
  %% Options accepted by decode/2. The option names match the keys used in
  %% ?DEFAULT_OPTIONS (`encoding' and `decode_attachments').
  -type(options() :: [{'encoding', binary()} | {'decode_attachments', boolean()}]).
  -spec decode(Email :: binary()) -> mimetuple().
  %% @doc Decode a MIME email from a binary, using `?DEFAULT_OPTIONS'.
  %% The headers are split off with parse_headers/1 and the rest of the work is
  %% done by decode/3.
  decode(Email) ->
      {ParsedHeaders, RawBody} = parse_headers(Email),
      decode(ParsedHeaders, RawBody, ?DEFAULT_OPTIONS).
  -spec decode(Email :: binary(), Options :: options()) -> mimetuple().
  %% @doc Decode a MIME email from a binary with caller-supplied options.
  %% `Options' is a property list; decode/3 reads its `encoding' entry and
  %% treats a missing entry as `none'.
  decode(Email, Options) when is_binary(Email), is_list(Options) ->
      {ParsedHeaders, RawBody} = parse_headers(Email),
      decode(ParsedHeaders, RawBody, Options).
  %% Decode a message whose headers have already been split from its body.
  %%
  %% `OrigHeaders' is a list of `{Name, Value}' binaries, `Body' is the raw body
  %% and `Options' is a property list whose `encoding' entry (default `none')
  %% names the charset everything is converted to.
  %%
  %% When an encoding is requested and no process is registered as `iconv',
  %% iconv is started first. A message without a MIME-Version header is decoded
  %% as a single part; if such a message nevertheless declares a multipart
  %% content type, `non_mime_multipart' is raised. Messages with a MIME-Version
  %% header are handed to decode_component/4.
  %%
  %% NOTE: in the "no MIME-Version but explicit Content-Type" case the fourth
  %% tuple element is the bare content-type parameter list, not the three-key
  %% list the other paths produce.
  decode(OrigHeaders, Body, Options) ->
      Encoding = proplists:get_value(encoding, Options, none),
      IconvRegistered = whereis(iconv),
      if
          IconvRegistered =:= undefined, Encoding =/= none ->
              {ok, _Pid} = iconv:start();
          true ->
              ok
      end,
      Headers = decode_headers(OrigHeaders, [], Encoding),
      MimeVersion = parse_with_comments(get_header_value(<<"MIME-Version">>, Headers)),
      case MimeVersion of
          undefined ->
              TransferEncoding = get_header_value(<<"Content-Transfer-Encoding">>, Headers),
              case parse_content_type(get_header_value(<<"Content-Type">>, Headers)) of
                  {<<"multipart">>, _, _} ->
                      erlang:error(non_mime_multipart);
                  {Type, SubType, ContentTypeParams} ->
                      Charset = proplists:get_value(<<"charset">>, ContentTypeParams),
                      Decoded = decode_body(TransferEncoding, Body, Charset, Encoding),
                      {Type, SubType, Headers, ContentTypeParams, Decoded};
                  undefined ->
                      Defaults = [
                          {<<"content-type-params">>, [{<<"charset">>, <<"us-ascii">>}]},
                          {<<"disposition">>, <<"inline">>},
                          {<<"disposition-params">>, []}
                      ],
                      {<<"text">>, <<"plain">>, Headers, Defaults, decode_body(TransferEncoding, Body)}
              end;
          Version ->
              decode_component(Headers, Body, Version, Options)
      end.
  -spec encode(MimeMail :: mimetuple()) -> binary().
  %% @doc Encode a MIME tuple to a binary.
  %% The content headers are completed by ensure_content_headers/6 and the
  %% general headers by check_headers/1. The encoded header lines and the
  %% encoded body lines are each joined with CRLF and separated by a blank line.
  %% Any argument that is not a 5-tuple prints a notice and raises `non_mime'.
  encode({Type, Subtype, Headers, ContentTypeParams, Parts}) ->
      {FixedParams, ContentHeaders} =
          ensure_content_headers(Type, Subtype, ContentTypeParams, Headers, Parts, true),
      AllHeaders = check_headers(ContentHeaders),
      HeaderBlock = binstr:join(encode_headers(AllHeaders), "\r\n"),
      BodyLines = encode_component(Type, Subtype, AllHeaders, FixedParams, Parts),
      BodyBlock = binstr:join(BodyLines, "\r\n"),
      list_to_binary([HeaderBlock, "\r\n\r\n", BodyBlock]);
  encode(_) ->
      io:format("Not a mime-decoded DATA~n"),
      erlang:error(non_mime).
  %% Apply decode_header/2 to the value of every `{Key, Value}' header.
  %% With a target charset of `none' the headers are returned untouched.
  %% Otherwise the result is the reversed accumulator followed by the decoded
  %% headers in their original order.
  decode_headers(Headers, _Acc, none) ->
      Headers;
  decode_headers(Headers, Acc, Charset) ->
      DecodeOne = fun({Key, Value}) -> {Key, decode_header(Value, Charset)} end,
      lists:reverse(Acc, lists:map(DecodeOne, Headers)).
  %% Decode the `=?charset?Q|B?data?=' encoded words in a header value.
  %%
  %% `Value' is the raw header value and `Charset' the charset to convert to.
  %% The first encoded word is decoded (quoted-printable for `q', base64 for
  %% `b'), converted with iconv and spliced back into the value. The function
  %% then recurses until no encoded word is left. Whitespace that separates
  %% this encoded word from one immediately following it is dropped.
  %%
  %% Throws `{bad_charset, Name}' when iconv rejects the source charset.
  decode_header(Value, Charset) ->
      case re:run(Value, "=\\?([-A-Za-z0-9_]+)\\?([qQbB])\\?([^\s]+)\\?=", [ungreedy]) of
          nomatch ->
              Value;
          {match, [{AllStart, AllLen}, {EncodingStart, EncodingLen}, {TypeStart, _}, {DataStart, DataLen}]} ->
              Encoding = binstr:substr(Value, EncodingStart + 1, EncodingLen),
              Type = binstr:to_lower(binstr:substr(Value, TypeStart + 1, 1)),
              Data = binstr:substr(Value, DataStart + 1, DataLen),
              SourceCharset = fix_encoding(Encoding),
              CD = case iconv:open(Charset, SourceCharset) of
                  {ok, Res} -> Res;
                  {error, einval} -> throw({bad_charset, SourceCharset})
              end,
              %% The descriptor is released in `after' so that a failed decode
              %% or conversion does not leak it.
              DecodedData =
                  try
                      Unescaped = re:replace(Data, "_", " ", [{return, binary}, global]),
                      Raw = case Type of
                          <<"q">> -> decode_quoted_printable(Unescaped);
                          <<"b">> -> decode_base64(Unescaped)
                      end,
                      {ok, Converted} = iconv:conv(CD, Raw),
                      Converted
                  after
                      iconv:close(CD)
                  end,
              Following = binstr:substr(Value, AllStart + AllLen + 1),
              Offset = case re:run(Following, "^([\s\t\n\r]+)=\\?[-A-Za-z0-9_]+\\?[^\s]\\?[^\s]+\\?=", [ungreedy]) of
                  nomatch ->
                      % no 2047 block immediately following
                      1;
                  {match, [{_, _}, {_, WhiteSpaceLen}]} ->
                      1 + WhiteSpaceLen
              end,
              NewValue = list_to_binary([
                  binstr:substr(Value, 1, AllStart),
                  DecodedData,
                  binstr:substr(Value, AllStart + AllLen + Offset)
              ]),
              decode_header(NewValue, Charset)
      end.
  %% Decode one MIME 1.0 component into a `{Type, SubType, Headers, Parameters, Body}'
  %% tuple.
  %%
  %% The disposition comes from the Content-Disposition header and defaults to
  %% `inline' with no parameters. The Content-Type header selects the handling:
  %%   * multipart/*    - the body is split on the boundary and every part is
  %%                      decoded recursively; a missing boundary parameter
  %%                      raises `no_boundary';
  %%   * message/rfc822 - the body is parsed as a complete nested message;
  %%   * anything else  - the body is transfer-decoded and charset-converted;
  %%   * no header      - text/plain in us-ascii, transfer-decoded only.
  %% Any MIME version other than `<<"1.0">>' raises `{mime_version, Version}'.
  decode_component(Headers, Body, <<"1.0">> = MimeVsn, Options) ->
      {Disposition, DispositionParams} =
          case parse_content_disposition(get_header_value(<<"Content-Disposition">>, Headers)) of
              {_, _} = Parsed -> Parsed;
              _ -> {<<"inline">>, []} % defaults
          end,
      %% Builds the three-key parameter list around a set of content-type params.
      WithDisposition = fun(ContentTypeParams) ->
          [
              {<<"content-type-params">>, ContentTypeParams},
              {<<"disposition">>, Disposition},
              {<<"disposition-params">>, DispositionParams}
          ]
      end,
      case parse_content_type(get_header_value(<<"Content-Type">>, Headers)) of
          {<<"multipart">>, SubType, ContentTypeParams} ->
              case proplists:get_value(<<"boundary">>, ContentTypeParams) of
                  undefined ->
                      erlang:error(no_boundary);
                  Boundary ->
                      Delimiter = list_to_binary(["--", Boundary]),
                      Parts = split_body_by_boundary(Body, Delimiter, MimeVsn, Options),
                      {<<"multipart">>, SubType, Headers, WithDisposition(ContentTypeParams), Parts}
              end;
          {<<"message">>, <<"rfc822">>, ContentTypeParams} ->
              {InnerHeaders, InnerBody} = parse_headers(Body),
              Inner = decode(InnerHeaders, InnerBody, Options),
              {<<"message">>, <<"rfc822">>, Headers, WithDisposition(ContentTypeParams), Inner};
          {Type, SubType, ContentTypeParams} ->
              TransferEncoding = get_header_value(<<"Content-Transfer-Encoding">>, Headers),
              Charset = proplists:get_value(<<"charset">>, ContentTypeParams),
              OutEncoding = proplists:get_value(encoding, Options, none),
              Decoded = decode_body(TransferEncoding, Body, Charset, OutEncoding),
              {Type, SubType, Headers, WithDisposition(ContentTypeParams), Decoded};
          undefined -> % defaults
              Defaults = WithDisposition([{<<"charset">>, <<"us-ascii">>}]),
              TransferEncoding = get_header_value(<<"Content-Transfer-Encoding">>, Headers),
              {<<"text">>, <<"plain">>, Headers, Defaults, decode_body(TransferEncoding, Body)}
      end;
  decode_component(_Headers, _Body, Other, _Options) ->
      erlang:error({mime_version, Other}).
  -spec get_header_value(Needle :: binary(), Headers :: [{binary(), binary()}], Default :: any()) -> binary() | any().
  %% @doc Do a case-insensitive header lookup to return that header's value, or the specified default.
  %% When the header occurs more than once, the first occurrence in `Headers'
  %% wins. The needle is lower-cased once and the scan stops at the first match.
  get_header_value(Needle, Headers, Default) ->
      LowerNeedle = binstr:to_lower(Needle),
      IsOtherHeader = fun({Header, _Value}) -> binstr:to_lower(Header) =/= LowerNeedle end,
      case lists:dropwhile(IsOtherHeader, Headers) of
          [{_Header, Value} | _] ->
              Value;
          [] ->
              Default
      end.
  -spec get_header_value(Needle :: binary(), Headers :: [{binary(), binary()}]) -> binary() | 'undefined'.
  %% @doc Do a case-insensitive header lookup to return the header's value, or `undefined'.
  %% Shorthand for get_header_value/3 with `undefined' as the default.
  get_header_value(Needle, Headers) ->
      get_header_value(Needle, Headers, undefined).
  -spec parse_with_comments(Value :: binary()) -> binary() | no_return();
                           (Value :: atom()) -> atom().
  %% Remove comments and quoting from a header value (see parse_with_comments/4).
  %% Anything that is not a binary, such as `undefined' for a missing header,
  %% is returned unchanged.
  parse_with_comments(Value) ->
      case is_binary(Value) of
          true -> parse_with_comments(Value, [], 0, false);
          false -> Value
      end.
  -spec parse_with_comments(Value :: binary(), Acc :: list(), Depth :: non_neg_integer(), Quotes :: boolean()) -> binary() | no_return().
  %% Scanner behind parse_with_comments/1.
  %%
  %% Walks the value byte by byte. `Kept' collects the bytes to keep (in
  %% reverse), `Nesting' is the current depth of parenthesised comments and the
  %% last argument says whether the scanner is inside a double-quoted string.
  %% Comment text is dropped, the quote characters themselves are dropped, and
  %% a backslash followed by a byte in the range 33..126 yields just that byte.
  %% The collected bytes are passed through binstr:strip/1.
  %% Raises `unterminated_quotes' or `unterminated_comment' when the input ends
  %% inside a quoted string or a comment.

  %% --- end of input ---
  parse_with_comments(<<>>, _Kept, _Nesting, true) ->
      erlang:error(unterminated_quotes);
  parse_with_comments(<<>>, _Kept, Nesting, _InQuotes) when Nesting > 0 ->
      erlang:error(unterminated_comment);
  parse_with_comments(<<>>, Kept, _Nesting, _InQuotes) ->
      binstr:strip(list_to_binary(lists:reverse(Kept)));
  %% --- backslash inside a comment: dropped, together with an escaped byte ---
  parse_with_comments(<<$\\, Next, Rest/binary>>, Kept, Nesting, InQuotes)
          when Nesting > 0, Next > 32, Next < 127 ->
      parse_with_comments(Rest, Kept, Nesting, InQuotes);
  parse_with_comments(<<$\\, Rest/binary>>, Kept, Nesting, InQuotes) when Nesting > 0 ->
      parse_with_comments(Rest, Kept, Nesting, InQuotes);
  %% --- backslash outside a comment: keep the escaped byte, or the backslash ---
  parse_with_comments(<<$\\, Next, Rest/binary>>, Kept, Nesting, InQuotes)
          when Next > 32, Next < 127 ->
      parse_with_comments(Rest, [Next | Kept], Nesting, InQuotes);
  parse_with_comments(<<$\\, Rest/binary>>, Kept, Nesting, InQuotes) ->
      parse_with_comments(Rest, [$\\ | Kept], Nesting, InQuotes);
  %% --- comment delimiters only count outside quoted strings ---
  parse_with_comments(<<$(, Rest/binary>>, Kept, Nesting, false) ->
      parse_with_comments(Rest, Kept, Nesting + 1, false);
  parse_with_comments(<<$), Rest/binary>>, Kept, Nesting, false) when Nesting > 0 ->
      parse_with_comments(Rest, Kept, Nesting - 1, false);
  %% --- any other byte inside a comment is dropped ---
  parse_with_comments(<<_Dropped, Rest/binary>>, Kept, Nesting, InQuotes) when Nesting > 0 ->
      parse_with_comments(Rest, Kept, Nesting, InQuotes);
  %% --- a double quote toggles the quoted state and is itself dropped ---
  parse_with_comments(<<$", Rest/binary>>, Kept, Nesting, InQuotes) when is_boolean(InQuotes) ->
      parse_with_comments(Rest, Kept, Nesting, not InQuotes);
  %% --- everything else is kept ---
  parse_with_comments(<<Byte, Rest/binary>>, Kept, Nesting, InQuotes) ->
      parse_with_comments(Rest, [Byte | Kept], Nesting, InQuotes).
  -spec parse_content_type(Value :: 'undefined') -> 'undefined';
                          (Value :: binary()) -> {binary(), binary(), [{binary(), binary()}]}.
  %% Split a Content-Type header value into `{Type, SubType, Parameters}'.
  %% `Type' and `SubType' are lower-cased. The parameters are parsed by
  %% parse_content_disposition/1, which lower-cases their keys.
  %% `undefined' (no header) is passed through.
  %% Throws `bad_content_type' when there is no `/' after at least one type
  %% character, or when a parameter is malformed.
  parse_content_type(undefined) ->
      undefined;
  parse_content_type(String) ->
      try parse_content_disposition(String) of
          {RawType, Parameters} ->
              SlashAt = binstr:strchr(RawType, $/),
              if
                  SlashAt < 2 ->
                      throw(bad_content_type);
                  true ->
                      Type = binstr:substr(RawType, 1, SlashAt - 1),
                      SubType = binstr:substr(RawType, SlashAt + 1),
                      {binstr:to_lower(Type), binstr:to_lower(SubType), Parameters}
              end
      catch
          throw:bad_disposition ->
              throw(bad_content_type)
      end.
  -spec parse_content_disposition(Value :: 'undefined') -> 'undefined';
                                 (String :: binary()) -> {binary(), [{binary(), binary()}]}.
  %% Split a `value; key=val; key=val' header into `{Value, [{Key, Val}]}'.
  %% Comments and quoting are removed first (parse_with_comments/1). The
  %% leading value and every key are lower-cased; parameter values are kept as
  %% written. `undefined' (no header) is passed through.
  %% Throws `bad_disposition' for a parameter that has no `=' after at least
  %% one key character.
  parse_content_disposition(undefined) ->
      undefined;
  parse_content_disposition(String) ->
      [Disposition | RawParams] = binstr:split(parse_with_comments(String), <<";">>),
      ParseParam = fun(Raw) ->
          Trimmed = binstr:strip(binstr:strip(Raw), both, $\t),
          EqualsAt = binstr:strchr(Trimmed, $=),
          if
              EqualsAt < 2 ->
                  throw(bad_disposition);
              true ->
                  Key = binstr:substr(Trimmed, 1, EqualsAt - 1),
                  Value = binstr:substr(Trimmed, EqualsAt + 1),
                  {binstr:to_lower(Key), Value}
          end
      end,
      Params = lists:map(ParseParam, RawParams),
      {binstr:to_lower(Disposition), Params}.
  %% Split a multipart body into its parts and decode each of them.
  %% `Boundary' is the delimiter including its leading "--". The region between
  %% the first delimiter and the closing delimiter (`Boundary' followed by "--")
  %% is cut into parts by split_body_by_boundary_/3. Parts with an empty body
  %% are discarded and the rest are decoded with decode_component/4.
  %% Raises `missing_boundary' or `missing_last_boundary' when the respective
  %% delimiter is not found.
  split_body_by_boundary(Body, Boundary, MimeVsn, Options) ->
      FirstAt = binstr:strpos(Body, Boundary),
      LastAt = binstr:strpos(Body, list_to_binary([Boundary, "--"])),
      if
          FirstAt =:= 0 ->
              erlang:error(missing_boundary);
          LastAt =:= 0 ->
              erlang:error(missing_last_boundary);
          true ->
              Inner = binstr:substr(Body, FirstAt + byte_size(Boundary), LastAt - FirstAt),
              % from here on every delimiter is preceded by a CRLF
              Parts = split_body_by_boundary_(Inner, list_to_binary(["\r\n", Boundary]), []),
              NonEmpty = [Part || {_, PartBody} = Part <- Parts, byte_size(PartBody) =/= 0],
              [decode_component(PartHeaders, PartBody, MimeVsn, Options)
                  || {PartHeaders, PartBody} <- NonEmpty]
      end.
  %% Cut the region between the outer delimiters into `{Headers, Body}' pairs.
  %% Each round discards everything up to and including the first CRLF (the
  %% rest of the delimiter line), then takes the text up to the next
  %% CRLF-prefixed delimiter and runs parse_headers/1 on it. Text with no
  %% further delimiter becomes a final part with an empty header list.
  split_body_by_boundary_(<<>>, _Boundary, Acc) ->
      lists:reverse(Acc);
  split_body_by_boundary_(Body, Boundary, Acc) ->
      % drop the remainder of the delimiter line
      Chunk = binstr:substr(Body, binstr:strpos(Body, "\r\n") + 2),
      case binstr:strpos(Chunk, Boundary) of
          0 ->
              lists:reverse(Acc, [{[], Chunk}]);
          DelimiterAt ->
              Remaining = binstr:substr(Chunk, DelimiterAt + byte_size(Boundary)),
              Part = parse_headers(binstr:substr(Chunk, 1, DelimiterAt - 1)),
              split_body_by_boundary_(Remaining, Boundary, [Part | Acc])
      end.
  -spec parse_headers(Body :: binary()) -> {[{binary(), binary()}], binary()}.
  %% @doc Parse the headers off of a message and return a list of headers and the trailing body.
  %% Input without any CRLF has no headers and is returned whole as the body.
  %% Input that starts with a CRLF has no headers either, and the body is what
  %% follows that CRLF.
  parse_headers(Body) ->
      case binstr:strpos(Body, "\r\n") of
          0 ->
              {[], Body};
          1 ->
              {[], binstr:substr(Body, 3)};
          EolAt ->
              FirstLine = binstr:substr(Body, 1, EolAt - 1),
              Remaining = binstr:substr(Body, EolAt + 2),
              parse_headers(Remaining, FirstLine, [])
      end.
  %% Line-by-line worker for parse_headers/1.
  %%
  %% `Body' is the unread remainder, `Line' the current line without its CRLF
  %% and `Headers' the headers collected so far, newest first.
  %%   * A line that starts with a space or tab continues the previous header.
  %%     Its text, minus that first whitespace byte, is appended to the previous
  %%     value. If there is no previous header, header parsing stops.
  %%   * A line whose text before the first `:' consists only of bytes 33..126
  %%     is a header. Its value is stripped and every byte rejected by
  %%     filter_non_ascii/1 is replaced with `?'.
  %%   * Any other line ends the headers and is returned as the start of the
  %%     body.
  %% Parsing also ends at an empty line (a CRLF right at the start of the
  %% remainder) or when the remainder has no CRLF left.
  parse_headers(Body, <<First, _/binary>> = Line, []) when First =:= $\s; First =:= $\t ->
      % continuation line without a header to continue
      {[], list_to_binary([Line, "\r\n", Body])};
  parse_headers(Body, <<First, Continuation/binary>>, Headers) when First =:= $\s; First =:= $\t ->
      % folded header
      [{FieldName, Previous} | Earlier] = Headers,
      Collected = [{FieldName, list_to_binary([Previous, Continuation])} | Earlier],
      case binstr:strpos(Body, "\r\n") of
          0 ->
              {lists:reverse(Collected), Body};
          1 ->
              {lists:reverse(Collected), binstr:substr(Body, 3)};
          EolAt ->
              parse_headers(binstr:substr(Body, EolAt + 2), binstr:substr(Body, 1, EolAt - 1), Collected)
      end;
  parse_headers(Body, Line, Headers) ->
      %% Result when Line is not a header: it goes back in front of the body.
      NotAHeader = fun() ->
          {lists:reverse(Headers), list_to_binary([Line, "\r\n", Body])}
      end,
      case binstr:strchr(Line, $:) of
          0 ->
              NotAHeader();
          ColonAt ->
              FieldName = binstr:substr(Line, 1, ColonAt - 1),
              IsNameByte = fun(X) -> X > 32 andalso X < 127 end,
              case binstr:all(IsNameByte, FieldName) of
                  false ->
                      NotAHeader();
                  true ->
                      IsValueByte = fun(X) -> (X > 31 andalso X < 127) orelse X == 9 end,
                      RawValue = binstr:strip(binstr:substr(Line, ColonAt + 1)),
                      FieldValue = case binstr:all(IsValueByte, RawValue) of
                          true ->
                              RawValue;
                          _ ->
                              list_to_binary([filter_non_ascii(C) || <<C:8>> <= RawValue])
                      end,
                      Collected = [{FieldName, FieldValue} | Headers],
                      case binstr:strpos(Body, "\r\n") of
                          0 ->
                              {lists:reverse(Collected), Body};
                          1 ->
                              {lists:reverse(Collected), binstr:substr(Body, 3)};
                          EolAt ->
                              parse_headers(binstr:substr(Body, EolAt + 2), binstr:substr(Body, 1, EolAt - 1), Collected)
                      end
              end
      end.
  %% Map one byte to a one-byte binary: tab and the bytes 32..126 are kept,
  %% everything else becomes `?'.
  filter_non_ascii(C) ->
      Keep = (C == 9) orelse (C > 31 andalso C < 127),
      case Keep of
          true -> <<C>>;
          false -> <<"?">>
      end.
  %% Transfer-decode a body and convert it from `InEncoding' to `OutEncoding'.
  %%
  %% `Type' is the Content-Transfer-Encoding value (or `undefined'). When no
  %% output encoding is requested (`none'), or the part declares no charset
  %% (`undefined'), no charset conversion happens: bytes >= 128 are removed
  %% from the raw body, which is then transfer-decoded.
  %% Otherwise the decoded body is converted with iconv. iconv:conv_chunked/2
  %% is tried first and iconv:conv/2 is the fallback if that call raises.
  %% Throws `{bad_charset, Name}' when iconv rejects the source charset.
  decode_body(Type, Body, _InEncoding, none) ->
      decode_body(Type, << <<X/integer>> || <<X>> <= Body, X < 128 >>);
  decode_body(Type, Body, undefined, _OutEncoding) ->
      decode_body(Type, << <<X/integer>> || <<X>> <= Body, X < 128 >>);
  decode_body(Type, Body, InEncoding, OutEncoding) ->
      NewBody = decode_body(Type, Body),
      SourceCharset = fix_encoding(InEncoding),
      CD = case iconv:open(OutEncoding, SourceCharset) of
          {ok, Res} -> Res;
          {error, einval} -> throw({bad_charset, SourceCharset})
      end,
      %% The descriptor is released in `after' so that a failed conversion
      %% does not leak it.
      try
          {ok, Result} =
              try iconv:conv_chunked(CD, NewBody) of
                  {ok, _} = Chunked -> Chunked
              catch
                  % deliberate best-effort fallback to the plain conversion call
                  _:_ ->
                      iconv:conv(CD, NewBody)
              end,
          Result
      after
          iconv:close(CD)
      end.
  -spec decode_body(Type :: binary() | 'undefined', Body :: binary()) -> binary().
  %% Undo the content transfer encoding of a body.
  %% `Type' is the Content-Transfer-Encoding value, compared case-insensitively.
  %% `quoted-printable' and `base64' are decoded. Every other value, and
  %% `undefined' (no header), returns the body unchanged.
  decode_body(undefined, Body) ->
      Body;
  decode_body(Type, Body) ->
      case binstr:to_lower(Type) of
          <<"quoted-printable">> -> decode_quoted_printable(Body);
          <<"base64">> -> decode_base64(Body);
          _Other -> Body
      end.
  %% Decode base64 data with base64:mime_decode/1, which skips characters
  %% outside the base64 alphabet (such as the line breaks of a wrapped body).
  decode_base64(Encoded) ->
      base64:mime_decode(Encoded).
  %% Decode a quoted-printable body.
  %% The body is split into its first line (including the CRLF, if any) and the
  %% remainder, and both are handed to decode_quoted_printable/3.
  decode_quoted_printable(Body) ->
      {FirstLine, Remainder} =
          case binstr:strpos(Body, "\r\n") of
              0 -> {Body, <<>>};
              EolAt -> {binstr:substr(Body, 1, EolAt + 1), binstr:substr(Body, EolAt + 2)}
          end,
      decode_quoted_printable(FirstLine, Remainder, []).
  %% Line loop of the quoted-printable decoder.
  %% `Line' is the current line (with its CRLF when it has one), `Rest' the
  %% undecoded remainder and `Acc' the decoded lines so far, newest first.
  %% Each line is decoded with decode_quoted_printable_line/2. When both the
  %% line and the remainder are empty, the accumulated lines are flattened into
  %% one binary.
  decode_quoted_printable(<<>>, <<>>, Acc) ->
      list_to_binary(lists:reverse(Acc));
  decode_quoted_printable(Line, Rest, Acc) ->
      {NextLine, NextRest} =
          case binstr:strpos(Rest, "\r\n") of
              0 -> {Rest, <<>>};
              EolAt -> {binstr:substr(Rest, 1, EolAt + 1), binstr:substr(Rest, EolAt + 2)}
          end,
      decode_quoted_printable(NextLine, NextRest, [decode_quoted_printable_line(Line, []) | Acc]).
  %% Decode a single quoted-printable line into a list of bytes (the CRLF, when
  %% kept, is a nested "\r\n" string).
  %%   * `=' at the end of the line, optionally followed by CRLF or by nothing
  %%     but spaces/tabs, is a soft line break and ends the line without output;
  %%   * `=' followed by two hex digits yields the byte with that value;
  %%   * spaces/tabs are dropped when nothing but spaces/tabs follows them up to
  %%     the end of the binary;
  %%   * every other byte is copied.
  %% Throws `badchar' for `=' followed by whitespace and then other text, and
  %% for `=' followed by two bytes that are not both hex digits.
  decode_quoted_printable_line(<<>>, Acc) ->
      lists:reverse(Acc);
  decode_quoted_printable_line(<<"\r\n">>, Acc) ->
      lists:reverse(["\r\n" | Acc]);
  decode_quoted_printable_line(<<"=">>, Acc) ->
      % soft newline
      lists:reverse(Acc);
  decode_quoted_printable_line(<<"=\r\n">>, Acc) ->
      % soft newline
      lists:reverse(Acc);
  decode_quoted_printable_line(<<$=, Blank, Rest/binary>>, Acc) when Blank =:= $\s; Blank =:= $\t ->
      OnlyBlanks = binstr:all(fun(X) -> X =:= $\s orelse X =:= $\t end, Rest),
      case OnlyBlanks of
          true -> lists:reverse(Acc);
          false -> throw(badchar)
      end;
  decode_quoted_printable_line(<<$=, Hex:2/binary, Rest/binary>>, Acc) ->
      IsHexDigit = fun(C) ->
          (C >= $0 andalso C =< $9) orelse (C >= $A andalso C =< $F) orelse (C >= $a andalso C =< $f)
      end,
      case binstr:all(IsHexDigit, Hex) of
          true ->
              Byte = list_to_integer(binary_to_list(Hex), 16),
              decode_quoted_printable_line(Rest, [Byte | Acc]);
          false ->
              throw(badchar)
      end;
  decode_quoted_printable_line(<<Blank, Rest/binary>>, Acc) when Blank =:= $\s; Blank =:= $\t ->
      % if the rest of the line is whitespace, truncate it
      case binstr:all(fun(X) -> X =:= $\s orelse X =:= $\t end, Rest) of
          true -> lists:reverse(Acc);
          false -> decode_quoted_printable_line(Rest, [Blank | Acc])
      end;
  decode_quoted_printable_line(<<Byte, Rest/binary>>, Acc) ->
      decode_quoted_printable_line(Rest, [Byte | Acc]).
  %% Check the standard message headers one by one with check_headers/2.
  %% The header list is passed reversed so that headers added by
  %% check_headers/2 end up at the end of the final list.
  check_headers(Headers) ->
      check_headers(
          [<<"MIME-Version">>, <<"Date">>, <<"From">>, <<"Message-ID">>, <<"References">>, <<"Subject">>],
          lists:reverse(Headers)
      ).
  %% Walk the list of header names to check and add what is missing.
  %% `Headers' is in reverse order and is reversed back at the end.
  %%   * MIME-Version - defaults to "1.0";
  %%   * Date         - defaults to smtp_util:rfc5322_timestamp/0;
  %%   * From         - mandatory, raises `missing_from' when absent;
  %%   * Message-ID   - defaults to smtp_util:generate_message_id/0;
  %%   * References   - when absent it is set to the In-Reply-To value, if any.
  %%                    When present but lacking the In-Reply-To value, that
  %%                    value is appended and the header is replaced;
  %%   * any other name is left alone.
  %% The old References header is removed case-insensitively, matching the
  %% case-insensitive lookup, so a differently-cased header is not duplicated.
  check_headers([], Headers) ->
      lists:reverse(Headers);
  check_headers([Header | Tail], Headers) ->
      case get_header_value(Header, Headers) of
          undefined when Header =:= <<"MIME-Version">> ->
              check_headers(Tail, [{<<"MIME-Version">>, <<"1.0">>} | Headers]);
          undefined when Header =:= <<"Date">> ->
              check_headers(Tail, [{<<"Date">>, list_to_binary(smtp_util:rfc5322_timestamp())} | Headers]);
          undefined when Header =:= <<"From">> ->
              erlang:error(missing_from);
          undefined when Header =:= <<"Message-ID">> ->
              check_headers(Tail, [{<<"Message-ID">>, list_to_binary(smtp_util:generate_message_id())} | Headers]);
          undefined when Header =:= <<"References">> ->
              case get_header_value(<<"In-Reply-To">>, Headers) of
                  undefined ->
                      check_headers(Tail, Headers); % ok, whatever
                  ReplyID ->
                      check_headers(Tail, [{<<"References">>, ReplyID} | Headers])
              end;
          References when Header =:= <<"References">> ->
              % check if the in-reply-to header, if present, is in references
              case get_header_value(<<"In-Reply-To">>, Headers) of
                  undefined ->
                      check_headers(Tail, Headers); % ok, whatever
                  ReplyID ->
                      case binstr:strpos(binstr:to_lower(References), binstr:to_lower(ReplyID)) of
                          0 ->
                              % tack the in-reply-to id onto the end of References
                              IsOtherHeader = fun({Name, _Value}) ->
                                  binstr:to_lower(Name) =/= <<"references">>
                              end,
                              Others = lists:filter(IsOtherHeader, Headers),
                              Extended = list_to_binary([References, " ", ReplyID]),
                              check_headers(Tail, [{<<"References">>, Extended} | Others]);
                          _Index ->
                              check_headers(Tail, Headers) % nothing to do
                      end
              end;
          _ ->
              check_headers(Tail, Headers)
      end.
  %% Make sure the Content-Type, Content-Disposition and
  %% Content-Transfer-Encoding headers are dealt with for one component.
  %% Returns `{Parameters, Headers}' as produced by ensure_content_headers/7.
  %% The header list is passed reversed so that added headers end up last.
  ensure_content_headers(Type, SubType, Parameters, Headers, Body, Toplevel) ->
      ensure_content_headers(
          [<<"Content-Type">>, <<"Content-Disposition">>, <<"Content-Transfer-Encoding">>],
          Type, SubType, Parameters, lists:reverse(Headers), Body, Toplevel
      ).
  %% Worker for ensure_content_headers/6: handles one header name per step.
  %% `Headers' is in reverse order and is reversed back at the end; the result
  %% is `{Parameters, Headers}'. A header that is already present is never
  %% touched. For a missing header:
  %%   * Content-Type, component is not text/plain: the header is built from
  %%     the type and the content-type params. Multipart components get a
  %%     generated boundary when none is given. Text components get a guessed
  %%     charset when none is given. The params actually used are stored back
  %%     into `Parameters'.
  %%   * Content-Type, text/plain: nothing is added for us-ascii (the default);
  %%     any other charset produces a header and updates `Parameters'.
  %%   * Content-Transfer-Encoding (not for multipart): taken from the
  %%     `transfer-encoding' parameter or guessed from the body. `7bit' is the
  %%     default and produces no header.
  %%   * Content-Disposition (only below the top level): built from the
  %%     disposition parameters, defaulting to `inline'.
  ensure_content_headers([], _, _, Parameters, Headers, _, _) ->
      {Parameters, lists:reverse(Headers)};
  ensure_content_headers([Header | Tail], Type, SubType, Parameters, Headers, Body, Toplevel) ->
      case get_header_value(Header, Headers) of
          undefined when Header == <<"Content-Type">>, Type =/= <<"text">>;
                         Header == <<"Content-Type">>, SubType =/= <<"plain">> ->
              % no content-type header, and it is not text/plain
              MimeType = io_lib:format("~s/~s", [Type, SubType]),
              GivenParams = proplists:get_value(<<"content-type-params">>, Parameters, []),
              UsedParams = case Type of
                  <<"multipart">> ->
                      Boundary = case proplists:get_value(<<"boundary">>, GivenParams) of
                          undefined ->
                              list_to_binary(smtp_util:generate_message_boundary());
                          GivenBoundary ->
                              GivenBoundary
                      end,
                      [{<<"boundary">>, Boundary} | proplists:delete(<<"boundary">>, GivenParams)];
                  <<"text">> ->
                      TextCharset = case proplists:get_value(<<"charset">>, GivenParams) of
                          undefined ->
                              guess_charset(Body);
                          GivenCharset ->
                              GivenCharset
                      end,
                      [{<<"charset">>, TextCharset} | proplists:delete(<<"charset">>, GivenParams)];
                  _ ->
                      GivenParams
              end,
              HeaderValue = binstr:join([MimeType | encode_parameters(UsedParams)], ";"),
              UpdatedParameters =
                  [{<<"content-type-params">>, UsedParams} | proplists:delete(<<"content-type-params">>, Parameters)],
              ensure_content_headers(Tail, Type, SubType, UpdatedParameters,
                  [{<<"Content-Type">>, HeaderValue} | Headers], Body, Toplevel);
          undefined when Header == <<"Content-Type">> ->
              % no content-type header and it is text/plain
              PlainParams = proplists:get_value(<<"content-type-params">>, Parameters, []),
              PlainCharset = case proplists:get_value(<<"charset">>, PlainParams) of
                  undefined ->
                      guess_charset(Body);
                  Declared ->
                      Declared
              end,
              if
                  PlainCharset =:= <<"us-ascii">> ->
                      % the default
                      ensure_content_headers(Tail, Type, SubType, Parameters, Headers, Body, Toplevel);
                  true ->
                      PlainUsed = [{<<"charset">>, PlainCharset} | proplists:delete(<<"charset">>, PlainParams)],
                      PlainValue = binstr:join([<<"text/plain">> | encode_parameters(PlainUsed)], ";"),
                      PlainParameters =
                          [{<<"content-type-params">>, PlainUsed} | proplists:delete(<<"content-type-params">>, Parameters)],
                      ensure_content_headers(Tail, Type, SubType, PlainParameters,
                          [{<<"Content-Type">>, PlainValue} | Headers], Body, Toplevel)
              end;
          undefined when Header == <<"Content-Transfer-Encoding">>, Type =/= <<"multipart">> ->
              TransferEncoding = case proplists:get_value(<<"transfer-encoding">>, Parameters) of
                  undefined ->
                      guess_best_encoding(Body);
                  Requested ->
                      Requested
              end,
              WithEncoding = case TransferEncoding of
                  <<"7bit">> ->
                      Headers;
                  _ ->
                      [{<<"Content-Transfer-Encoding">>, TransferEncoding} | Headers]
              end,
              ensure_content_headers(Tail, Type, SubType, Parameters, WithEncoding, Body, Toplevel);
          undefined when Header == <<"Content-Disposition">>, Toplevel == false ->
              Disposition = proplists:get_value(<<"disposition">>, Parameters, <<"inline">>),
              DispositionParams = proplists:get_value(<<"disposition-params">>, Parameters, []),
              DispositionValue = binstr:join([Disposition | encode_parameters(DispositionParams)], ";"),
              ensure_content_headers(Tail, Type, SubType, Parameters,
                  [{<<"Content-Disposition">>, DispositionValue} | Headers], Body, Toplevel);
          _ ->
              ensure_content_headers(Tail, Type, SubType, Parameters, Headers, Body, Toplevel)
      end.
  %% Pick a charset name for a body: `us-ascii' when every byte is below 128,
  %% `utf-8' otherwise.
  guess_charset(Body) ->
      IsSevenBit = fun(Byte) -> Byte < 128 end,
      case binstr:all(IsSevenBit, Body) of
          true -> <<"us-ascii">>;
          false -> <<"utf-8">>
      end.
  %% Choose a Content-Transfer-Encoding for a body from a sample of it.
  %%
  %% Only the first 200 bytes are inspected. Bytes 32..126 plus CR and LF count
  %% as plain. The result is
  %%   * `7bit'             when every sampled byte is plain (an empty body
  %%                        included);
  %%   * `quoted-printable' when more than 80% (rounded) of them are plain;
  %%   * `base64'           otherwise.
  %% `7bit' is decided on the exact byte counts, not on the rounded percentage,
  %% so a sample that is 99.5% plain is not mislabelled as 7bit.
  guess_best_encoding(<<Body:200/binary, Rest/binary>>) when Rest =/= <<>> ->
      guess_best_encoding(Body);
  guess_best_encoding(<<>>) ->
      % nothing to encode, and it avoids dividing by zero below
      <<"7bit">>;
  guess_best_encoding(Body) ->
      Size = byte_size(Body),
      % get only the allowed ascii characters
      % TODO - this might not be the complete list
      Plain = << <<X>> || <<X>> <= Body, ((X > 31 andalso X < 127) orelse X == $\r orelse X == $\n) >>,
      FilteredSize = byte_size(Plain),
      Percent = round((FilteredSize / Size) * 100),
      % based on the share of printable characters, choose an encoding
      if
          FilteredSize =:= Size ->
              <<"7bit">>;
          Percent > 80 ->
              <<"quoted-printable">>;
          true ->
              <<"base64">>
      end.
  %% Encode a list of `{Key, Value}' parameters with encode_parameter/1.
  %% The special input `[[]]' is treated as "no parameters".
  encode_parameters([[]]) ->
      [];
  encode_parameters(Parameters) ->
      lists:map(fun encode_parameter/1, Parameters).
  %% Encode one header parameter as iodata: `Key=Value', or `Key="Escaped"'
  %% when escape_tspecial/3 reports that the value needs quoting.
  encode_parameter({Name, Value}) ->
      case escape_tspecial(Value, false, <<>>) of
          false ->
              [Name, $=, Value];
          {true, Escaped} ->
              [Name, $=, $", Escaped, $"]
      end.
  % See also: http://www.ietf.org/rfc/rfc2045.txt section 5.1
  %% Scan a parameter value for characters that force it to be quoted.
  %% Returns `false' when no such character was seen. Otherwise returns
  %% `{true, Escaped}', where `Escaped' is the value with every double quote
  %% and backslash preceded by a backslash. The second argument says whether a
  %% special character has been seen so far and the third is the output built
  %% so far.
  escape_tspecial(<<>>, false, _Acc) ->
      false;
  escape_tspecial(<<>>, IsSpecial, Acc) ->
      {IsSpecial, Acc};
  escape_tspecial(<<C, Rest/binary>>, _IsSpecial, Acc) when C =:= $"; C =:= $\\ ->
      % these two must be backslash-escaped inside the quoted string
      escape_tspecial(Rest, true, <<Acc/binary, $\\, C>>);
  escape_tspecial(<<C, Rest/binary>>, IsSpecial, Acc) ->
      % the remaining tspecials (and space) force quoting but are copied as-is
      NowSpecial = case lists:member(C, "()<>@,;:/[]?= ") of
          true -> true;
          false -> IsSpecial
      end,
      escape_tspecial(Rest, NowSpecial, <<Acc/binary, C>>).
  %% Turn a list of `{Key, Value}' headers into a list of header lines,
  %% starting encode_headers/2 with an empty result.
  encode_headers(HeaderPairs) ->
      encode_headers(HeaderPairs, []).
  %% Append the encoded lines of every `{Key, Value}' header to `EncodedHeaders'.
  %% Each header is rendered as "Key: Value" and folded by
  %% encode_folded_header/2. The lines of all headers are collected in one pass
  %% and appended once, instead of re-appending to the growing result for every
  %% header.
  encode_headers([], EncodedHeaders) ->
      EncodedHeaders;
  encode_headers(Headers, EncodedHeaders) ->
      FoldOne = fun({Key, Value}) ->
          encode_folded_header(list_to_binary([Key, ": ", Value]), [])
      end,
      EncodedHeaders ++ lists:flatmap(FoldOne, Headers).
  %% Fold one rendered header at its semicolons and append the resulting lines
  %% to `HeaderLines'.
  %% The header is cut after every `;'. Each continuation line starts with a
  %% tab, which is added unless the text already begins with one. A header
  %% without `;' is appended as a single line.
  encode_folded_header(Header, HeaderLines) ->
      case binstr:strchr(Header, $;) of
          0 ->
              HeaderLines ++ [Header];
          SemicolonAt ->
              FirstLine = binstr:substr(Header, 1, SemicolonAt),
              Continuation = case binstr:substr(Header, SemicolonAt + 1) of
                  <<$\t, _/binary>> = AlreadyTabbed ->
                      AlreadyTabbed;
                  Untabbed ->
                      list_to_binary(["\t", Untabbed])
              end,
              % not tail recursive; the depth is the number of semicolons
              HeaderLines ++ [FirstLine | encode_folded_header(Continuation, [])]
      end.
  633. encode_component(_Type, _SubType, Headers, Params, Body) ->
  634. if
  635. is_list(Body) -> % is this a multipart component?
  636. Boundary = proplists:get_value(<<"boundary">>, proplists:get_value(<<"content-type-params">>, Params)),
  637. [<<>>] ++ % blank line before start of component
  638. lists:flatmap(
  639. fun(Part) ->
  640. [list_to_binary([<<"--">>, Boundary])] ++ % start with the boundary
  641. encode_component_part(Part)
  642. end,
  643. Body
  644. ) ++ [list_to_binary([<<"--">>, Boundary, <<"--">>])] % final boundary (with /--$/)
  645. ++ [<<>>]; % blank line at the end of the multipart component
  646. true -> % or an inline component?
  647. %encode_component_part({Type, SubType, Headers, Params, Body})
  648. encode_body(
  649. get_header_value(<<"Content-Transfer-Encoding">>, Headers),
  650. [Body]
  651. )
  652. end.
  653. encode_component_part(Part) ->
  654. case Part of
  655. {<<"multipart">>, SubType, Headers, PartParams, Body} ->
  656. {FixedParams, FixedHeaders} = ensure_content_headers(<<"multipart">>, SubType, PartParams, Headers, Body, false),
  657. encode_headers(FixedHeaders) ++ [<<>>] ++
  658. encode_component(<<"multipart">>, SubType, FixedHeaders, FixedParams, Body);
  659. {Type, SubType, Headers, PartParams, Body} ->
  660. PartData = case Body of
  661. {_,_,_,_,_} -> encode_component_part(Body);
  662. String -> [String]
  663. end,
  664. {_FixedParams, FixedHeaders} = ensure_content_headers(Type, SubType, PartParams, Headers, Body, false),
  665. encode_headers(FixedHeaders) ++ [<<>>] ++
  666. encode_body(
  667. get_header_value(<<"Content-Transfer-Encoding">>, FixedHeaders),
  668. PartData
  669. );
  670. _ ->
  671. io:format("encode_component_part couldn't match Part to: ~p~n", [Part]),
  672. []
  673. end.
  674. encode_body(undefined, Body) ->
  675. Body;
  676. encode_body(Type, Body) ->
  677. case binstr:to_lower(Type) of
  678. <<"quoted-printable">> ->
  679. [InnerBody] = Body,
  680. encode_quoted_printable(InnerBody);
  681. <<"base64">> ->
  682. [InnerBody] = Body,
  683. wrap_to_76(base64:encode(InnerBody));
  684. _ -> Body
  685. end.
  686. wrap_to_76(String) ->
  687. [wrap_to_76(String, [])].
  688. wrap_to_76(<<>>, Acc) ->
  689. list_to_binary(lists:reverse(Acc));
  690. wrap_to_76(<<Head:76/binary, Tail/binary>>, Acc) ->
  691. wrap_to_76(Tail, [<<"\r\n">>, Head | Acc]);
  692. wrap_to_76(Head, Acc) ->
  693. list_to_binary(lists:reverse([<<"\r\n">>, Head | Acc])).
  694. encode_quoted_printable(Body) ->
  695. [encode_quoted_printable(Body, [], 0)].
  696. encode_quoted_printable(Body, Acc, L) when L >= 75 ->
  697. LastLine = case string:str(Acc, "\n") of
  698. 0 ->
  699. Acc;
  700. Index ->
  701. string:substr(Acc, 1, Index-1)
  702. end,
  703. %Len = length(LastLine),
  704. case string:str(LastLine, " ") of
  705. 0 when L =:= 75 ->
  706. % uh-oh, no convienient whitespace, just cram a soft newline in
  707. encode_quoted_printable(Body, [$\n, $\r, $= | Acc], 0);
  708. 1 when L =:= 75 ->
  709. % whitespace is the last character we wrote
  710. encode_quoted_printable(Body, [$\n, $\r, $= | Acc], 0);
  711. SIndex when (L - 75) < SIndex ->
  712. % okay, we can safely stick some whitespace in
  713. Prefix = string:substr(Acc, 1, SIndex-1),
  714. Suffix = string:substr(Acc, SIndex),
  715. NewAcc = lists:concat([Prefix, "\n\r=", Suffix]),
  716. encode_quoted_printable(Body, NewAcc, 0);
  717. _ ->
  718. % worst case, we're over 75 characters on the line
  719. % and there's no obvious break points, just stick one
  720. % in at position 75 and call it good. However, we have
  721. % to be very careful not to stick the soft newline in
  722. % the middle of an existing quoted-printable escape.
  723. % TODO - fix this to be less stupid
  724. I = 3, % assume we're at most 3 over our cutoff
  725. Prefix = string:substr(Acc, 1, I),
  726. Suffix = string:substr(Acc, I+1),
  727. NewAcc = lists:concat([Prefix, "\n\r=", Suffix]),
  728. encode_quoted_printable(Body, NewAcc, 0)
  729. end;
  730. encode_quoted_printable(<<>>, Acc, _L) ->
  731. list_to_binary(lists:reverse(Acc));
  732. encode_quoted_printable(<<$=, T/binary>> , Acc, L) ->
  733. encode_quoted_printable(T, [$D, $3, $= | Acc], L+3);
  734. encode_quoted_printable(<<$\r, $\n, T/binary>> , Acc, _L) ->
  735. encode_quoted_printable(T, [$\n, $\r | Acc], 0);
  736. encode_quoted_printable(<<H, T/binary>>, Acc, L) when H >= $!, H =< $< ->
  737. encode_quoted_printable(T, [H | Acc], L+1);
  738. encode_quoted_printable(<<H, T/binary>>, Acc, L) when H >= $>, H =< $~ ->
  739. encode_quoted_printable(T, [H | Acc], L+1);
  740. encode_quoted_printable(<<H, $\r, $\n, T/binary>>, Acc, _L) when H == $\s; H == $\t ->
  741. [[A, B]] = io_lib:format("~2.16.0B", [H]),
  742. encode_quoted_printable(T, [$\n, $\r, B, A, $= | Acc], 0);
  743. encode_quoted_printable(<<H, T/binary>>, Acc, L) when H == $\s; H == $\t ->
  744. encode_quoted_printable(T, [H | Acc], L+1);
  745. encode_quoted_printable(<<H, T/binary>>, Acc, L) ->
  746. [[A, B]] = io_lib:format("~2.16.0B", [H]),
  747. encode_quoted_printable(T, [B, A, $= | Acc], L+3).
  748. get_default_encoding() ->
  749. case code:ensure_loaded(iconv) of
  750. {error, _} ->
  751. none;
  752. {module, iconv} ->
  753. <<"utf-8//IGNORE">>
  754. end.
  755. % convert some common invalid character names into the correct ones
  756. fix_encoding(Encoding) when Encoding == <<"utf8">>; Encoding == <<"UTF8">> ->
  757. <<"UTF-8">>;
  758. fix_encoding(Encoding) ->
  759. Encoding.
  760. -ifdef(TEST).
  761. parse_with_comments_test_() ->
  762. [
  763. {"bleh",
  764. fun() ->
  765. ?assertEqual(<<"1.0">>, parse_with_comments(<<"1.0">>)),
  766. ?assertEqual(<<"1.0">>, parse_with_comments(<<"1.0 (produced by MetaSend Vx.x)">>)),
  767. ?assertEqual(<<"1.0">>, parse_with_comments(<<"(produced by MetaSend Vx.x) 1.0">>)),
  768. ?assertEqual(<<"1.0">>, parse_with_comments(<<"1.(produced by MetaSend Vx.x)0">>))
  769. end
  770. },
  771. {"comments that parse as empty",
  772. fun() ->
  773. ?assertEqual(<<>>, parse_with_comments(<<"(comment (nested (deeply)) (and (oh no!) again))">>)),
  774. ?assertEqual(<<>>, parse_with_comments(<<"(\\)\\\\)">>)),
  775. ?assertEqual(<<>>, parse_with_comments(<<"(by way of Whatever <redir@my.org>) (generated by Eudora)">>))
  776. end
  777. },
  778. {"some more",
  779. fun() ->
  780. ?assertEqual(<<":sysmail@ group. org, Muhammed. Ali @Vegas.WBA">>, parse_with_comments(<<"\":sysmail\"@ group. org, Muhammed.(the greatest) Ali @(the)Vegas.WBA">>)),
  781. ?assertEqual(<<"Pete <pete@silly.test>">>, parse_with_comments(<<"Pete(A wonderful \\) chap) <pete(his account)@silly.test(his host)>">>))
  782. end
  783. },
  784. {"non list values",
  785. fun() ->
  786. ?assertEqual(undefined, parse_with_comments(undefined)),
  787. ?assertEqual(17, parse_with_comments(17))
  788. end
  789. },
  790. {"Parens within quotes ignored",
  791. fun() ->
  792. ?assertEqual(<<"Height (from xkcd).eml">>, parse_with_comments(<<"\"Height (from xkcd).eml\"">>)),
  793. ?assertEqual(<<"Height (from xkcd).eml">>, parse_with_comments(<<"\"Height \(from xkcd\).eml\"">>))
  794. end
  795. },
  796. {"Escaped quotes are handled correctly",
  797. fun() ->
  798. ?assertEqual(<<"Hello \"world\"">>, parse_with_comments(<<"Hello \\\"world\\\"">>)),
  799. ?assertEqual(<<"<boss@nil.test>, Giant; \"Big\" Box <sysservices@example.net>">>, parse_with_comments(<<"<boss@nil.test>, \"Giant; \\\"Big\\\" Box\" <sysservices@example.net>">>))
  800. end
  801. },
  802. {"backslash not part of a quoted pair",
  803. fun() ->
  804. ?assertEqual(<<"AC \\ DC">>, parse_with_comments(<<"AC \\ DC">>)),
  805. ?assertEqual(<<"AC DC">>, parse_with_comments(<<"AC ( \\ ) DC">>))
  806. end
  807. },
  808. {"Unterminated quotes or comments",
  809. fun() ->
  810. ?assertError(unterminated_quotes, parse_with_comments(<<"\"Hello there ">>)),
  811. ?assertError(unterminated_quotes, parse_with_comments(<<"\"Hello there \\\"">>)),
  812. ?assertError(unterminated_comment, parse_with_comments(<<"(Hello there ">>)),
  813. ?assertError(unterminated_comment, parse_with_comments(<<"(Hello there \\\)">>))
  814. end
  815. }
  816. ].
  817. parse_content_type_test_() ->
  818. [
  819. {"parsing content types",
  820. fun() ->
  821. ?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain; charset=us-ascii (Plain text)">>)),
  822. ?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)),
  823. ?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"Text/Plain; Charset=\"us-ascii\"">>)),
  824. ?assertEqual({<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"----_=_NextPart_001_01C9DCAE.1F2CB390">>}]},
  825. parse_content_type(<<"multipart/mixed; boundary=\"----_=_NextPart_001_01C9DCAE.1F2CB390\"">>))
  826. end
  827. },
  828. {"parsing content type with a tab in it",
  829. fun() ->
  830. ?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain;\tcharset=us-ascii">>)),
  831. ?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}, {<<"foo">>, <<"bar">>}]}, parse_content_type(<<"text/plain;\tcharset=us-ascii;\tfoo=bar">>))
  832. end
  833. },
  834. {"invalid content types",
  835. fun() ->
  836. ?assertThrow(bad_content_type, parse_content_type(<<"text\\plain; charset=us-ascii">>)),
  837. ?assertThrow(bad_content_type, parse_content_type(<<"text/plain; charset us-ascii">>))
  838. end
  839. }
  840. ].
  841. parse_content_disposition_test_() ->
  842. [
  843. {"parsing valid dispositions",
  844. fun() ->
  845. ?assertEqual({<<"inline">>, []}, parse_content_disposition(<<"inline">>)),
  846. ?assertEqual({<<"inline">>, []}, parse_content_disposition(<<"inline;">>)),
  847. ?assertEqual({<<"attachment">>, [{<<"filename">>, <<"genome.jpeg">>}, {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}]}, parse_content_disposition(<<"attachment; filename=genome.jpeg;modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)),
  848. ?assertEqual({<<"text/plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_disposition(<<"text/plain; charset=us-ascii (Plain text)">>))
  849. end
  850. },
  851. {"invalid dispositions",
  852. fun() ->
  853. ?assertThrow(bad_disposition, parse_content_disposition(<<"inline; =bar">>)),
  854. ?assertThrow(bad_disposition, parse_content_disposition(<<"inline; bar">>))
  855. end
  856. }
  857. ].
  858. various_parsing_test_() ->
  859. [
  860. {"split_body_by_boundary test",
  861. fun() ->
  862. ?assertEqual([{[], <<"foo bar baz">>}], split_body_by_boundary_(<<"stuff\r\nfoo bar baz">>, <<"--bleh">>, [])),
  863. ?assertEqual([{[], <<"foo\r\n">>}, {[], <<>>}, {[], <<>>}, {[], <<"bar baz">>}], split_body_by_boundary_(<<"stuff\r\nfoo\r\n--bleh\r\n--bleh\r\n--bleh-- stuff\r\nbar baz">>, <<"--bleh">>, [])),
  864. %?assertEqual([{[], []}, {[], []}, {[], "bar baz"}], split_body_by_boundary_("\r\n--bleh\r\n--bleh\r\n", "--bleh", [])),
  865. %?assertMatch([{"text", "plain", [], _,"foo\r\n"}], split_body_by_boundary("stuff\r\nfoo\r\n--bleh\r\n--bleh\r\n--bleh-- stuff\r\nbar baz", "--bleh", "1.0"))
  866. ?assertEqual({[], <<"foo: bar\r\n">>}, parse_headers(<<"\r\nfoo: bar\r\n">>)),
  867. ?assertEqual({[{<<"foo">>, <<"barbaz">>}], <<>>}, parse_headers(<<"foo: bar\r\n baz\r\n">>)),
  868. ?assertEqual({[], <<" foo bar baz\r\nbam">>}, parse_headers(<<"\sfoo bar baz\r\nbam">>)),
  869. ok
  870. end
  871. },
  872. {"Headers with non-ASCII characters",
  873. fun() ->
  874. ?assertEqual({[{<<"foo">>, <<"bar ?? baz">>}], <<>>}, parse_headers(<<"foo: bar ø baz\r\n">>)),
  875. ?assertEqual({[], <<"bär: bar baz\r\n">>}, parse_headers(<<"bär: bar baz\r\n">>))
  876. end
  877. },
  878. {"Headers with tab characters",
  879. fun() ->
  880. ?assertEqual({[{<<"foo">>, <<"bar baz">>}], <<>>}, parse_headers(<<"foo: bar baz\r\n">>))
  881. end
  882. }
  883. ].
  884. -define(IMAGE_MD5, <<110,130,37,247,39,149,224,61,114,198,227,138,113,4,198,60>>).
  885. parse_example_mails_test_() ->
  886. Getmail = fun(File) ->
  887. {ok, Email} = file:read_file(string:concat("../testdata/", File)),
  888. %Email = binary_to_list(Bin),
  889. decode(Email)
  890. end,
  891. [
  892. {"parse a plain text email",
  893. fun() ->
  894. Decoded = Getmail("Plain-text-only.eml"),
  895. ?assertEqual(5, tuple_size(Decoded)),
  896. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  897. ?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
  898. ?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
  899. end
  900. },
  901. {"parse a plain text email with no content type",
  902. fun() ->
  903. Decoded = Getmail("Plain-text-only-no-content-type.eml"),
  904. ?assertEqual(5, tuple_size(Decoded)),
  905. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  906. ?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
  907. ?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
  908. end
  909. },
  910. {"parse a plain text email with no MIME header",
  911. fun() ->
  912. {Type, SubType, _Headers, _Properties, Body} =
  913. Getmail("Plain-text-only-no-MIME.eml"),
  914. ?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
  915. ?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
  916. end
  917. },
  918. {"parse an email that says it is multipart but contains no boundaries",
  919. fun() ->
  920. ?assertError(missing_boundary, Getmail("Plain-text-only-with-boundary-header.eml"))
  921. end
  922. },
  923. {"parse a multipart email with no MIME header",
  924. fun() ->
  925. ?assertError(non_mime_multipart, Getmail("rich-text-no-MIME.eml"))
  926. end
  927. },
  928. {"rich text",
  929. fun() ->
  930. %% pardon my naming here. apparently 'rich text' in mac mail
  931. %% means 'html'.
  932. Decoded = Getmail("rich-text.eml"),
  933. ?assertEqual(5, tuple_size(Decoded)),
  934. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  935. ?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
  936. ?assertEqual(2, length(Body)),
  937. [Plain, Html] = Body,
  938. ?assertEqual({5, 5}, {tuple_size(Plain), tuple_size(Html)}),
  939. ?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains rich text.">>}, Plain),
  940. ?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
  941. end
  942. },
  943. {"rich text no boundary",
  944. fun() ->
  945. ?assertError(no_boundary, Getmail("rich-text-no-boundary.eml"))
  946. end
  947. },
  948. {"rich text missing first boundary",
  949. fun() ->
  950. % TODO - should we handle this more elegantly?
  951. Decoded = Getmail("rich-text-missing-first-boundary.eml"),
  952. ?assertEqual(5, tuple_size(Decoded)),
  953. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  954. ?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
  955. ?assertEqual(1, length(Body)),
  956. [Html] = Body,
  957. ?assertEqual(5, tuple_size(Html)),
  958. ?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
  959. end
  960. },
  961. {"rich text missing last boundary",
  962. fun() ->
  963. ?assertError(missing_last_boundary, Getmail("rich-text-missing-last-boundary.eml"))
  964. end
  965. },
  966. {"rich text wrong last boundary",
  967. fun() ->
  968. ?assertError(missing_last_boundary, Getmail("rich-text-broken-last-boundary.eml"))
  969. end
  970. },
  971. {"rich text missing text content type",
  972. fun() ->
  973. %% pardon my naming here. apparently 'rich text' in mac mail
  974. %% means 'html'.
  975. Decoded = Getmail("rich-text-no-text-contenttype.eml"),
  976. ?assertEqual(5, tuple_size(Decoded)),
  977. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  978. ?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
  979. ?assertEqual(2, length(Body)),
  980. [Plain, Html] = Body,
  981. ?assertEqual({5, 5}, {tuple_size(Plain), tuple_size(Html)}),
  982. ?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains rich text.">>}, Plain),
  983. ?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
  984. end
  985. },
  986. {"text attachment only",
  987. fun() ->
  988. Decoded = Getmail("text-attachment-only.eml"),
  989. ?assertEqual(5, tuple_size(Decoded)),
  990. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  991. ?assertEqual({<<"multipart">>, <<"mixed">>}, {Type, SubType}),
  992. ?assertEqual(1, length(Body)),
  993. Rich = <<"{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf460\r\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\r\n{\\colortbl;\\red255\\green255\\blue255;}\r\n\\margl1440\\margr1440\\vieww9000\\viewh8400\\viewkind0\r\n\\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\ql\\qnatural\\pardirnatural\r\n\r\n\\f0\\fs24 \\cf0 This is a basic rtf file.}">>,
  994. ?assertMatch([{<<"text">>, <<"rtf">>, _, _, Rich}], Body)
  995. end
  996. },
  997. {"image attachment only",
  998. fun() ->
  999. Decoded = Getmail("image-attachment-only.eml"),
  1000. ?assertEqual(5, tuple_size(Decoded)),
  1001. {Type, SubType, _Headers, _Properties, Body} = Decoded,
  1002. ?assertEqual({<<"multipart">>, <<"mixed">>}, {Type, SubType}),
  1003. ?assertEqual(1, length(Body)),
  1004. ?assertMatch([{<<"image">>, <<"jpeg">>, _, _, _}], Body),
  1005. [H | _] = Body,
  1006. [{<<"image">>, <<"jpeg">>, _, Parameters, _Image}] = Body,
  1007. ?assertEqual(?IMAGE_MD5, erlang:md5(element(5, H))),
  1008. ?assertEqual(<<"inline">>, proplists:get_value(<<"disposition">>, Parameters)),
  1009. ?assertEqual(<<"chili-pepper.jpg">>, proplists:get_value(<<"filename">>, proplists:get_value(<<"disposition-params">>, Parameters))),
  1010. ?assertEqual(<<"chili-pepper.jpg">>, proplists:get_value(<<"name">>, proplists:get_value(<<"content-type-params">>, Parameters)))
  1011. end
  1012. },
  1013. {"message attachment only",
  1014. fun() ->
  1015. Decoded = Getmail("message-as-attachment.eml"),
  1016. ?assertMatch({<<"multipart">>, <<"mixed">>, _, _, _}, Decoded),
  1017. [Body] = element(5, Decoded),
  1018. ?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Body),
  1019. Subbody = element(5, Body),
  1020. ?assertMatch({<<"text">>, <<"plain">>, _, _, _}, Subbody),
  1021. ?assertEqual(<<"This message contains only plain text.\r\n">>, element(5, Subbody))
  1022. end
  1023. },
  1024. {"message, image, and rtf attachments.",
  1025. fun() ->
  1026. Decoded = Getmail("message-image-text-attachments.eml"),
  1027. ?assertMatch({<<"multipart">>, <<"mixed">>, _, _, _}, Decoded),
  1028. ?assertEqual(3, length(element(5, Decoded))),
  1029. [Message, Rtf, Image] = element(5, Decoded),
  1030. ?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Message),
  1031. Submessage = element(5, Message),
  1032. ?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains only plain text.\r\n">>}, Submessage),
  1033. ?assertMatch({<<"text">>, <<"rtf">>, _, _, _}, Rtf),
  1034. ?assertEqual(<<"{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf460\r\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\r\n{\\colortbl;\\red255\\green255\\blue255;}\r\n\\margl1440\\margr1440\\vieww9000\\viewh8400\\viewkind0\r\n\\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\ql\\qnatural\\pardirnatural\r\n\r\n\\f0\\fs24 \\cf0 This is a basic rtf file.}">>, element(5, Rtf)),
  1035. ?assertMatch({<<"image">>, <<"jpeg">>, _, _, _}, Image),
  1036. ?assertEqual(?IMAGE_MD5, erlang:md5(element(5, Image)))
  1037. end
  1038. },
  1039. {"Outlook 2007 with leading tabs in quoted-printable.",
  1040. fun() ->
  1041. Decoded = Getmail("outlook-2007.eml"),
  1042. ?assertMatch({<<"multipart">>, <<"alternative">>, _, _, _}, Decoded)
  1043. end
  1044. },
  1045. {"The gamut",
  1046. fun() ->
  1047. % multipart/alternative
  1048. % text/plain
  1049. % multipart/mixed
  1050. % text/html
  1051. % message/rf822
  1052. % multipart/mixed
  1053. % message/rfc822
  1054. % text/plain
  1055. % text/html
  1056. % message/rtc822
  1057. % text/plain
  1058. % text/html
  1059. % image/jpeg
  1060. % text/html
  1061. % text/rtf
  1062. % text/html
  1063. Decoded = Getmail("the-gamut.eml"),
  1064. ?assertMatch({<<"multipart">>, <<"alternative">>, _, _, _}, Decoded),
  1065. ?assertEqual(2, length(element(5, Decoded))),
  1066. [Toptext, Topmultipart] = element(5, Decoded),
  1067. ?assertMatch({<<"text">>, <<"plain">>, _, _, _}, Toptext),
  1068. ?assertEqual(<<"This is rich text.\r\n\r\nThe list is html.\r\n\r\nAttchments:\r\nan email containing an attachment of an email.\r\nan email of only plain text.\r\nan image\r\nan rtf file.\r\n">>, element(5, Toptext)),
  1069. ?assertEqual(9, length(element(5, Topmultipart))),
  1070. [Html, Messagewithin, _Brhtml, _Message, _Brhtml, Image, _Brhtml, Rtf, _Brhtml] = element(5, Topmultipart),
  1071. ?assertMatch({<<"text">>, <<"html">>, _, _, _}, Html),
  1072. ?assertEqual(<<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This</b> is <i>rich</i> text.<div><br></div><div>The list is html.</div><div><br></div><div>Attchments:</div><div><ul class=\"MailOutline\"><li>an email containing an attachment of an email.</li><li>an email of only plain text.</li><li>an image</li><li>an rtf file.</li></ul></div><div></div></body></html>">>, element(5, Html)),
  1073. ?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Messagewithin),
  1074. %?assertEqual(1, length(element(5, Messagewithin))),
  1075. ?assertMatch({<<"multipart">>, <<"mixed">>, _, _, [{<<"message">>, <<"rfc822">>, _, _, {<<"text">>, <<"plain">>, _, _, <<"This message contains only plain text.\r\n">>}}]}, element(5, Messagewithin)),
  1076. ?assertMatch({<<"image">>, <<"jpeg">>, _, _, _}, Image),
  1077. ?assertEqual(?IMAGE_MD5, erlang:md5(element(5, Image))),
  1078. ?assertMatch({<<"text">>, <<"rtf">>, _, _, _}, Rtf),
  1079. ?assertEqual(<<"{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf460\r\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\r\n{\\colortbl;\\red255\\green255\\blue255;}\r\n\\margl1440\\margr1440\\vieww9000\\viewh8400\\viewkind0\r\n\\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\ql\\qnatural\\pardirnatural\r\n\r\n\\f0\\fs24 \\cf0 This is a basic rtf file.}">>, element(5, Rtf))
  1080. end
  1081. },
  1082. {"Plain text and 2 identical attachments",
  1083. fun() ->
  1084. Decoded = Getmail("plain-text-and-two-identical-attachments.eml"),
  1085. ?assertMatch({<<"multipart">>, <<"mixed">>, _, _, _}, Decoded),
  1086. ?assertEqual(3, length(element(5, Decoded))),
  1087. [Plain, Attach1, Attach2] = element(5, Decoded),
  1088. ?assertEqual(Attach1, Attach2),
  1089. ?assertMatch({<<"text">>, <<"plain">>, _, _, _}, Plain),
  1090. ?assertEqual(<<"This message contains only plain text.\r\n">>, element(5, Plain))
  1091. end
  1092. },
  1093. {"no \\r\\n before first boundary",
  1094. fun() ->
  1095. {ok, Bin} = file:read_file("../testdata/html.eml"),
  1096. Decoded = decode(Bin),
  1097. ?assertEqual(2, length(element(5, Decoded)))
  1098. end
  1099. },
  1100. {"testcase1",
  1101. fun() ->
  1102. Multipart = <<"multipart">>,
  1103. Alternative = <<"alternative">>,
  1104. Related = <<"related">>,
  1105. Mixed = <<"mixed">>,
  1106. Text = <<"text">>,
  1107. Html = <<"html">>,
  1108. Plain = <<"plain">>,
  1109. Message = <<"message">>,
  1110. Ref822 = <<"rfc822">>,
  1111. Image = <<"image">>,
  1112. Jpeg = <<"jpeg">>,
  1113. %Imagemd5 = <<69,175,198,78,52,72,6,233,147,22,50,137,128,180,169,50>>,
  1114. Imagemd5 = <<179,151,42,139,78,14,182,78,24,160,123,221,217,14,141,5>>,
  1115. Decoded = Getmail("testcase1"),
  1116. ?assertMatch({Multipart, Mixed, _, _, [_, _]}, Decoded),
  1117. [Multi1, Message1] = element(5, Decoded),
  1118. ?assertMatch({Multipart, Alternative, _, _, [_, _]}, Multi1),
  1119. [Plain1, Html1] = element(5, Multi1),
  1120. ?assertMatch({Text, Plain, _, _, _}, Plain1),
  1121. ?assertMatch({Text, Html, _, _, _}, Html1),
  1122. ?assertMatch({Message, Ref822, _, _, _}, Message1),
  1123. Multi2 = element(5, Message1),
  1124. ?assertMatch({Multipart, Alternative, _, _, [_, _]}, Multi2),
  1125. [Plain2, Related1] = element(5, Multi2),
  1126. ?assertMatch({Text, Plain, _, _, _}, Plain2),
  1127. ?assertMatch({Multipart, Related, _, _, [_, _]}, Related1),
  1128. [Html2, Image1] = element(5, Related1),
  1129. ?assertMatch({Text, Html, _, _, _}, Html2),
  1130. ?assertMatch({Image, Jpeg, _, _, _}, Image1),
  1131. Resimage = erlang:md5(element(5, Image1)),
  1132. ?assertEqual(Imagemd5, Resimage)
  1133. end
  1134. },
  1135. {"testcase2",
  1136. fun() ->
  1137. Multipart = <<"multipart">>,
  1138. Alternative = <<"alternative">>,
  1139. Mixed = <<"mixed">>,
  1140. Text = <<"text">>,
  1141. Html = <<"html">>,
  1142. Plain = <<"plain">>,
  1143. Message = <<"message">>,
  1144. Ref822 = <<"rfc822">>,
  1145. Application = <<"application">>,
  1146. Octetstream = <<"octet-stream">>,
  1147. Decoded = Getmail("testcase2"),
  1148. ?assertMatch({Multipart, Mixed, _, _, [_, _, _]}, Decoded),
  1149. [Plain1, Stream1, Message1] = element(5, Decoded),
  1150. ?assertMatch({Text, Plain, _, _, _}, Plain1),
  1151. ?assertMatch({Application, Octetstream, _, _, _}, Stream1),
  1152. ?assertMatch({Message, Ref822, _, _, _}, Message1),
  1153. Multi1 = element(5, Message1),
  1154. ?assertMatch({Multipart, Alternative, _, _, [_, _]}, Multi1),
  1155. [Plain2, Html1] = element(5, Multi1),
  1156. ?assertMatch({Text, Plain, _, _, _}, Plain2),
  1157. ?assertMatch({Text, Html, _, _, _}, Html1)
  1158. end
  1159. }
  1160. ].
  1161. decode_quoted_printable_test_() ->
  1162. [
  1163. {"bleh",
  1164. fun() ->
  1165. ?assertEqual("!", decode_quoted_printable_line(<<"=21">>, "")),
  1166. ?assertEqual("!!", decode_quoted_printable_line(<<"=21=21">>, "")),
  1167. ?assertEqual("=:=", decode_quoted_printable_line(<<"=3D:=3D">>, "")),
  1168. ?assertEqual("Thequickbrownfoxjumpedoverthelazydog.", decode_quoted_printable_line(<<"Thequickbrownfoxjumpedoverthelazydog.">>, ""))
  1169. end
  1170. },
  1171. {"lowercase bleh",
  1172. fun() ->
  1173. ?assertEqual("=:=", decode_quoted_printable_line(<<"=3d:=3d">>, ""))
  1174. end
  1175. },
  1176. {"input with spaces",
  1177. fun() ->
  1178. ?assertEqual("The quick brown fox jumped over the lazy dog.", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog.">>, ""))
  1179. end
  1180. },
  1181. {"input with tabs",
  1182. fun() ->
  1183. ?assertEqual("The\tquick brown fox jumped over\tthe lazy dog.", decode_quoted_printable_line(<<"The\tquick brown fox jumped over\tthe lazy dog.">>, ""))
  1184. end
  1185. },
  1186. {"input with trailing spaces",
  1187. fun() ->
  1188. ?assertEqual("The quick brown fox jumped over the lazy dog.", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog. ">>, ""))
  1189. end
  1190. },
  1191. {"input with non-strippable trailing whitespace",
  1192. fun() ->
  1193. ?assertEqual("The quick brown fox jumped over the lazy dog. ", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog. =20">>, "")),
  1194. ?assertEqual("The quick brown fox jumped over the lazy dog. \t", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog. =09">>, "")),
  1195. ?assertEqual("The quick brown fox jumped over the lazy dog.\t \t \t \t ", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog.\t \t \t =09=20">>, "")),
  1196. ?assertEqual("The quick brown fox jumped over the lazy dog.\t \t \t \t ", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog.\t \t \t =09=20\t \t">>, ""))
  1197. end
  1198. },
  1199. {"input with trailing tabs",
  1200. fun() ->
  1201. ?assertEqual("The quick brown fox jumped over the lazy dog.", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog.\t\t\t\t\t">>, ""))
  1202. end
  1203. },
  1204. {"soft new line",
  1205. fun() ->
  1206. ?assertEqual("The quick brown fox jumped over the lazy dog. ", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog. =">>, ""))
  1207. end
  1208. },
  1209. {"soft new line with trailing whitespace",
  1210. fun() ->
  1211. ?assertEqual("The quick brown fox jumped over the lazy dog. ", decode_quoted_printable_line(<<"The quick brown fox jumped over the lazy dog. = ">>, ""))
  1212. end
  1213. },
  1214. {"multiline stuff",
  1215. fun() ->
  1216. ?assertEqual(<<"Now's the time for all folk to come to the aid of their country.">>, decode_quoted_printable(<<"Now's the time =\r\nfor all folk to come=\r\n to the aid of their country.">>)),
  1217. ?assertEqual(<<"Now's the time\r\nfor all folk to come\r\n to the aid of their country.">>, decode_quoted_printable(<<"Now's the time\r\nfor all folk to come\r\n to the aid of their country.">>)),
  1218. ?assertEqual(<<"hello world">>, decode_quoted_printable(<<"hello world">>)),
  1219. ?assertEqual(<<"hello\r\n\r\nworld">>, decode_quoted_printable(<<"hello\r\n\r\nworld">>))
  1220. end
  1221. },
  1222. {"invalid input",
  1223. fun() ->
  1224. ?assertThrow(badchar, decode_quoted_printable_line(<<"=21=G1">>, "")),
  1225. ?assertThrow(badchar, decode_quoted_printable(<<"=21=D1 = g ">>))
  1226. end
  1227. },
  1228. {"out of range characters should be stripped",
  1229. fun() ->
  1230. % character 150 is en-dash in windows 1252
  1231. ?assertEqual(<<"Foo bar">>, decode_body(<<"quoted-printable">>, <<"Foo ", 150, " bar">>, "US-ASCII", "UTF-8//IGNORE"))
  1232. end
  1233. },
  1234. {"out of range character in alternate charset should be converted",
  1235. fun() ->
  1236. % character 150 is en-dash in windows 1252
  1237. ?assertEqual(<<"Foo ", 226, 128, 147, " bar">>, decode_body(<<"quoted-printable">>, <<"Foo ",150," bar">>, "Windows-1252", "UTF-8//IGNORE"))
  1238. end
  1239. },
  1240. {"out of range character in alternate charset with no destination encoding should be stripped",
  1241. fun() ->
  1242. % character 150 is en-dash in windows 1252
  1243. ?assertEqual(<<"Foo bar">>, decode_body(<<"quoted-printable">>, <<"Foo ",150," bar">>, "Windows-1252", none))
  1244. end
  1245. },
  1246. {"out of range character in alternate charset with no source encoding should be stripped",
  1247. fun() ->
  1248. % character 150 is en-dash in windows 1252
  1249. ?assertEqual(<<"Foo bar">>, decode_body(<<"quoted-printable">>, <<"Foo ",150," bar">>, undefined, "UTF-8"))
  1250. end
  1251. },
  1252. {"almost correct chatsets should work, eg. 'UTF8' instead of 'UTF-8'",
  1253. fun() ->
  1254. % character 150 is en-dash in windows 1252
  1255. ?assertEqual(<<"Foo bar">>, decode_body(<<"quoted-printable">>, <<"Foo bar">>, <<"UTF8">>, "UTF-8")),
  1256. ?assertEqual(<<"Foo bar">>, decode_body(<<"quoted-printable">>, <<"Foo bar">>, <<"utf8">>, "UTF-8"))
  1257. end
  1258. }
  1259. ].
  1260. encode_quoted_printable_test_() ->
  1261. [
  1262. {"bleh",
  1263. fun() ->
  1264. ?assertEqual(<<"!">>, encode_quoted_printable(<<"!">>, [], 0)),
  1265. ?assertEqual(<<"!!">>, encode_quoted_printable(<<"!!">>, [], 0)),
  1266. ?assertEqual(<<"=3D:=3D">>, encode_quoted_printable(<<"=:=">>, [], 0)),
  1267. ?assertEqual(<<"Thequickbrownfoxjumpedoverthelazydog.">>,
  1268. encode_quoted_printable(<<"Thequickbrownfoxjumpedoverthelazydog.">>, [], 0))
  1269. end
  1270. },
  1271. {"input with spaces",
  1272. fun() ->
  1273. ?assertEqual(<<"The quick brown fox jumped over the lazy dog.">>,
  1274. encode_quoted_printable(<<"The quick brown fox jumped over the lazy dog.">>, "", 0))
  1275. end
  1276. },
  1277. {"input with tabs",
  1278. fun() ->
  1279. ?assertEqual(<<"The\tquick brown fox jumped over\tthe lazy dog.">>,
  1280. encode_quoted_printable(<<"The\tquick brown fox jumped over\tthe lazy dog.">>, "", 0))
  1281. end
  1282. },
  1283. {"input with trailing spaces",
  1284. fun() ->
  1285. ?assertEqual(<<"The quick brown fox jumped over the lazy dog. =20\r\n">>,
  1286. encode_quoted_printable(<<"The quick brown fox jumped over the lazy dog. \r\n">>, "", 0))
  1287. end
  1288. },
  1289. {"input with non-ascii characters",
  1290. fun() ->
  1291. ?assertEqual(<<"There's some n=F8n-=E1scii st=FCff in here\r\n">>,
  1292. encode_quoted_printable(<<"There's some n", 248, "n-", 225,"scii st", 252, "ff in here\r\n">>, "", 0))
  1293. end
  1294. },
  1295. {"input with invisible non-ascii characters",
  1296. fun() ->
  1297. ?assertEqual(<<"There's some stuff=C2=A0in=C2=A0here\r\n">>,
  1298. encode_quoted_printable(<<"There's some stuff in here\r\n">>, "", 0))
  1299. end
  1300. },
  1301. {"add soft newlines",
  1302. fun() ->
  1303. ?assertEqual(<<"The quick brown fox jumped over the lazy dog. The quick brown fox jumped =\r\nover the lazy dog.">>,
  1304. encode_quoted_printable(<<"The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.">>, "", 0)),
  1305. ?assertEqual(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_ov=\r\ner_the_lazy_dog.">>,
  1306. encode_quoted_printable(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_over_the_lazy_dog.">>, "", 0)),
  1307. ?assertEqual(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_o=\r\n=3Dver_the_lazy_dog.">>,
  1308. encode_quoted_printable(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_o=ver_the_lazy_dog.">>, "", 0)),
  1309. ?assertEqual(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_=\r\n=3Dover_the_lazy_dog.">>,
  1310. encode_quoted_printable(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_=over_the_lazy_dog.">>, "", 0)),
  1311. ?assertEqual(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_o =\r\nver_the_lazy_dog.">>,
  1312. encode_quoted_printable(<<"The_quick_brown_fox_jumped_over_the_lazy_dog._The_quick_brown_fox_jumped_o ver_the_lazy_dog.">>, "", 0))
  1313. end
  1314. },
  1315. {"newline craziness",
  1316. fun() ->
  1317. ?assertEqual(<<"foo ba=\r\nr\r\nThe quick brown fox jumped over the lazy dog. =20\r\n">>,
  1318. encode_quoted_printable(<<"The quick brown fox jumped over the lazy dog. \r\n">>, "\n\rrab oof", 78))
  1319. end
  1320. }
  1321. ].
  1322. encode_parameter_test_() ->
  1323. [
  1324. {"Token",
  1325. fun() ->
  1326. ?assertEqual([[<<"a">>, $=, <<"abcdefghijklmnopqrstuvwxyz$%&*#!">>]],
  1327. encode_parameters([{<<"a">>, <<"abcdefghijklmnopqrstuvwxyz$%&*#!">>}]))
  1328. end
  1329. },
  1330. {"TSpecial",
  1331. fun() ->
  1332. Special = " ()<>@,;:/[]?=",
  1333. [
  1334. ?assertEqual([[<<"a">>, $=, $", <<C>>, $"]], encode_parameters([{<<"a">>, <<C>>}]))
  1335. || C <- Special
  1336. ],
  1337. ?assertEqual([[<<"a">>, $=, $", <<$\\,$">>, $"]], encode_parameters([{<<"a">>, <<$">>}])),
  1338. ?assertEqual([[<<"a">>, $=, $", <<$\\,$\\>>, $"]], encode_parameters([{<<"a">>, <<$\\>>}]))
  1339. end
  1340. }
  1341. ].
  1342. rfc2047_decode_test_() ->
  1343. [
  1344. {"Simple tests",
  1345. fun() ->
  1346. ?assertEqual(<<"Keith Moore <moore@cs.utk.edu>">>, decode_header(<<"=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>">>, "utf-8")),
  1347. ?assertEqual(<<"Keld Jørn Simonsen <keld@dkuug.dk>">>, decode_header(<<"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>">>, "utf-8")),
  1348. ?assertEqual(<<"Olle Järnefors <ojarnef@admin.kth.se>">>, decode_header(<<"=?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>">>, "utf-8")),
  1349. ?assertEqual(<<"André Pirard <PIRARD@vm1.ulg.ac.be>">>, decode_header(<<"=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>">>, "utf-8"))
  1350. end
  1351. },
  1352. {"encoded words seperated by whitespace should have whitespace removed",
  1353. fun() ->
  1354. ?assertEqual(<<"If you can read this you understand the example.">>, decode_header(<<"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=">>, "utf-8")),
  1355. ?assertEqual(<<"ab">>, decode_header(<<"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=">>, "utf-8")),
  1356. ?assertEqual(<<"ab">>, decode_header(<<"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=">>, "utf-8")),
  1357. ?assertEqual(<<"ab">>, decode_header(<<"=?ISO-8859-1?Q?a?=
  1358. =?ISO-8859-1?Q?b?=">>, "utf-8"))
  1359. end
  1360. },
  1361. {"underscores expand to spaces",
  1362. fun() ->
  1363. ?assertEqual(<<"a b">>, decode_header(<<"=?ISO-8859-1?Q?a_b?=">>, "utf-8")),
  1364. ?assertEqual(<<"a b">>, decode_header(<<"=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=">>, "utf-8"))
  1365. end
  1366. },
  1367. {"edgecases",
  1368. fun() ->
  1369. ?assertEqual(<<"this is some text">>, decode_header(<<"=?iso-8859-1?q?this=20is=20some=20text?=">>, "utf-8")),
  1370. ?assertEqual(<<"=?iso-8859-1?q?this is some text?=">>, decode_header(<<"=?iso-8859-1?q?this is some text?=">>, "utf-8"))
  1371. end
  1372. },
  1373. {"invalid character sequence handling",
  1374. fun() ->
  1375. ?assertError({badmatch, {error, eilseq}}, decode_header(<<"=?us-ascii?B?dGhpcyBjb250YWlucyBhIGNvcHlyaWdodCCpIHN5bWJvbA==?=">>, "utf-8")),
  1376. ?assertEqual(<<"this contains a copyright symbol">>, decode_header(<<"=?us-ascii?B?dGhpcyBjb250YWlucyBhIGNvcHlyaWdodCCpIHN5bWJvbA==?=">>, "utf-8//IGNORE")),
  1377. ?assertEqual(<<"this contains a copyright © symbol">>, decode_header(<<"=?iso-8859-1?B?dGhpcyBjb250YWlucyBhIGNvcHlyaWdodCCpIHN5bWJvbA==?=">>, "utf-8//IGNORE"))
  1378. end
  1379. },
  1380. {"multiple unicode email addresses",
  1381. fun() ->
  1382. ?assertEqual(<<"Jacek Złydach <jacek.zlydach@erlang-solutions.com>, chak de planet óóóó <jz@erlang-solutions.com>, Jacek Złydach <jacek.zlydach@erlang-solutions.com>, chak de planet óóóó <jz@erlang-solutions.com>">>, decode_header(<<"=?UTF-8?B?SmFjZWsgWsWCeWRhY2g=?= <jacek.zlydach@erlang-solutions.com>, =?UTF-8?B?Y2hhayBkZSBwbGFuZXQgw7PDs8Ozw7M=?= <jz@erlang-solutions.com>, =?UTF-8?B?SmFjZWsgWsWCeWRhY2g=?= <jacek.zlydach@erlang-solutions.com>, =?UTF-8?B?Y2hhayBkZSBwbGFuZXQgw7PDs8Ozw7M=?= <jz@erlang-solutions.com>">>, "utf-8"))
  1383. end
  1384. }
  1385. ].
  1386. encoding_test_() ->
  1387. [
  1388. {"Simple email",
  1389. fun() ->
  1390. Email = {<<"text">>, <<"plain">>, [
  1391. {<<"From">>, <<"me@example.com">>},
  1392. {<<"To">>, <<"you@example.com">>},
  1393. {<<"Subject">>, <<"This is a test">>},
  1394. {<<"Message-ID">>, <<"<abcd@example.com>">>},
  1395. {<<"MIME-Version">>, <<"1.0">>},
  1396. {<<"Date">>, <<"Sun, 01 Nov 2009 14:44:47 +0200">>}],
  1397. [{<<"content-type-params">>,
  1398. [{<<"charset">>,<<"US-ASCII">>}],
  1399. {<<"disposition">>,<<"inline">>}}],
  1400. <<"This is a plain message">>},
  1401. Result = <<"From: me@example.com\r\nTo: you@example.com\r\nSubject: This is a test\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
  1402. ?assertEqual(Result, encode(Email))
  1403. end
  1404. },
  1405. {"multipart/alternative email",
  1406. fun() ->
  1407. Email = {<<"multipart">>, <<"alternative">>, [
  1408. {<<"From">>, <<"me@example.com">>},
  1409. {<<"To">>, <<"you@example.com">>},
  1410. {<<"Subject">>, <<"This is a test">>},
  1411. {<<"MIME-Version">>, <<"1.0">>},
  1412. {<<"Content-Type">>,
  1413. <<"multipart/alternative; boundary=wtf-123234234">>}],
  1414. [{<<"content-type-params">>,
  1415. [{<<"boundary">>, <<"wtf-123234234">>}]},
  1416. {<<"disposition">>,<<"inline">>},
  1417. {<<"disposition-params">>,[]}],
  1418. [{<<"text">>,<<"plain">>,
  1419. [{<<"Content-Type">>,
  1420. <<"text/plain;charset=US-ASCII;format=flowed">>},
  1421. {<<"Content-Transfer-Encoding">>,<<"7bit">>}],
  1422. [{<<"content-type-params">>,
  1423. [{<<"charset">>,<<"US-ASCII">>},
  1424. {<<"format">>,<<"flowed">>}]},
  1425. {<<"disposition">>,<<"inline">>},
  1426. {<<"disposition-params">>,[]}],
  1427. <<"This message contains rich text.">>},
  1428. {<<"text">>,<<"html">>,
  1429. [{<<"Content-Type">>,<<"text/html;charset=US-ASCII">>},
  1430. {<<"Content-Transfer-Encoding">>,<<"7bit">>}],
  1431. [{<<"content-type-params">>,
  1432. [{<<"charset">>,<<"US-ASCII">>}]},
  1433. {<<"disposition">>,<<"inline">>},
  1434. {<<"disposition-params">>,[]}],
  1435. <<"<html><body>This message also contains HTML</body></html>">>}]},
  1436. Result = decode(encode(Email)),
  1437. ?assertMatch({<<"multipart">>, <<"alternative">>, _, _, [{<<"text">>,
  1438. <<"plain">>, _, _, _}, {<<"text">>, <<"html">>, _, _, _}]},
  1439. Result)
  1440. end
  1441. },
  1442. {"multipart/alternative email with encoding",
  1443. fun() ->
  1444. Email = {<<"multipart">>, <<"alternative">>, [
  1445. {<<"From">>, <<"me@example.com">>},
  1446. {<<"To">>, <<"you@example.com">>},
  1447. {<<"Subject">>, <<"This is a test">>},
  1448. {<<"MIME-Version">>, <<"1.0">>},
  1449. {<<"Content-Type">>,
  1450. <<"multipart/alternative; boundary=wtf-123234234">>}],
  1451. [{<<"content-type-params">>,
  1452. [{<<"boundary">>, <<"wtf-123234234">>}]},
  1453. {<<"disposition">>,<<"inline">>},
  1454. {<<"disposition-params">>,[]}],
  1455. [{<<"text">>,<<"plain">>,
  1456. [{<<"Content-Type">>,
  1457. <<"text/plain;charset=US-ASCII;format=flowed">>},
  1458. {<<"Content-Transfer-Encoding">>,<<"quoted-printable">>}],
  1459. [{<<"content-type-params">>,
  1460. [{<<"charset">>,<<"US-ASCII">>},
  1461. {<<"format">>,<<"flowed">>}]},
  1462. {<<"disposition">>,<<"inline">>},
  1463. {<<"disposition-params">>,[]}],
  1464. <<"This message contains rich text.\r\n",
  1465. "and is =quoted printable= encoded!">>},
  1466. {<<"text">>,<<"html">>,
  1467. [{<<"Content-Type">>,<<"text/html;charset=US-ASCII">>},
  1468. {<<"Content-Transfer-Encoding">>,<<"base64">>}],
  1469. [{<<"content-type-params">>,
  1470. [{<<"charset">>,<<"US-ASCII">>}]},
  1471. {<<"disposition">>,<<"inline">>},
  1472. {<<"disposition-params">>,[]}],
  1473. <<"<html><body>This message also contains",
  1474. "HTML and is base64",
  1475. "encoded\r\n\r\n</body></html>">>}]},
  1476. Result = decode(encode(Email)),
  1477. ?assertMatch({<<"multipart">>, <<"alternative">>, _, _, [{<<"text">>,
  1478. <<"plain">>, _, _, <<"This message contains rich text.\r\n",
  1479. "and is =quoted printable= encoded!">>},
  1480. {<<"text">>, <<"html">>, _, _,
  1481. <<"<html><body>This message also contains",
  1482. "HTML and is base64",
  1483. "encoded\r\n\r\n</body></html>">>}]},
  1484. Result)
  1485. end
  1486. },
  1487. {"Missing headers should be added",
  1488. fun() ->
  1489. Email = {<<"text">>, <<"plain">>, [
  1490. {<<"From">>, <<"me@example.com">>},
  1491. {<<"To">>, <<"you@example.com">>},
  1492. {<<"Subject">>, <<"This is a test">>}],
  1493. [{<<"content-type-params">>,
  1494. [{<<"charset">>,<<"US-ASCII">>}],
  1495. {<<"disposition">>,<<"inline">>}}],
  1496. <<"This is a plain message">>},
  1497. Result = decode(encode(Email)),
  1498. ?assertNot(undefined == proplists:get_value(<<"Message-ID">>, element(3, Result))),
  1499. ?assertNot(undefined == proplists:get_value(<<"Date">>, element(3, Result))),
  1500. ?assertEqual(undefined, proplists:get_value(<<"References">>, element(3, Result)))
  1501. end
  1502. },
  1503. {"Reference header should be added in presence of In-Reply-To",
  1504. fun() ->
  1505. Email = {<<"text">>, <<"plain">>, [
  1506. {<<"From">>, <<"me@example.com">>},
  1507. {<<"To">>, <<"you@example.com">>},
  1508. {<<"In-Reply-To">>, <<"<abcd@example.com>">>},
  1509. {<<"Subject">>, <<"This is a test">>}],
  1510. [{<<"content-type-params">>,
  1511. [{<<"charset">>,<<"US-ASCII">>}],
  1512. {<<"disposition">>,<<"inline">>}}],
  1513. <<"This is a plain message">>},
  1514. Result = decode(encode(Email)),
  1515. ?assertEqual(<<"<abcd@example.com>">>, proplists:get_value(<<"References">>, element(3, Result)))
  1516. end
  1517. },
  1518. {"Reference header should be appended to in presence of In-Reply-To, if appropiate",
  1519. fun() ->
  1520. Email = {<<"text">>, <<"plain">>, [
  1521. {<<"From">>, <<"me@example.com">>},
  1522. {<<"To">>, <<"you@example.com">>},
  1523. {<<"In-Reply-To">>, <<"<abcd@example.com>">>},
  1524. {<<"References">>, <<"<wxyz@example.com>">>},
  1525. {<<"Subject">>, <<"This is a test">>}],
  1526. [{<<"content-type-params">>,
  1527. [{<<"charset">>,<<"US-ASCII">>}],
  1528. {<<"disposition">>,<<"inline">>}}],
  1529. <<"This is a plain message">>},
  1530. Result = decode(encode(Email)),
  1531. ?assertEqual(<<"<wxyz@example.com> <abcd@example.com>">>, proplists:get_value(<<"References">>, element(3, Result)))
  1532. end
  1533. },
  1534. {"Reference header should NOT be appended to in presence of In-Reply-To, if already present",
  1535. fun() ->
  1536. Email = {<<"text">>, <<"plain">>, [
  1537. {<<"From">>, <<"me@example.com">>},
  1538. {<<"To">>, <<"you@example.com">>},
  1539. {<<"In-Reply-To">>, <<"<abcd@example.com>">>},
  1540. {<<"References">>, <<"<wxyz@example.com> <abcd@example.com>">>},
  1541. {<<"Subject">>, <<"This is a test">>}],
  1542. [{<<"content-type-params">>,
  1543. [{<<"charset">>,<<"US-ASCII">>}],
  1544. {<<"disposition">>,<<"inline">>}}],
  1545. <<"This is a plain message">>},
  1546. Result = decode(encode(Email)),
  1547. ?assertEqual(<<"<wxyz@example.com> <abcd@example.com>">>, proplists:get_value(<<"References">>, element(3, Result)))
  1548. end
  1549. },
  1550. {"Content-Transfer-Encoding header should be added if missing and appropriate",
  1551. fun() ->
  1552. Email = {<<"text">>, <<"plain">>, [
  1553. {<<"From">>, <<"me@example.com">>},
  1554. {<<"To">>, <<"you@example.com">>},
  1555. {<<"Subject">>, <<"This is a test">>}],
  1556. [],
  1557. <<"This is a plain message with some non-ascii characters øÿ\r\nso there">>},
  1558. Encoded = encode(Email),
  1559. Result = decode(Encoded),
  1560. ?assertEqual(<<"quoted-printable">>, proplists:get_value(<<"Content-Transfer-Encoding">>, element(3, Result))),
  1561. Email2 = {<<"text">>, <<"plain">>, [
  1562. {<<"From">>, <<"me@example.com">>},
  1563. {<<"To">>, <<"you@example.com">>},
  1564. {<<"Subject">>, <<"This is a test">>}],
  1565. [],
  1566. <<"This is a plain message with no non-ascii characters">>},
  1567. Encoded2 = encode(Email2),
  1568. Result2 = decode(Encoded2),
  1569. ?assertEqual(undefined, proplists:get_value(<<"Content-Transfer-Encoding">>, element(3, Result2))),
  1570. Email3 = {<<"text">>, <<"plain">>, [
  1571. {<<"From">>, <<"me@example.com">>},
  1572. {<<"To">>, <<"you@example.com">>},
  1573. {<<"Subject">>, <<"This is a test">>}],
  1574. [{<<"transfer-encoding">>, <<"base64">>}],
  1575. <<"This is a plain message with no non-ascii characters">>},
  1576. Encoded3 = encode(Email3),
  1577. Result3 = decode(Encoded3),
  1578. ?assertEqual(<<"base64">>, proplists:get_value(<<"Content-Transfer-Encoding">>, element(3, Result3)))
  1579. end
  1580. },
  1581. {"Content-Type header should be added if missing and appropriate",
  1582. fun() ->
  1583. Email = {<<"text">>, <<"html">>, [
  1584. {<<"From">>, <<"me@example.com">>},
  1585. {<<"To">>, <<"you@example.com">>},
  1586. {<<"Subject">>, <<"This is a test">>}],
  1587. [],
  1588. <<"This is a HTML message with some non-ascii characters øÿ\r\nso there">>},
  1589. Encoded = encode(Email),
  1590. Result = decode(Encoded),
  1591. ?assertEqual(<<"quoted-printable">>, proplists:get_value(<<"Content-Transfer-Encoding">>, element(3, Result))),
  1592. ?assertMatch(<<"text/html;charset=utf-8">>, proplists:get_value(<<"Content-Type">>, element(3, Result))),
  1593. Email2 = {<<"text">>, <<"html">>, [
  1594. {<<"From">>, <<"me@example.com">>},
  1595. {<<"To">>, <<"you@example.com">>},
  1596. {<<"Subject">>, <<"This is a test">>}],
  1597. [],
  1598. <<"This is a HTML message with no non-ascii characters\r\nso there">>},
  1599. Encoded2 = encode(Email2),
  1600. Result2 = decode(Encoded2),
  1601. ?assertMatch(<<"text/html;charset=us-ascii">>, proplists:get_value(<<"Content-Type">>, element(3, Result2))),
  1602. Email3 = {<<"text">>, <<"html">>, [
  1603. {<<"From">>, <<"me@example.com">>},
  1604. {<<"To">>, <<"you@example.com">>},
  1605. {<<"Subject">>, <<"This is a test">>}],
  1606. [],
  1607. <<"This is a text message with some invisible non-ascii characters\r\nso there">>},
  1608. Encoded3 = encode(Email3),
  1609. Result3 = decode(Encoded3),
  1610. ?assertMatch(<<"text/html;charset=utf-8">>, proplists:get_value(<<"Content-Type">>, element(3, Result3)))
  1611. end
  1612. },
  1613. {"Content-Type header should be added for subparts too, if missing and appropriate",
  1614. fun() ->
  1615. Email4 = {<<"multipart">>, <<"alternative">>, [
  1616. {<<"From">>, <<"me@example.com">>},
  1617. {<<"To">>, <<"you@example.com">>},
  1618. {<<"Subject">>, <<"This is a test">>}],
  1619. [],
  1620. [{<<"text">>, <<"plain">>, [], [], <<"This is a multipart message with some invisible non-ascii characters\r\nso there">>}]},
  1621. Encoded4 = encode(Email4),
  1622. Result4 = decode(Encoded4),
  1623. ?assertMatch(<<"text/plain;charset=utf-8">>, proplists:get_value(<<"Content-Type">>, element(3, lists:nth(1,element(5, Result4)))))
  1624. end
  1625. },
  1626. {"Content-Type header should be not added for subparts if they're text/plain us-ascii",
  1627. fun() ->
  1628. Email4 = {<<"multipart">>, <<"alternative">>, [
  1629. {<<"From">>, <<"me@example.com">>},
  1630. {<<"To">>, <<"you@example.com">>},
  1631. {<<"Subject">>, <<"This is a test">>}],
  1632. [],
  1633. [{<<"text">>, <<"plain">>, [], [], <<"This is a multipart message with no non-ascii characters\r\nso there">>}]},
  1634. Encoded4 = encode(Email4),
  1635. Result4 = decode(Encoded4),
  1636. ?assertMatch(undefined, proplists:get_value(<<"Content-Type">>, element(3, lists:nth(1,element(5, Result4)))))
  1637. end
  1638. },
  1639. {"Content-Type header should be added for subparts if they're text/html us-ascii",
  1640. fun() ->
  1641. Email4 = {<<"multipart">>, <<"alternative">>, [
  1642. {<<"From">>, <<"me@example.com">>},
  1643. {<<"To">>, <<"you@example.com">>},
  1644. {<<"Subject">>, <<"This is a test">>}],
  1645. [],
  1646. [{<<"text">>, <<"html">>, [], [], <<"This is a multipart message with no non-ascii characters\r\nso there">>}]},
  1647. Encoded4 = encode(Email4),
  1648. Result4 = decode(Encoded4),
  1649. ?assertMatch(<<"text/html;charset=us-ascii">>, proplists:get_value(<<"Content-Type">>, element(3, lists:nth(1,element(5, Result4)))))
  1650. end
  1651. },
  1652. {"A boundary should be generated if applicable",
  1653. fun() ->
  1654. Email = {<<"multipart">>, <<"alternative">>, [
  1655. {<<"From">>, <<"me@example.com">>},
  1656. {<<"To">>, <<"you@example.com">>},
  1657. {<<"Subject">>, <<"This is a test">>}],
  1658. [],
  1659. [{<<"text">>,<<"plain">>,
  1660. [],
  1661. [],
  1662. <<"This message contains rich text.\r\n",
  1663. "and is =quoted printable= encoded!">>},
  1664. {<<"text">>,<<"html">>,
  1665. [],
  1666. [],
  1667. <<"<html><body>This message also contains",
  1668. "HTML and is base64",
  1669. "encoded\r\n\r\n</body></html>">>}]},
  1670. Encoded = encode(Email),
  1671. Result = decode(Encoded),
  1672. Boundary = proplists:get_value(<<"boundary">>, proplists:get_value(<<"content-type-params">>, element(4, Result))),
  1673. ?assert(is_binary(Boundary)),
  1674. % ensure we don't add the header multiple times
  1675. ?assertEqual(1, length(proplists:get_all_values(<<"Content-Type">>, element(3, Result)))),
  1676. % headers should be appended, not prepended
  1677. ?assertMatch({<<"From">>, _}, lists:nth(1, element(3, Result))),
  1678. ok
  1679. end
  1680. }
  1681. ].
  1682. roundtrip_test_() ->
  1683. [
  1684. {"roundtrip test for the gamut",
  1685. fun() ->
  1686. {ok, Email} = file:read_file("../testdata/the-gamut.eml"),
  1687. Decoded = decode(Email),
  1688. _Encoded = encode(Decoded),
  1689. %{ok, F1} = file:open("f1", [write]),
  1690. %{ok, F2} = file:open("f2", [write]),
  1691. %file:write(F1, Email),
  1692. %file:write(F2, Encoded),
  1693. %file:close(F1),
  1694. %file:close(F2),
  1695. ?assertEqual(Email, Email)
  1696. end
  1697. },
  1698. {"round trip plain text only email",
  1699. fun() ->
  1700. {ok, Email} = file:read_file("../testdata/Plain-text-only.eml"),
  1701. Decoded = decode(Email),
  1702. _Encoded = encode(Decoded),
  1703. %{ok, F1} = file:open("f1", [write]),
  1704. %{ok, F2} = file:open("f2", [write]),
  1705. %file:write(F1, Email),
  1706. %file:write(F2, Encoded),
  1707. %file:close(F1),
  1708. %file:close(F2),
  1709. ?assertEqual(Email, Email)
  1710. end
  1711. },
  1712. {"round trip quoted-printable email",
  1713. fun() ->
  1714. {ok, Email} = file:read_file("../testdata/testcase1"),
  1715. Decoded = decode(Email),
  1716. _Encoded = encode(Decoded),
  1717. %{ok, F1} = file:open("f1", [write]),
  1718. %{ok, F2} = file:open("f2", [write]),
  1719. %file:write(F1, Email),
  1720. %file:write(F2, Encoded),
  1721. %file:close(F1),
  1722. %file:close(F2),
  1723. ?assertEqual(Email, Email)
  1724. %ok
  1725. end
  1726. }
  1727. ].
  1728. -endif.