PageRenderTime 161ms CodeModel.GetById 16ms app.highlight 130ms RepoModel.GetById 1ms app.codeStats 0ms

/deps/gen_smtp/src/mimemail.erl

http://github.com/zotonic/zotonic
Erlang | 1818 lines | 1580 code | 90 blank | 148 comment | 46 complexity | 2bc63ce0c840d1e05c216b867ee1bcae MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1%%% Copyright 2009 Andrew Thompson <andrew@hijacked.us>. All rights reserved.
   2%%%
   3%%% Redistribution and use in source and binary forms, with or without
   4%%% modification, are permitted provided that the following conditions are met:
   5%%%
   6%%%   1. Redistributions of source code must retain the above copyright notice,
   7%%%      this list of conditions and the following disclaimer.
   8%%%   2. Redistributions in binary form must reproduce the above copyright
   9%%%      notice, this list of conditions and the following disclaimer in the
  10%%%      documentation and/or other materials provided with the distribution.
  11%%%
  12%%% THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY EXPRESS OR
  13%%% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  14%%% MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  15%%% EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  16%%% INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17%%% (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18%%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19%%% ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20%%% (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21%%% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22
  23%% @doc A module for decoding/encoding MIME 1.0 email.
  24%% The encoder and decoder operate on the same datastructure, which is as follows:
  25%% A 5-tuple with the following elements: `{Type, SubType, Headers, Parameters, Body}'.
  26%%
  27%% `Type' and `SubType' are the MIME type of the email, examples are `text/plain' or
  28%% `multipart/alternative'. The decoder splits these into 2 fields so you can filter by
  29%% the main type or by the subtype.
  30%%
  31%% `Headers' consists of a list of key/value pairs of binary values eg.
  32%% `{<<"From">>, <<"Andrew Thompson <andrew@hijacked.us>">>}'. There is no parsing of
  33%% the header aside from un-wrapping the lines and splitting the header name from the
  34%% header value.
  35%%
  36%% `Parameters' is a list of 3 key/value tuples. The 3 keys are `<<"content-type-params">>',
  37%% `<<"dispisition">>' and `<<"disposition-params">>'.
  38%% `content-type-params' is a key/value list of parameters on the content-type header, this
  39%% usually consists of things like charset and the format parameters. `disposition' indicates
  40%% how the data wants to be displayed, this is usually 'inline'. `disposition-params' is a list of
  41%% disposition information, eg. the filename this section should be saved as, the modification
  42%% date the file should be saved with, etc.
  43%%
  44%% Finally, `Body' can be one of several different types, depending on the structure of the email.
  45%% For a simple email, the body will usually be a binary consisting of the message body, In the
  46%% case of a multipart email, its a list of these 5-tuple MIME structures. The third possibility,
  47%% in the case of a message/rfc822 attachment, body can be a single 5-tuple MIME structure.
  48%% 
  49%% You should see the relevant RFCs (2045, 2046, 2047, etc.) for more information.
  50-module(mimemail).
  51
  52-ifdef(TEST).
  53-include_lib("eunit/include/eunit.hrl").
  54-endif.
  55
  56-export([encode/1, decode/2, decode/1, get_header_value/2, get_header_value/3, parse_headers/1]).
  57
  58-define(DEFAULT_OPTIONS, [
  59		{encoding, get_default_encoding()}, % default encoding is utf-8 if we can find the iconv module
  60		{decode_attachments, true} % should we decode any base64/quoted printable attachments?
  61	]).
  62
  63-type(mimetuple() :: {binary(), binary(), [{binary(), binary()}], [{binary(), binary()}], binary() | [{binary(), binary(), [{binary(), binary()}], [{binary(), binary()}], binary() | [tuple()]}] | tuple()}).
  64
  65-type(options() :: [{'encoding', binary()} | {'decode_attachment', boolean()}]).
  66
  67-spec(decode/1 :: (Email :: binary()) -> mimetuple()).
  68%% @doc Decode a MIME email from a binary.
  69decode(All) ->
  70	{Headers, Body} = parse_headers(All),
  71	decode(Headers, Body, ?DEFAULT_OPTIONS).
  72
  73-spec(decode/2 :: (Email :: binary(), Options :: options()) -> mimetuple()).
  74%% @doc Decode with custom options
  75decode(All, Options) when is_binary(All), is_list(Options) ->
  76	{Headers, Body} = parse_headers(All),
  77	decode(Headers, Body, Options).
  78
  79decode(OrigHeaders, Body, Options) ->
  80	%io:format("headers: ~p~n", [Headers]),
  81	Encoding = proplists:get_value(encoding, Options, none),
  82	case whereis(iconv) of
  83		undefined when Encoding =/= none ->
  84			{ok, _Pid} = iconv:start();
  85		_ ->
  86			ok
  87	end,
  88
  89	%FixedHeaders = fix_headers(Headers),
  90	Headers = decode_headers(OrigHeaders, [], Encoding),
  91	case parse_with_comments(get_header_value(<<"MIME-Version">>, Headers)) of
  92		undefined ->
  93			case parse_content_type(get_header_value(<<"Content-Type">>, Headers)) of
  94				{<<"multipart">>, _SubType, _Parameters} ->
  95					erlang:error(non_mime_multipart);
  96				{Type, SubType, Parameters} ->
  97					NewBody = decode_body(get_header_value(<<"Content-Transfer-Encoding">>, Headers),
  98						Body, proplists:get_value(<<"charset">>, Parameters), Encoding),
  99					{Type, SubType, Headers, Parameters, NewBody};
 100				undefined ->
 101					Parameters = [{<<"content-type-params">>, [{<<"charset">>, <<"us-ascii">>}]}, {<<"disposition">>, <<"inline">>}, {<<"disposition-params">>, []}],
 102					{<<"text">>, <<"plain">>, Headers, Parameters, decode_body(get_header_value(<<"Content-Transfer-Encoding">>, Headers), Body)}
 103			end;
 104		Other ->
 105			decode_component(Headers, Body, Other, Options)
 106	end.
 107
 108-spec(encode/1 :: (MimeMail :: mimetuple()) -> binary()).
 109%% @doc Encode a MIME tuple to a binary.
 110encode({Type, Subtype, Headers, ContentTypeParams, Parts}) ->
 111	{FixedParams, FixedHeaders} = ensure_content_headers(Type, Subtype, ContentTypeParams, Headers, Parts, true),
 112	FixedHeaders2 = check_headers(FixedHeaders),
 113	list_to_binary([binstr:join(
 114				encode_headers(
 115					FixedHeaders2
 116					),
 117				"\r\n"),
 118			"\r\n\r\n",
 119		binstr:join(encode_component(Type, Subtype, FixedHeaders2, FixedParams, Parts),
 120			"\r\n")]);
 121encode(_) ->
 122	io:format("Not a mime-decoded DATA~n"),
 123	erlang:error(non_mime).
 124
 125
 126decode_headers(Headers, _, none) ->
 127	Headers;
 128decode_headers([], Acc, _Charset) ->
 129	lists:reverse(Acc);
 130decode_headers([{Key, Value} | Headers], Acc, Charset) ->
 131	decode_headers(Headers, [{Key, decode_header(Value, Charset)} | Acc], Charset).
 132
 133decode_header(Value, Charset) ->
 134	case re:run(Value, "=\\?([-A-Za-z0-9_]+)\\?([qQbB])\\?([^\s]+)\\?=", [ungreedy]) of
 135		nomatch ->
 136			Value;
 137		{match,[{AllStart, AllLen},{EncodingStart, EncodingLen},{TypeStart, _},{DataStart, DataLen}]} ->
 138			Encoding = binstr:substr(Value, EncodingStart+1, EncodingLen),
 139			Type = binstr:to_lower(binstr:substr(Value, TypeStart+1, 1)),
 140			Data = binstr:substr(Value, DataStart+1, DataLen),
 141
 142			CD = case iconv:open(Charset, fix_encoding(Encoding)) of
 143				{ok, Res} -> Res;
 144				{error, einval} -> throw({bad_charset, fix_encoding(Encoding)})
 145			end,
 146
 147			DecodedData = case Type of
 148				<<"q">> ->
 149					{ok, S} = iconv:conv(CD, decode_quoted_printable(re:replace(Data, "_", " ", [{return, binary}, global]))),
 150					S;
 151				<<"b">> ->
 152					{ok, S} = iconv:conv(CD, decode_base64(re:replace(Data, "_", " ", [{return, binary}, global]))),
 153					S
 154			end,
 155
 156			iconv:close(CD),
 157
 158
 159			Offset = case re:run(binstr:substr(Value, AllStart + AllLen + 1), "^([\s\t\n\r]+)=\\?[-A-Za-z0-9_]+\\?[^\s]\\?[^\s]+\\?=", [ungreedy]) of
 160				nomatch ->
 161					% no 2047 block immediately following
 162					1;
 163				{match,[{_, _},{_, WhiteSpaceLen}]} ->
 164					1+ WhiteSpaceLen
 165			end,
 166
 167
 168			NewValue = list_to_binary([binstr:substr(Value, 1, AllStart), DecodedData, binstr:substr(Value, AllStart + AllLen + Offset)]),
 169			decode_header(NewValue, Charset)
 170	end.
 171
 172
 173decode_component(Headers, Body, MimeVsn, Options) when MimeVsn =:= <<"1.0">> ->
 174	case parse_content_disposition(get_header_value(<<"Content-Disposition">>, Headers)) of
 175		{Disposition, DispositionParams} ->
 176			ok;
 177		_ -> % defaults
 178			Disposition = <<"inline">>,
 179			DispositionParams = []
 180	end,
 181
 182	case parse_content_type(get_header_value(<<"Content-Type">>, Headers)) of
 183		{<<"multipart">>, SubType, Parameters} ->
 184			case proplists:get_value(<<"boundary">>, Parameters) of
 185				undefined ->
 186					erlang:error(no_boundary);
 187				Boundary ->
 188					% io:format("this is a multipart email of type:  ~s and boundary ~s~n", [SubType, Boundary]),
 189					Parameters2 = [{<<"content-type-params">>, Parameters}, {<<"disposition">>, Disposition}, {<<"disposition-params">>, DispositionParams}],
 190					{<<"multipart">>, SubType, Headers, Parameters2, split_body_by_boundary(Body, list_to_binary(["--", Boundary]), MimeVsn, Options)}
 191			end;
 192		{<<"message">>, <<"rfc822">>, Parameters} ->
 193			{NewHeaders, NewBody} = parse_headers(Body),
 194			Parameters2 = [{<<"content-type-params">>, Parameters}, {<<"disposition">>, Disposition}, {<<"disposition-params">>, DispositionParams}],
 195			{<<"message">>, <<"rfc822">>, Headers, Parameters2, decode(NewHeaders, NewBody, Options)};
 196		{Type, SubType, Parameters} ->
 197			%io:format("body is ~s/~s~n", [Type, SubType]),
 198			Parameters2 = [{<<"content-type-params">>, Parameters}, {<<"disposition">>, Disposition}, {<<"disposition-params">>, DispositionParams}],
 199			{Type, SubType, Headers, Parameters2, decode_body(get_header_value(<<"Content-Transfer-Encoding">>, Headers), Body, proplists:get_value(<<"charset">>, Parameters), proplists:get_value(encoding, Options, none))};
 200		undefined -> % defaults
 201			Type = <<"text">>,
 202			SubType = <<"plain">>,
 203			Parameters = [{<<"content-type-params">>, [{<<"charset">>, <<"us-ascii">>}]}, {<<"disposition">>, Disposition}, {<<"disposition-params">>, DispositionParams}],
 204			{Type, SubType, Headers, Parameters, decode_body(get_header_value(<<"Content-Transfer-Encoding">>, Headers), Body)}
 205	end;
 206decode_component(_Headers, _Body, Other, _Options) ->
 207	erlang:error({mime_version, Other}).
 208
 209-spec(get_header_value/3 :: (Needle :: binary(), Headers :: [{binary(), binary()}], Default :: any()) -> binary() | any()).
 210%% @doc Do a case-insensitive header lookup to return that header's value, or the specified default.
 211get_header_value(Needle, Headers, Default) ->
 212	%io:format("Headers: ~p~n", [Headers]),
 213	F =
 214	fun({Header, _Value}) ->
 215			binstr:to_lower(Header) =:= binstr:to_lower(Needle)
 216	end,
 217	case lists:filter(F, Headers) of
 218		% TODO if there's duplicate headers, should we use the first or the last?
 219		[{_Header, Value}|_T] ->
 220			Value;
 221		_ ->
 222			Default
 223	end.
 224
 225-spec(get_header_value/2 :: (Needle :: binary(), Headers :: [{binary(), binary()}]) -> binary() | 'undefined').
 226%% @doc Do a case-insensitive header lookup to return the header's value, or `undefined'.
 227get_header_value(Needle, Headers) ->
 228	get_header_value(Needle, Headers, undefined).
 229
 230-spec parse_with_comments(Value :: binary()) -> binary() | no_return();
 231	(Value :: atom()) -> atom().
 232parse_with_comments(Value) when is_binary(Value) ->
 233	parse_with_comments(Value, [], 0, false);
 234parse_with_comments(Value) ->
 235	Value.
 236
 237-spec parse_with_comments(Value :: binary(), Acc :: list(), Depth :: non_neg_integer(), Quotes :: boolean()) -> binary() | no_return().
 238parse_with_comments(<<>>, _Acc, _Depth, Quotes) when Quotes ->
 239	erlang:error(unterminated_quotes);
 240parse_with_comments(<<>>, _Acc, Depth, _Quotes) when Depth > 0 ->
 241	erlang:error(unterminated_comment);
 242parse_with_comments(<<>>, Acc, _Depth, _Quotes) ->
 243	binstr:strip(list_to_binary(lists:reverse(Acc)));
 244parse_with_comments(<<$\\, H, Tail/binary>>, Acc, Depth, Quotes) when Depth > 0, H > 32, H < 127 ->
 245	parse_with_comments(Tail, Acc, Depth, Quotes);
 246parse_with_comments(<<$\\, Tail/binary>>, Acc, Depth, Quotes) when Depth > 0 ->
 247	parse_with_comments(Tail, Acc, Depth, Quotes);
 248parse_with_comments(<<$\\, H, Tail/binary>>, Acc, Depth, Quotes) when H > 32, H < 127 ->
 249	parse_with_comments(Tail, [H | Acc], Depth, Quotes);
 250parse_with_comments(<<$\\, Tail/binary>>, Acc, Depth, Quotes) ->
 251	parse_with_comments(Tail, [$\\ | Acc], Depth, Quotes);
 252parse_with_comments(<<$(, Tail/binary>>, Acc, Depth, Quotes) when not Quotes ->
 253	parse_with_comments(Tail, Acc, Depth + 1, Quotes);
 254parse_with_comments(<<$), Tail/binary>>, Acc, Depth, Quotes) when Depth > 0, not Quotes ->
 255	parse_with_comments(Tail, Acc, Depth - 1, Quotes);
 256parse_with_comments(<<_, Tail/binary>>, Acc, Depth, Quotes) when Depth > 0 ->
 257	parse_with_comments(Tail, Acc, Depth, Quotes);
 258parse_with_comments(<<$", T/binary>>, Acc, Depth, true) -> %"
 259	parse_with_comments(T, Acc, Depth, false);
 260parse_with_comments(<<$", T/binary>>, Acc, Depth, false) -> %"
 261	parse_with_comments(T, Acc, Depth, true);
 262parse_with_comments(<<H, Tail/binary>>, Acc, Depth, Quotes) ->
 263	parse_with_comments(Tail, [H | Acc], Depth, Quotes).
 264
 265-spec(parse_content_type/1 :: (Value :: 'undefined') -> 'undefined';
 266	(Value :: binary()) -> {binary(), binary(), [{binary(), binary()}]}).
 267parse_content_type(undefined) ->
 268	undefined;
 269parse_content_type(String) ->
 270	try parse_content_disposition(String) of
 271		{RawType, Parameters} ->
 272			case binstr:strchr(RawType, $/) of
 273				Index when Index < 2 ->
 274					throw(bad_content_type);
 275				Index ->
 276					Type = binstr:substr(RawType, 1, Index - 1),
 277					SubType = binstr:substr(RawType, Index + 1),
 278					{binstr:to_lower(Type), binstr:to_lower(SubType), Parameters}
 279			end
 280		catch
 281			bad_disposition ->
 282				throw(bad_content_type)
 283	end.
 284
 285-spec(parse_content_disposition/1 :: (Value :: 'undefined') -> 'undefined';
 286	(String :: binary()) -> {binary(), [{binary(), binary()}]}).
 287parse_content_disposition(undefined) ->
 288	undefined;
 289parse_content_disposition(String) ->
 290	[Disposition | Parameters] = binstr:split(parse_with_comments(String), <<";">>),
 291	F =
 292	fun(X) ->
 293		Y = binstr:strip(binstr:strip(X), both, $\t),
 294		case binstr:strchr(Y, $=) of
 295			Index when Index < 2 ->
 296				throw(bad_disposition);
 297			Index ->
 298				Key = binstr:substr(Y, 1, Index - 1),
 299				Value = binstr:substr(Y, Index + 1),
 300				{binstr:to_lower(Key), Value}
 301		end
 302	end,
 303	Params = lists:map(F, Parameters),
 304	{binstr:to_lower(Disposition), Params}.
 305
 306split_body_by_boundary(Body, Boundary, MimeVsn, Options) ->
 307	% find the indices of the first and last boundary
 308	case [binstr:strpos(Body, Boundary), binstr:strpos(Body, list_to_binary([Boundary, "--"]))] of
 309		[0, _] ->
 310			erlang:error(missing_boundary);
 311		[_, 0] ->
 312			erlang:error(missing_last_boundary);
 313		[Start, End] ->
 314			NewBody = binstr:substr(Body, Start + byte_size(Boundary), End - Start),
 315			% from now on, we can be sure that each boundary is preceeded by a CRLF
 316			Parts = split_body_by_boundary_(NewBody, list_to_binary(["\r\n", Boundary]), []),
 317			[decode_component(Headers, Body2, MimeVsn, Options) || {Headers, Body2} <- [V || {_, Body3} = V <- Parts, byte_size(Body3) =/= 0]]
 318		end.
 319
 320split_body_by_boundary_(<<>>, _Boundary, Acc) ->
 321	lists:reverse(Acc);
 322split_body_by_boundary_(Body, Boundary, Acc) ->
 323	% trim the incomplete first line
 324	TrimmedBody = binstr:substr(Body, binstr:strpos(Body, "\r\n") + 2),
 325	case binstr:strpos(TrimmedBody, Boundary) of
 326		0 ->
 327			lists:reverse([{[], TrimmedBody} | Acc]);
 328		Index ->
 329			split_body_by_boundary_(binstr:substr(TrimmedBody, Index + byte_size(Boundary)), Boundary,
 330				[parse_headers(binstr:substr(TrimmedBody, 1, Index - 1)) | Acc])
 331	end.
 332
 333-spec(parse_headers/1 :: (Body :: binary()) -> {[{binary(), binary()}], binary()}).
 334%% @doc Parse the headers off of a message and return a list of headers and the trailing body.
 335parse_headers(Body) ->
 336	case binstr:strpos(Body, "\r\n") of
 337		0 ->
 338			{[], Body};
 339		1 ->
 340			{[], binstr:substr(Body, 3)};
 341		Index ->
 342			parse_headers(binstr:substr(Body, Index+2), binstr:substr(Body, 1, Index - 1), [])
 343	end.
 344
 345
 346parse_headers(Body, <<H, Tail/binary>>, []) when H =:= $\s; H =:= $\t ->
 347	% folded headers
 348	{[], list_to_binary([H, Tail, "\r\n", Body])};
 349parse_headers(Body, <<H, T/binary>>, Headers) when H =:= $\s; H =:= $\t ->
 350	% folded headers
 351	[{FieldName, OldFieldValue} | OtherHeaders] = Headers,
 352	FieldValue = list_to_binary([OldFieldValue, T]),
 353	%io:format("~p = ~p~n", [FieldName, FieldValue]),
 354	case binstr:strpos(Body, "\r\n") of
 355		0 ->
 356			{lists:reverse([{FieldName, FieldValue} | OtherHeaders]), Body};
 357		1 ->
 358			{lists:reverse([{FieldName, FieldValue} | OtherHeaders]), binstr:substr(Body, 3)};
 359		Index2 ->
 360			parse_headers(binstr:substr(Body, Index2 + 2), binstr:substr(Body, 1, Index2 - 1), [{FieldName, FieldValue} | OtherHeaders])
 361	end;
 362parse_headers(Body, Line, Headers) ->
 363	%io:format("line: ~p, nextpart ~p~n", [Line, binstr:substr(Body, 1, 10)]),
 364	case binstr:strchr(Line, $:) of
 365		0 ->
 366			{lists:reverse(Headers), list_to_binary([Line, "\r\n", Body])};
 367		Index ->
 368			FieldName = binstr:substr(Line, 1, Index - 1),
 369			F = fun(X) -> X > 32 andalso X < 127 end,
 370			case binstr:all(F, FieldName) of
 371				true ->
 372					F2 = fun(X) -> (X > 31 andalso X < 127) orelse X == 9 end,
 373					FValue = binstr:strip(binstr:substr(Line, Index+1)),
 374					FieldValue = case binstr:all(F2, FValue) of
 375						true ->
 376							FValue;
 377						_ ->
 378							% I couldn't figure out how to use a pure binary comprehension here :(
 379							list_to_binary([ filter_non_ascii(C) || <<C:8>> <= FValue])
 380					end,
 381					case binstr:strpos(Body, "\r\n") of
 382						0 ->
 383							{lists:reverse([{FieldName, FieldValue} | Headers]), Body};
 384						1 ->
 385							{lists:reverse([{FieldName, FieldValue} | Headers]), binstr:substr(Body, 3)};
 386						Index2 ->
 387							parse_headers(binstr:substr(Body, Index2 + 2), binstr:substr(Body, 1, Index2 - 1), [{FieldName, FieldValue} | Headers])
 388					end;
 389				false ->
 390					{lists:reverse(Headers), list_to_binary([Line, "\r\n", Body])}
 391			end
 392	end.
 393
 394filter_non_ascii(C) when (C > 31 andalso C < 127); C == 9 ->
 395	<<C>>;
 396filter_non_ascii(_C) ->
 397	<<"?">>.
 398
 399decode_body(Type, Body, _InEncoding, none) ->
 400	decode_body(Type, << <<X/integer>> || <<X>> <= Body, X < 128 >>);
 401decode_body(Type, Body, undefined, _OutEncoding) ->
 402	decode_body(Type, << <<X/integer>> || <<X>> <= Body, X < 128 >>);
 403decode_body(Type, Body, InEncoding, OutEncoding) ->
 404	NewBody = decode_body(Type, Body),
 405	CD = case iconv:open(OutEncoding, fix_encoding(InEncoding)) of
 406		{ok, Res} -> Res;
 407		{error, einval} -> throw({bad_charset, fix_encoding(InEncoding)})
 408	end,
 409	{ok, Result} = try iconv:conv_chunked(CD, NewBody) of
 410		{ok, _} = Res2 -> Res2
 411	catch
 412		_:_ ->
 413			iconv:conv(CD, NewBody)
 414	end,
 415	iconv:close(CD),
 416	Result.
 417
 418-spec(decode_body/2 :: (Type :: binary() | 'undefined', Body :: binary()) -> binary()).
 419decode_body(undefined, Body) ->
 420	Body;
 421decode_body(Type, Body) ->
 422	case binstr:to_lower(Type) of
 423		<<"quoted-printable">> ->
 424			decode_quoted_printable(Body);
 425		<<"base64">> ->
 426			decode_base64(Body);
 427		_Other ->
 428			Body
 429	end.
 430
 431decode_base64(Body) ->
 432	base64:mime_decode(Body).
 433
 434decode_quoted_printable(Body) ->
 435	case binstr:strpos(Body, "\r\n") of
 436		0 ->
 437			decode_quoted_printable(Body, <<>>, []);
 438		Index ->
 439			decode_quoted_printable(binstr:substr(Body, 1, Index +1), binstr:substr(Body, Index + 2), [])
 440	end.
 441
 442decode_quoted_printable(<<>>, <<>>, Acc) ->
 443	list_to_binary(lists:reverse(Acc));
 444decode_quoted_printable(Line, Rest, Acc) ->
 445	case binstr:strpos(Rest, "\r\n") of
 446		0 ->
 447			decode_quoted_printable(Rest, <<>>, [decode_quoted_printable_line(Line, []) | Acc]);
 448		Index ->
 449			%io:format("next line ~p~nnext rest ~p~n", [binstr:substr(Rest, 1, Index +1), binstr:substr(Rest, Index + 2)]),
 450			decode_quoted_printable(binstr:substr(Rest, 1, Index +1), binstr:substr(Rest, Index + 2),
 451				[decode_quoted_printable_line(Line, []) | Acc])
 452	end.
 453
 454decode_quoted_printable_line(<<>>, Acc) ->
 455	lists:reverse(Acc);
 456decode_quoted_printable_line(<<$\r, $\n>>, Acc) ->
 457	lists:reverse(["\r\n" | Acc]);
 458decode_quoted_printable_line(<<$=, C, T/binary>>, Acc) when C =:= $\s; C =:= $\t ->
 459	case binstr:all(fun(X) -> X =:= $\s orelse X =:= $\t end, T) of
 460		true ->
 461			lists:reverse(Acc);
 462		false ->
 463			throw(badchar)
 464	end;
 465decode_quoted_printable_line(<<$=, $\r, $\n>>, Acc) ->
 466	lists:reverse(Acc);
 467decode_quoted_printable_line(<<$=, A:2/binary, T/binary>>, Acc) ->
 468	%<<X:1/binary, Y:1/binary>> = A,
 469	case binstr:all(fun(C) -> (C >= $0 andalso C =< $9) orelse (C >= $A andalso C =< $F) orelse (C >= $a andalso C =< $f) end, A) of
 470		true ->
 471			{ok, [C | []], []} = io_lib:fread("~16u", binary_to_list(A)),
 472			decode_quoted_printable_line(T, [C | Acc]);
 473		false ->
 474			throw(badchar)
 475	end;
 476decode_quoted_printable_line(<<$=>>, Acc) ->
 477	% soft newline
 478	lists:reverse(Acc);
 479decode_quoted_printable_line(<<H, T/binary>>, Acc) when H >= $!, H =< $< ->
 480	decode_quoted_printable_line(T, [H | Acc]);
 481decode_quoted_printable_line(<<H, T/binary>>, Acc) when H >= $>, H =< $~ ->
 482	decode_quoted_printable_line(T, [H | Acc]);
 483decode_quoted_printable_line(<<H, T/binary>>, Acc) when H =:= $\s; H =:= $\t ->
 484	% if the rest of the line is whitespace, truncate it
 485	case binstr:all(fun(X) -> X =:= $\s orelse X =:= $\t end, T) of
 486		true ->
 487			lists:reverse(Acc);
 488		false ->
 489			decode_quoted_printable_line(T, [H | Acc])
 490	end;
 491decode_quoted_printable_line(<<H, T/binary>>, Acc) ->
 492	decode_quoted_printable_line(T, [H| Acc]).
 493
 494check_headers(Headers) ->
 495	Checked = [<<"MIME-Version">>, <<"Date">>, <<"From">>, <<"Message-ID">>, <<"References">>, <<"Subject">>],
 496	check_headers(Checked, lists:reverse(Headers)).
 497
 498check_headers([], Headers) ->
 499	lists:reverse(Headers);
 500check_headers([Header | Tail], Headers) ->
 501	case get_header_value(Header, Headers) of
 502		undefined when Header == <<"MIME-Version">> ->
 503			check_headers(Tail, [{<<"MIME-Version">>, <<"1.0">>} | Headers]);
 504		undefined when Header == <<"Date">> ->
 505			check_headers(Tail, [{<<"Date">>, list_to_binary(smtp_util:rfc5322_timestamp())} | Headers]);
 506		undefined when Header == <<"From">> ->
 507			erlang:error(missing_from);
 508		undefined when Header == <<"Message-ID">> ->
 509			check_headers(Tail, [{<<"Message-ID">>, list_to_binary(smtp_util:generate_message_id())} | Headers]);
 510		undefined when Header == <<"References">> ->
 511			case get_header_value(<<"In-Reply-To">>, Headers) of
 512				undefined ->
 513					check_headers(Tail, Headers); % ok, whatever
 514				ReplyID ->
 515					check_headers(Tail, [{<<"References">>, ReplyID} | Headers])
 516			end;
 517		References when Header == <<"References">> ->
 518			% check if the in-reply-to header, if present, is in references
 519			case get_header_value(<<"In-Reply-To">>, Headers) of
 520				undefined ->
 521					check_headers(Tail, Headers); % ok, whatever
 522				ReplyID ->
 523					case binstr:strpos(binstr:to_lower(References), binstr:to_lower(ReplyID)) of
 524						0 ->
 525							% okay, tack on the reply-to to the end of References
 526							check_headers(Tail, [{<<"References">>, list_to_binary([References, " ", ReplyID])} | proplists:delete(<<"References">>, Headers)]);
 527						_Index ->
 528							check_headers(Tail, Headers) % nothing to do
 529					end
 530				end;
 531		_ ->
 532			check_headers(Tail, Headers)
 533	end.
 534
 535ensure_content_headers(Type, SubType, Parameters, Headers, Body, Toplevel) ->
 536	CheckHeaders = [<<"Content-Type">>, <<"Content-Disposition">>, <<"Content-Transfer-Encoding">>],
 537	ensure_content_headers(CheckHeaders, Type, SubType, Parameters, lists:reverse(Headers), Body, Toplevel).
 538
 539ensure_content_headers([], _, _, Parameters, Headers, _, _) ->
 540	{Parameters, lists:reverse(Headers)};
 541ensure_content_headers([Header | Tail], Type, SubType, Parameters, Headers, Body, Toplevel) ->
 542	case get_header_value(Header, Headers) of
 543		undefined when Header == <<"Content-Type">>, ((Type == <<"text">> andalso SubType =/= <<"plain">>) orelse Type =/= <<"text">>) ->
 544			% no content-type header, and its not text/plain
 545			CT = io_lib:format("~s/~s", [Type, SubType]),
 546			CTP = case Type of
 547				<<"multipart">> ->
 548					Boundary = case proplists:get_value(<<"boundary">>, proplists:get_value(<<"content-type-params">>, Parameters, [])) of
 549						undefined ->
 550							list_to_binary(smtp_util:generate_message_boundary());
 551						B ->
 552							B
 553					end,
 554					[{<<"boundary">>, Boundary} | proplists:delete(<<"boundary">>, proplists:get_value(<<"content-type-params">>, Parameters, []))];
 555				<<"text">> ->
 556					Charset = case proplists:get_value(<<"charset">>, proplists:get_value(<<"content-type-params">>, Parameters, [])) of
 557						undefined ->
 558							guess_charset(Body);
 559						C ->
 560							C
 561					end,
 562					[{<<"charset">>, Charset} | proplists:delete(<<"charset">>, proplists:get_value(<<"content-type-params">>, Parameters, []))];
 563				_ ->
 564					proplists:get_value(<<"content-type-params">>, Parameters, [])
 565			end,
 566
 567			%CTP = proplists:get_value(<<"content-type-params">>, Parameters, [guess_charset(Body)]),
 568			CTH = binstr:join([CT | encode_parameters(CTP)], ";"),
 569			NewParameters = [{<<"content-type-params">>, CTP} | proplists:delete(<<"content-type-params">>, Parameters)],
 570			ensure_content_headers(Tail, Type, SubType, NewParameters, [{<<"Content-Type">>, CTH} | Headers], Body, Toplevel);
 571		undefined when Header == <<"Content-Type">> ->
 572			% no content-type header and its text/plain
 573			Charset = case proplists:get_value(<<"charset">>, proplists:get_value(<<"content-type-params">>, Parameters, [])) of
 574				undefined ->
 575					guess_charset(Body);
 576				C ->
 577					C
 578			end,
 579			case Charset of
 580				<<"us-ascii">> ->
 581					% the default
 582					ensure_content_headers(Tail, Type, SubType, Parameters, Headers, Body, Toplevel);
 583				_ ->
 584					CTP = [{<<"charset">>, Charset} | proplists:delete(<<"charset">>, proplists:get_value(<<"content-type-params">>, Parameters, []))],
 585					CTH = binstr:join([<<"text/plain">> | encode_parameters(CTP)], ";"),
 586					NewParameters = [{<<"content-type-params">>, CTP} | proplists:delete(<<"content-type-params">>, Parameters)],
 587					ensure_content_headers(Tail, Type, SubType, NewParameters, [{<<"Content-Type">>, CTH} | Headers], Body, Toplevel)
 588			end;
 589		undefined when Header == <<"Content-Transfer-Encoding">>, Type =/= <<"multipart">> ->
 590			Enc = case proplists:get_value(<<"transfer-encoding">>, Parameters) of
 591				undefined ->
 592					guess_best_encoding(Body);
 593				Value ->
 594					Value
 595			end,
 596			case Enc of
 597				<<"7bit">> ->
 598					ensure_content_headers(Tail, Type, SubType, Parameters, Headers, Body, Toplevel);
 599				_ ->
 600					ensure_content_headers(Tail, Type, SubType, Parameters, [{<<"Content-Transfer-Encoding">>, Enc} | Headers], Body, Toplevel)
 601			end;
 602		undefined when Header == <<"Content-Disposition">>, Toplevel == false ->
 603			CD = proplists:get_value(<<"disposition">>, Parameters, <<"inline">>),
 604			CDP = proplists:get_value(<<"disposition-params">>, Parameters, []),
 605			CDH = binstr:join([CD | encode_parameters(CDP)], ";"),
 606			ensure_content_headers(Tail, Type, SubType, Parameters, [{<<"Content-Disposition">>, CDH} | Headers], Body, Toplevel);
 607		_ ->
 608			ensure_content_headers(Tail, Type, SubType, Parameters, Headers, Body, Toplevel)
 609	end.
 610
 611guess_charset(Body) ->
 612	case binstr:all(fun(X) -> X < 128 end, Body) of
 613		true -> <<"us-ascii">>;
 614		false -> <<"utf-8">>
 615	end.
 616
 617guess_best_encoding(<<Body:200/binary, Rest/binary>>) when Rest =/= <<>> ->
 618	guess_best_encoding(Body);
 619guess_best_encoding(Body) ->
 620	Size = byte_size(Body),
 621	% get only the allowed ascii characters
 622	% TODO - this might not be the complete list
 623	FilteredSize = length([X || <<X>> <= Body, ((X > 31 andalso X < 127) orelse X == $\r orelse X == $\n)]),
 624
 625	Percent = round((FilteredSize / Size) * 100),
 626
 627	%based on the % of printable characters, choose an encoding
 628	if
 629		Percent == 100 ->
 630			<<"7bit">>;
 631		Percent > 80 ->
 632			<<"quoted-printable">>;
 633		true ->
 634			<<"base64">>
 635	end.
 636
 637encode_parameters([[]]) ->
 638	[];
 639encode_parameters(Parameters) ->
 640	[encode_parameter(Parameter) || Parameter <- Parameters].
 641
 642encode_parameter({X, Y}) ->
 643	case escape_tspecial(Y, false, <<>>) of
 644		{true, Special} -> [X, $=, $", Special, $"];
 645		false -> [X, $=, Y]
 646	end.
 647
 648% See also: http://www.ietf.org/rfc/rfc2045.txt section 5.1
 649escape_tspecial(<<>>, false, _Acc) ->
 650	false;
 651escape_tspecial(<<>>, IsSpecial, Acc) ->
 652	{IsSpecial, Acc};
 653escape_tspecial(<<C, Rest/binary>>, _IsSpecial, Acc) when C =:= $" ->
 654	escape_tspecial(Rest, true, <<Acc/binary, $\\, $">>);
 655escape_tspecial(<<C, Rest/binary>>, _IsSpecial, Acc) when C =:= $\\ ->
 656	escape_tspecial(Rest, true, <<Acc/binary, $\\, $\\>>);
 657escape_tspecial(<<C, Rest/binary>>, _IsSpecial, Acc)
 658	when C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
 659		C =:= $,; C =:= $;; C =:= $:; C =:= $/; C =:= $[;
 660		C =:= $]; C =:= $?; C =:= $=; C =:= $\s ->
 661	escape_tspecial(Rest, true, <<Acc/binary, C>>);
 662escape_tspecial(<<C, Rest/binary>>, IsSpecial, Acc) ->
 663	escape_tspecial(Rest, IsSpecial, <<Acc/binary, C>>).
 664
 665encode_headers(Headers) ->
 666	encode_headers(Headers, []).
 667
 668encode_headers([], EncodedHeaders) ->
 669	EncodedHeaders;
 670encode_headers([{Key, Value}|T] = _Headers, EncodedHeaders) ->
 671	encode_headers(T, encode_folded_header(list_to_binary([Key,": ",Value]),
 672			EncodedHeaders)).
 673
 674encode_folded_header(Header, HeaderLines) ->
 675	case binstr:strchr(Header, $;) of
 676		0 ->
 677			HeaderLines ++ [Header];
 678		Index ->
 679			Remainder = binstr:substr(Header, Index+1),
 680			TabbedRemainder = case Remainder of
 681				<<$\t,_Rest/binary>> ->
 682					Remainder;
 683				_ ->
 684					list_to_binary(["\t", Remainder])
 685			end,
 686			% TODO - not tail recursive
 687			HeaderLines ++ [ binstr:substr(Header, 1, Index) ] ++
 688				encode_folded_header(TabbedRemainder, [])
 689	end.
 690
 691encode_component(_Type, _SubType, Headers, Params, Body) ->
 692	if
 693		is_list(Body) -> % is this a multipart component?
 694			Boundary = proplists:get_value(<<"boundary">>, proplists:get_value(<<"content-type-params">>, Params)),
 695			[<<>>] ++  % blank line before start of component
 696			lists:flatmap(
 697				fun(Part) ->
 698						[list_to_binary([<<"--">>, Boundary])] ++ % start with the boundary
 699						encode_component_part(Part)
 700				end,
 701				Body
 702			) ++ [list_to_binary([<<"--">>, Boundary, <<"--">>])] % final boundary (with /--$/)
 703			  ++ [<<>>]; % blank line at the end of the multipart component
 704		true -> % or an inline component?
 705			%encode_component_part({Type, SubType, Headers, Params, Body})
 706			encode_body(
 707					get_header_value(<<"Content-Transfer-Encoding">>, Headers),
 708					[Body]
 709			 )
 710	end.
 711
 712encode_component_part(Part) ->
 713	case Part of
 714		{<<"multipart">>, SubType, Headers, PartParams, Body} ->
 715			{FixedParams, FixedHeaders} = ensure_content_headers(<<"multipart">>, SubType, PartParams, Headers, Body, false),
 716			encode_headers(FixedHeaders) ++ [<<>>] ++
 717			encode_component(<<"multipart">>, SubType, FixedHeaders, FixedParams, Body);
 718		{Type, SubType, Headers, PartParams, Body} ->
 719			PartData = case Body of
 720				{_,_,_,_,_} -> encode_component_part(Body);
 721				String      -> [String]
 722			end,
 723			{_FixedParams, FixedHeaders} = ensure_content_headers(Type, SubType, PartParams, Headers, Body, false),
 724			encode_headers(FixedHeaders) ++ [<<>>] ++
 725			encode_body(
 726					get_header_value(<<"Content-Transfer-Encoding">>, FixedHeaders),
 727					PartData
 728			 );
 729		_ ->
 730			io:format("encode_component_part couldn't match Part to: ~p~n", [Part]),
 731			[]
 732	end.
 733
 734encode_body(undefined, Body) ->
 735	Body;
 736encode_body(Type, Body) ->
 737	case binstr:to_lower(Type) of
 738		<<"quoted-printable">> ->
 739			[InnerBody] = Body,
 740			encode_quoted_printable(InnerBody);
 741		<<"base64">> ->
 742			[InnerBody] = Body,
 743			wrap_to_76(base64:encode(InnerBody));
 744		_ -> Body
 745	end.
 746
 747wrap_to_76(String) ->
 748	[wrap_to_76(String, [])].
 749
 750wrap_to_76(<<>>, Acc) ->
 751	list_to_binary(lists:reverse(Acc));
 752wrap_to_76(<<Head:76/binary, Tail/binary>>, Acc) ->
 753	wrap_to_76(Tail, [<<"\r\n">>, Head | Acc]);
 754wrap_to_76(Head, Acc) ->
 755	list_to_binary(lists:reverse([<<"\r\n">>, Head | Acc])).
 756
 757encode_quoted_printable(Body) ->
 758	[encode_quoted_printable(Body, [], 0)].
 759
 760encode_quoted_printable(Body, Acc, L) when L >= 75 ->
 761	LastLine = case string:str(Acc, "\n") of
 762		0 ->
 763			Acc;
 764		Index ->
 765			string:substr(Acc, 1, Index-1)
 766	end,
 767	%Len = length(LastLine),
 768	case string:str(LastLine, " ") of
 769		0 when L =:= 75 ->
 770			% uh-oh, no convienient whitespace, just cram a soft newline in
 771			encode_quoted_printable(Body, [$\n, $\r, $= | Acc], 0);
 772		1 when L =:= 75 ->
 773			% whitespace is the last character we wrote
 774			encode_quoted_printable(Body, [$\n, $\r, $= | Acc], 0);
 775		SIndex when (L - 75) < SIndex ->
 776			% okay, we can safely stick some whitespace in
 777			Prefix = string:substr(Acc, 1, SIndex-1),
 778			Suffix = string:substr(Acc, SIndex),
 779			NewAcc = lists:concat([Prefix, "\n\r=", Suffix]),
 780			encode_quoted_printable(Body, NewAcc, 0);
 781		_ ->
 782			% worst case, we're over 75 characters on the line
 783			% and there's no obvious break points, just stick one
 784			% in at position 75 and call it good. However, we have
 785			% to be very careful not to stick the soft newline in
 786			% the middle of an existing quoted-printable escape.
 787
 788			% TODO - fix this to be less stupid
 789			I = 3, % assume we're at most 3 over our cutoff
 790			Prefix = string:substr(Acc, 1, I),
 791			Suffix = string:substr(Acc, I+1),
 792			NewAcc = lists:concat([Prefix, "\n\r=", Suffix]),
 793			encode_quoted_printable(Body, NewAcc, 0)
 794	end;
 795encode_quoted_printable(<<>>, Acc, _L) ->
 796	list_to_binary(lists:reverse(Acc));
 797encode_quoted_printable(<<$=, T/binary>> , Acc, L) ->
 798	encode_quoted_printable(T, [$D, $3, $= | Acc], L+3);
 799encode_quoted_printable(<<$\r, $\n, T/binary>> , Acc, _L) ->
 800	encode_quoted_printable(T, [$\n, $\r | Acc], 0);
 801encode_quoted_printable(<<H, T/binary>>, Acc, L) when H >= $!, H =< $< ->
 802	encode_quoted_printable(T, [H | Acc], L+1);
 803encode_quoted_printable(<<H, T/binary>>, Acc, L) when H >= $>, H =< $~ ->
 804	encode_quoted_printable(T, [H | Acc], L+1);
 805encode_quoted_printable(<<H, $\r, $\n, T/binary>>, Acc, _L) when H == $\s; H == $\t ->
 806	[[A, B]] = io_lib:format("~2.16.0B", [H]),
 807	encode_quoted_printable(T, [$\n, $\r, B, A, $= | Acc], 0);
 808encode_quoted_printable(<<H, T/binary>>, Acc, L) when H == $\s; H == $\t ->
 809	encode_quoted_printable(T, [H | Acc], L+1);
 810encode_quoted_printable(<<H, T/binary>>, Acc, L) ->
 811	[[A, B]] = io_lib:format("~2.16.0B", [H]),
 812	encode_quoted_printable(T, [B, A, $= | Acc], L+3).
 813
 814get_default_encoding() ->
 815	case code:ensure_loaded(iconv) of
 816		{error, _} ->
 817			none;
 818		{module, iconv} ->
 819			<<"utf-8//IGNORE">>
 820	end.
 821
 822% convert some common invalid character names into the correct ones
 823fix_encoding(Encoding) when Encoding == <<"utf8">>; Encoding == <<"UTF8">> ->
 824	<<"UTF-8">>;
 825fix_encoding(Encoding) ->
 826	Encoding.
 827
 828-ifdef(TEST).
 829
 830parse_with_comments_test_() ->
 831	[
 832		{"bleh",
 833			fun() ->
 834					?assertEqual(<<"1.0">>, parse_with_comments(<<"1.0">>)),
 835					?assertEqual(<<"1.0">>, parse_with_comments(<<"1.0  (produced by MetaSend Vx.x)">>)),
 836					?assertEqual(<<"1.0">>, parse_with_comments(<<"(produced by MetaSend Vx.x) 1.0">>)),
 837					?assertEqual(<<"1.0">>, parse_with_comments(<<"1.(produced by MetaSend Vx.x)0">>))
 838			end
 839		},
 840		{"comments that parse as empty",
 841			fun() ->
 842					?assertEqual(<<>>, parse_with_comments(<<"(comment (nested (deeply)) (and (oh no!) again))">>)),
 843					?assertEqual(<<>>, parse_with_comments(<<"(\\)\\\\)">>)),
 844					?assertEqual(<<>>, parse_with_comments(<<"(by way of Whatever <redir@my.org>)    (generated by Eudora)">>))
 845			end
 846		},
 847		{"some more",
 848			fun() ->
 849					?assertEqual(<<":sysmail@  group. org, Muhammed. Ali @Vegas.WBA">>, parse_with_comments(<<"\":sysmail\"@  group. org, Muhammed.(the greatest) Ali @(the)Vegas.WBA">>)),
 850					?assertEqual(<<"Pete <pete@silly.test>">>, parse_with_comments(<<"Pete(A wonderful \\) chap) <pete(his account)@silly.test(his host)>">>))
 851			end
 852		},
 853		{"non list values",
 854			fun() ->
 855					?assertEqual(undefined, parse_with_comments(undefined)),
 856					?assertEqual(17, parse_with_comments(17))
 857			end
 858		},
 859		{"Parens within quotes ignored",
 860			fun() ->
 861				?assertEqual(<<"Height (from xkcd).eml">>, parse_with_comments(<<"\"Height (from xkcd).eml\"">>)),
 862				?assertEqual(<<"Height (from xkcd).eml">>, parse_with_comments(<<"\"Height \(from xkcd\).eml\"">>))
 863			end
 864		},
 865		{"Escaped quotes are handled correctly",
 866			fun() ->
 867					?assertEqual(<<"Hello \"world\"">>, parse_with_comments(<<"Hello \\\"world\\\"">>)),
 868					?assertEqual(<<"<boss@nil.test>, Giant; \"Big\" Box <sysservices@example.net>">>, parse_with_comments(<<"<boss@nil.test>, \"Giant; \\\"Big\\\" Box\" <sysservices@example.net>">>))
 869			end
 870		},
 871		{"backslash not part of a quoted pair",
 872			fun() ->
 873					?assertEqual(<<"AC \\ DC">>, parse_with_comments(<<"AC \\ DC">>)),
 874					?assertEqual(<<"AC  DC">>, parse_with_comments(<<"AC ( \\ ) DC">>))
 875			end
 876		},
 877		{"Unterminated quotes or comments",
 878			fun() ->
 879					?assertError(unterminated_quotes, parse_with_comments(<<"\"Hello there ">>)),
 880					?assertError(unterminated_quotes, parse_with_comments(<<"\"Hello there \\\"">>)),
 881					?assertError(unterminated_comment, parse_with_comments(<<"(Hello there ">>)),
 882					?assertError(unterminated_comment, parse_with_comments(<<"(Hello there \\\)">>))
 883			end
 884		}
 885	].
 886	
 887parse_content_type_test_() ->
 888	[
 889		{"parsing content types",
 890			fun() ->
 891					?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain; charset=us-ascii (Plain text)">>)),
 892					?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)),
 893					?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"Text/Plain; Charset=\"us-ascii\"">>)),
 894					?assertEqual({<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"----_=_NextPart_001_01C9DCAE.1F2CB390">>}]},
 895						parse_content_type(<<"multipart/mixed; boundary=\"----_=_NextPart_001_01C9DCAE.1F2CB390\"">>))
 896			end
 897		},
 898		{"parsing content type with a tab in it",
 899			fun() ->
 900					?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_type(<<"text/plain;\tcharset=us-ascii">>)),
 901					?assertEqual({<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}, {<<"foo">>, <<"bar">>}]}, parse_content_type(<<"text/plain;\tcharset=us-ascii;\tfoo=bar">>))
 902			end
 903		},
 904		{"invalid content types",
 905			fun() ->
 906					?assertThrow(bad_content_type, parse_content_type(<<"text\\plain; charset=us-ascii">>)),
 907					?assertThrow(bad_content_type, parse_content_type(<<"text/plain; charset us-ascii">>))
 908				end
 909			}
 910	].
 911
 912parse_content_disposition_test_() ->
 913	[
 914		{"parsing valid dispositions",
 915			fun() ->
 916					?assertEqual({<<"inline">>, []}, parse_content_disposition(<<"inline">>)),
 917					?assertEqual({<<"inline">>, []}, parse_content_disposition(<<"inline;">>)),
 918					?assertEqual({<<"attachment">>, [{<<"filename">>, <<"genome.jpeg">>}, {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}]}, parse_content_disposition(<<"attachment; filename=genome.jpeg;modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)),
 919					?assertEqual({<<"text/plain">>, [{<<"charset">>, <<"us-ascii">>}]}, parse_content_disposition(<<"text/plain; charset=us-ascii (Plain text)">>))
 920			end
 921		},
 922		{"invalid dispositions",
 923			fun() ->
 924					?assertThrow(bad_disposition, parse_content_disposition(<<"inline; =bar">>)),
 925					?assertThrow(bad_disposition, parse_content_disposition(<<"inline; bar">>))
 926			end
 927		}
 928	].
 929
 930various_parsing_test_() ->
 931	[
 932		{"split_body_by_boundary test",
 933			fun() ->
 934					?assertEqual([{[], <<"foo bar baz">>}], split_body_by_boundary_(<<"stuff\r\nfoo bar baz">>, <<"--bleh">>, [])),
 935					?assertEqual([{[], <<"foo\r\n">>}, {[], <<>>}, {[], <<>>}, {[], <<"bar baz">>}], split_body_by_boundary_(<<"stuff\r\nfoo\r\n--bleh\r\n--bleh\r\n--bleh-- stuff\r\nbar baz">>, <<"--bleh">>, [])),
 936					%?assertEqual([{[], []}, {[], []}, {[], "bar baz"}], split_body_by_boundary_("\r\n--bleh\r\n--bleh\r\n", "--bleh", [])),
 937					%?assertMatch([{"text", "plain", [], _,"foo\r\n"}], split_body_by_boundary("stuff\r\nfoo\r\n--bleh\r\n--bleh\r\n--bleh-- stuff\r\nbar baz", "--bleh", "1.0"))
 938					?assertEqual({[], <<"foo: bar\r\n">>}, parse_headers(<<"\r\nfoo: bar\r\n">>)),
 939					?assertEqual({[{<<"foo">>, <<"barbaz">>}], <<>>}, parse_headers(<<"foo: bar\r\n baz\r\n">>)),
 940					?assertEqual({[], <<" foo bar baz\r\nbam">>}, parse_headers(<<"\sfoo bar baz\r\nbam">>)),
 941					ok
 942			end
 943		},
 944		{"Headers with non-ASCII characters",
 945			fun() ->
 946					?assertEqual({[{<<"foo">>, <<"bar ?? baz">>}], <<>>}, parse_headers(<<"foo: bar ø baz\r\n">>)),
 947					?assertEqual({[], <<"bär: bar baz\r\n">>}, parse_headers(<<"bär: bar baz\r\n">>))
 948			end
 949		},
 950		{"Headers with tab characters",
 951			fun() ->
 952					?assertEqual({[{<<"foo">>, <<"bar		baz">>}], <<>>}, parse_headers(<<"foo: bar		baz\r\n">>))
 953			end
 954		}
 955
 956	].
 957
 958-define(IMAGE_MD5, <<110,130,37,247,39,149,224,61,114,198,227,138,113,4,198,60>>).
 959
 960parse_example_mails_test_() ->
 961	Getmail = fun(File) ->
 962		{ok, Email} = file:read_file(string:concat("../testdata/", File)),
 963		%Email = binary_to_list(Bin),
 964		decode(Email)
 965	end,
 966	[
 967		{"parse a plain text email",
 968			fun() ->
 969				Decoded = Getmail("Plain-text-only.eml"),
 970				?assertEqual(5, tuple_size(Decoded)),
 971				{Type, SubType, _Headers, _Properties, Body} = Decoded,
 972				?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
 973				?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
 974			end
 975		},
 976		{"parse a plain text email with no content type",
 977			fun() ->
 978				Decoded = Getmail("Plain-text-only-no-content-type.eml"),
 979				?assertEqual(5, tuple_size(Decoded)),
 980				{Type, SubType, _Headers, _Properties, Body} = Decoded,
 981				?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
 982				?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
 983			end
 984		},
 985		{"parse a plain text email with no MIME header",
 986			fun() ->
 987				{Type, SubType, _Headers, _Properties, Body} =
 988					Getmail("Plain-text-only-no-MIME.eml"),
 989				?assertEqual({<<"text">>, <<"plain">>}, {Type, SubType}),
 990				?assertEqual(<<"This message contains only plain text.\r\n">>, Body)
 991			end
 992		},
 993		{"parse an email that says it is multipart but contains no boundaries",
 994			fun() ->
 995					?assertError(missing_boundary, Getmail("Plain-text-only-with-boundary-header.eml"))
 996			end
 997		},
 998		{"parse a multipart email with no MIME header",
 999			fun() ->
1000					?assertError(non_mime_multipart, Getmail("rich-text-no-MIME.eml"))
1001			end
1002		},
1003		{"rich text",
1004			fun() ->
1005				%% pardon my naming here.  apparently 'rich text' in mac mail
1006				%% means 'html'.
1007				Decoded = Getmail("rich-text.eml"),
1008				?assertEqual(5, tuple_size(Decoded)),
1009				{Type, SubType, _Headers, _Properties, Body} = Decoded,
1010				?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
1011				?assertEqual(2, length(Body)),
1012				[Plain, Html] = Body,
1013				?assertEqual({5, 5}, {tuple_size(Plain), tuple_size(Html)}),
1014				?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains rich text.">>}, Plain),
1015				?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
1016			end
1017		},
1018		{"rich text no boundary",
1019			fun() ->
1020				?assertError(no_boundary, Getmail("rich-text-no-boundary.eml"))
1021			end
1022		},
1023		{"rich text missing first boundary",
1024			fun() ->
1025				% TODO - should we handle this more elegantly?
1026				Decoded = Getmail("rich-text-missing-first-boundary.eml"),
1027				?assertEqual(5, tuple_size(Decoded)),
1028				{Type, SubType, _Headers, _Properties, Body} = Decoded,
1029				?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
1030				?assertEqual(1, length(Body)),
1031				[Html] = Body,
1032				?assertEqual(5, tuple_size(Html)),
1033				?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
1034			end
1035		},
1036		{"rich text missing last boundary",
1037			fun() ->
1038				?assertError(missing_last_boundary, Getmail("rich-text-missing-last-boundary.eml"))
1039			end
1040		},
1041		{"rich text wrong last boundary",
1042			fun() ->
1043				?assertError(missing_last_boundary, Getmail("rich-text-broken-last-boundary.eml"))
1044			end
1045		},
1046		{"rich text missing text content type",
1047			fun() ->
1048				%% pardon my naming here.  apparently 'rich text' in mac mail
1049				%% means 'html'.
1050				Decoded = Getmail("rich-text-no-text-contenttype.eml"),
1051				?assertEqual(5, tuple_size(Decoded)),
1052				{Type, SubType, _Headers, _Properties, Body} = Decoded,
1053				?assertEqual({<<"multipart">>, <<"alternative">>}, {Type, SubType}),
1054				?assertEqual(2, length(Body)),
1055				[Plain, Html] = Body,
1056				?assertEqual({5, 5}, {tuple_size(Plain), tuple_size(Html)}),
1057				?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains rich text.">>}, Plain),
1058				?assertMatch({<<"text">>, <<"html">>, _, _, <<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This </b><i>message </i><span class=\"Apple-style-span\" style=\"text-decoration: underline;\">contains </span>rich text.</body></html>">>}, Html)
1059			end
1060		},
1061		{"text attachment only",
1062			fun() ->
1063				Decoded = Getmail("text-attachment-only.eml"),
1064				?assertEqual(5, tuple_size(Decoded)),
1065				{Type, SubType, _Headers, _Properties, Body} = Decoded,
1066				?assertEqual({<<"multipart">>, <<"mixed">>}, {Type, SubType}),
1067				?assertEqual(1, length(Body)),
1068				Rich = <<"{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf460\r\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\r\n{\\colortbl;\\red255\\green255\\blue255;}\r\n\\margl1440\\margr1440\\vieww9000\\viewh8400\\viewkind0\r\n\\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\ql\\qnatural\\pardirnatural\r\n\r\n\\f0\\fs24 \\cf0 This is a basic rtf file.}">>,
1069				?assertMatch([{<<"text">>, <<"rtf">>, _, _, Rich}], Body)
1070			end
1071		},
1072		{"image attachment only",
1073			fun() ->
1074				Decoded = Getmail("image-attachment-only.eml"),
1075				?assertEqual(5, tuple_size(Decoded)),
1076				{Type, SubType, _Headers, _Properties, Body} = Decoded,
1077				?assertEqual({<<"multipart">>, <<"mixed">>}, {Type, SubType}),
1078				?assertEqual(1, length(Body)),
1079				?assertMatch([{<<"image">>, <<"jpeg">>, _, _, _}], Body),
1080				[H | _] = Body,
1081				[{<<"image">>, <<"jpeg">>, _, Parameters, _Image}] = Body,
1082				?assertEqual(?IMAGE_MD5, erlang:md5(element(5, H))),
1083				?assertEqual(<<"inline">>, proplists:get_value(<<"disposition">>, Parameters)),
1084				?assertEqual(<<"chili-pepper.jpg">>, proplists:get_value(<<"filename">>, proplists:get_value(<<"disposition-params">>, Parameters))),
1085				?assertEqual(<<"chili-pepper.jpg">>, proplists:get_value(<<"name">>, proplists:get_value(<<"content-type-params">>, Parameters)))
1086			end
1087		},
1088		{"message attachment only",
1089			fun() ->
1090				Decoded = Getmail("message-as-attachment.eml"),
1091				?assertMatch({<<"multipart">>, <<"mixed">>, _, _, _}, Decoded),
1092				[Body] = element(5, Decoded),
1093				?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Body),
1094				Subbody = element(5, Body),
1095				?assertMatch({<<"text">>, <<"plain">>, _, _, _}, Subbody),
1096				?assertEqual(<<"This message contains only plain text.\r\n">>, element(5, Subbody))
1097			end
1098		},
1099		{"message, image, and rtf attachments.",
1100			fun() ->
1101				Decoded = Getmail("message-image-text-attachments.eml"),
1102				?assertMatch({<<"multipart">>, <<"mixed">>, _, _, _}, Decoded),
1103				?assertEqual(3, length(element(5, Decoded))),
1104				[Message, Rtf, Image] = element(5, Decoded),
1105				?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Message),
1106				Submessage = element(5, Message),
1107				?assertMatch({<<"text">>, <<"plain">>, _, _, <<"This message contains only plain text.\r\n">>}, Submessage),
1108				
1109				?assertMatch({<<"text">>, <<"rtf">>, _, _, _}, Rtf),
1110				?assertEqual(<<"{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf460\r\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\r\n{\\colortbl;\\red255\\green255\\blue255;}\r\n\\margl1440\\margr1440\\vieww9000\\viewh8400\\viewkind0\r\n\\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\ql\\qnatural\\pardirnatural\r\n\r\n\\f0\\fs24 \\cf0 This is a basic rtf file.}">>, element(5, Rtf)),
1111				
1112				?assertMatch({<<"image">>, <<"jpeg">>, _, _, _}, Image),
1113				?assertEqual(?IMAGE_MD5, erlang:md5(element(5, Image)))				
1114			end
1115		},
1116		{"Outlook 2007 with leading tabs in quoted-printable.",
1117			fun() ->
1118				Decoded = Getmail("outlook-2007.eml"),
1119				?assertMatch({<<"multipart">>, <<"alternative">>, _, _, _}, Decoded)
1120			end
1121		},
1122		{"The gamut",
1123			fun() ->
1124				% multipart/alternative
1125				%	text/plain
1126				%	multipart/mixed
1127				%		text/html
1128				%		message/rf822
1129				%			multipart/mixed
1130				%				message/rfc822
1131				%					text/plain
1132				%		text/html
1133				%		message/rtc822
1134				%			text/plain
1135				%		text/html
1136				%		image/jpeg
1137				%		text/html
1138				%		text/rtf
1139				%		text/html
1140				Decoded = Getmail("the-gamut.eml"),
1141				?assertMatch({<<"multipart">>, <<"alternative">>, _, _, _}, Decoded),
1142				?assertEqual(2, length(element(5, Decoded))),
1143				[Toptext, Topmultipart] = element(5, Decoded),
1144				?assertMatch({<<"text">>, <<"plain">>, _, _, _}, Toptext),
1145				?assertEqual(<<"This is rich text.\r\n\r\nThe list is html.\r\n\r\nAttchments:\r\nan email containing an attachment of an email.\r\nan email of only plain text.\r\nan image\r\nan rtf file.\r\n">>, element(5, Toptext)),
1146				?assertEqual(9, length(element(5, Topmultipart))),
1147				[Html, Messagewithin, _Brhtml, _Message, _Brhtml, Image, _Brhtml, Rtf, _Brhtml] = element(5, Topmultipart),
1148				?assertMatch({<<"text">>, <<"html">>, _, _, _}, Html),
1149				?assertEqual(<<"<html><body style=\"word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; \"><b>This</b> is <i>rich</i> text.<div><br></div><div>The list is html.</div><div><br></div><div>Attchments:</div><div><ul class=\"MailOutline\"><li>an email containing an attachment of an email.</li><li>an email of only plain text.</li><li>an image</li><li>an rtf file.</li></ul></div><div></div></body></html>">>, element(5, Html)),
1150				
1151				?assertMatch({<<"message">>, <<"rfc822">>, _, _, _}, Messagewithin),
1152				%?assertEqual(1, length(element(5, Messagewithin))),
1153				?assertMatch({<<"multipart">>, <<"mixed">>, _, _, [{<<"message">>, <<"rfc822">>, _, _, {<<"text">>, <<"plain">>, _, _, <<"This mess…

Large files files are truncated, but you can click here to view the full file