/src/markdown.erl
Erlang | 397 lines | 299 code | 24 blank | 74 comment | 1 complexity | a40d72353bd9962a6b499c148617e202 MD5 | raw file
%% Copyright (c) 2009 Will Larson <lethain@gmail.com>
%% <insert MIT License here>
%% @todo support for horizontal rule
%% @todo support for secondary title syntax "Title\n====="
%% @todo support for multi-level indentation
-module(markdown).
-author("Will Larson <lethain@gmail.com>").
-version("0.0.2").
-export([markdown/1]).
-export([line_start/5, single_line/5]).
-export([trim_whitespace/2, preserve_line/5, start_of_next_line/1, starts_with_number/1, remove_top_tag/3]).
-export([identify_line_type/1]).
-export([toggle_tag/3, exclusive_insert_tag/3]).
-export([parse_link/2, parse_link_text/2, parse_link_remainder/2]).

-define(DEBUG_LOGGER, fun(_X,_Y) -> ok end).
%-define(DEBUG_LOGGER, fun(X,Y) -> io:format(X,Y) end).

%%
%% Primary Interface
%%

%% @doc Entry point: hand the document to line_start/5 with every
%% parser stack empty. A list is first converted with list_to_binary/1;
%% a binary is used as is.
markdown(Binary) when is_binary(Binary) ->
    line_start(Binary, [], [], [], []);
markdown(Text) when is_list(Text) ->
    markdown(list_to_binary(Text)).


%%
%% Multi-line Entities
%%

%% @doc Classify a line by its leading marker.
%% Returns {Type, Remainder} where Type is one of empty_line, ul,
%% deep_ul, ol, deep_ol, blockquote, pre or p, and Remainder is the
%% input with the marker stripped.
%% Horizontal rules ("- -" / "--") are not recognised yet, see the
%% @todo at the top of the file.
%% NOTE(review): the " - ", " * " and " " prefixes below are copied
%% exactly as they appear in the source under review; runs of spaces
%% may have been collapsed there (line_start/5 works in units of four
%% spaces) -- confirm against the canonical file.
identify_line_type(<<>>) ->
    {empty_line, <<>>};
identify_line_type(<<$\n, Rest/binary>>) ->
    {empty_line, Rest};
identify_line_type(<<Marker, $\s, Rest/binary>>) when Marker =:= $-; Marker =:= $* ->
    {ul, Rest};
identify_line_type(<<$\s, Marker, $\s, Rest/binary>>) when Marker =:= $-; Marker =:= $* ->
    {deep_ul, Rest};
identify_line_type(<<">> ", Rest/binary>>) ->
    {blockquote, Rest};
identify_line_type(<<"> ", Rest/binary>>) ->
    {blockquote, Rest};
identify_line_type(<<$\s, Rest/binary>>) ->
    numbered_or(Rest, deep_ol, pre);
identify_line_type(<<Line/binary>>) ->
    numbered_or(Line, ol, p).

%% A line that starts_with_number/1 accepts becomes NumberedType (with
%% the number prefix stripped); any other line becomes PlainType and is
%% returned unchanged.
numbered_or(Line, NumberedType, PlainType) ->
    case starts_with_number(Line) of
        {true, AfterNumber} ->
            {NumberedType, AfterNumber};
        false ->
            {PlainType, Line}
    end.

%% @doc Handle the start of a line: manage opening and closing of
%% multi-line (block level) entities, then hand the rest of the line
%% to single_line/5.
%%
%% Binary       - remaining input, positioned at the start of a line
%% OpenTags     - stack of open inline tags (most recent first)
%% Acc          - HTML fragments produced so far, most recent first
%% LinkContext  - proplist of reference-style link definitions
%% MultiContext - stack of open block tags (innermost first)
%%
%% Returns the rendered document as a binary (via the wrap-up clause
%% of single_line/5).
line_start(<<Binary/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    ?DEBUG_LOGGER("line_start: ~p~n",[Binary]),
    % the expected indentation depth is one less than the number of
    % open lists, since a top-level list is not indented
    IndentDepth = erlang:max(list_depth(MultiContext) - 1, 0),
    % trim whitespace based on indent depth, 4 spaces per indentation depth;
    % trimming is capped to avoid capturing pre blocks and significant
    % whitespace from within pre blocks
    {Binary2, Offset} = trim_whitespace(Binary, 4*IndentDepth),
    ?DEBUG_LOGGER("~p => ~p, IndentDepth = ~p, offset ~p~n", [Binary, Binary2, IndentDepth, Offset]),
    % close one list level for every 4 spaces of indentation the
    % line falls short of the current depth
    CloseDepthBy = IndentDepth - (Offset div 4),
    {MultiContext2, Acc3} = close_list_levels(CloseDepthBy, MultiContext, Acc),
    {Type, Binary3} = identify_line_type(Binary2),
    ?DEBUG_LOGGER("type (~p) and stack (~p) for ~p~n", [Type, MultiContext2, Binary]),
    {MultiContext3, Acc4} = block_transition(Type, MultiContext2, Acc3),
    case {Type, Binary3} of
        {empty_line, <<_, _/binary>>} ->
            % a blank line followed by more input: the next byte starts a new line
            line_start(Binary3, OpenTags, Acc4, LinkContext, MultiContext3);
        _ ->
            single_line(Binary3, OpenTags, Acc4, LinkContext, MultiContext3)
    end.

%% Number of list containers (ol/ul) on the block stack.
list_depth(MultiContext) ->
    length([Tag || Tag <- MultiContext, Tag =:= <<"ol">> orelse Tag =:= <<"ul">>]).

%% Close Levels nested list levels: each level removes one li and then
%% one ul/ol from the block stack via remove_top_tag/3.
close_list_levels(Levels, Tags, Html) when Levels =< 0 ->
    {Tags, Html};
close_list_levels(Levels, Tags, Html) ->
    {Tags2, Html2} = remove_top_tag([<<"li">>], Tags, Html),
    {Tags3, Html3} = remove_top_tag([<<"ul">>, <<"ol">>], Tags2, Html2),
    close_list_levels(Levels - 1, Tags3, Html3).

%% Update the block stack and the output for a line of the given type.
%% Clause order matters: specific stack shapes come before general ones.
%% Html is in reverse order, so the fragment written first in a list
%% literal is the one emitted last.
block_transition(empty_line, [<<"p">> | Rest], Html) ->
    {Rest, [<<"</p>">> | Html]};
block_transition(empty_line, [<<"pre">> | Rest], Html) ->
    {Rest, [<<"</pre>">> | Html]};
block_transition(empty_line, [<<"blockquote">> | Rest], Html) ->
    {Rest, [<<"</blockquote>">> | Html]};
block_transition(empty_line, [<<"li">>, <<"ol">> | Rest], Html) ->
    {Rest, [<<"</ol>">>, <<"</li>">> | Html]};
block_transition(empty_line, [<<"li">>, <<"ul">> | Rest], Html) ->
    {Rest, [<<"</ul>">>, <<"</li>">> | Html]};
block_transition(p, [], Html) ->
    {[<<"p">>], [<<"<p>">> | Html]};
block_transition(p, [<<"p">> | _] = Context, Html) ->
    % consecutive paragraph lines are joined with a space
    {Context, [<<" ">> | Html]};
block_transition(p, [<<"li">> | _] = Context, Html) ->
    % a plain line directly after a list item continues that item
    {Context, [<<" ">> | Html]};
block_transition(p, Context, Html) ->
    switch_block(<<"p">>, [<<"pre">>, <<"blockquote">>], Context, Html);
block_transition(pre, [], Html) ->
    {[<<"pre">>], [<<"<pre>">> | Html]};
block_transition(pre, [<<"pre">> | _] = Context, Html) ->
    % line breaks are significant inside pre blocks
    {Context, [<<"\n">> | Html]};
block_transition(pre, Context, Html) ->
    switch_block(<<"pre">>, [<<"p">>, <<"blockquote">>], Context, Html);
block_transition(blockquote, [], Html) ->
    {[<<"blockquote">>], [<<"<blockquote>">> | Html]};
block_transition(blockquote, [<<"blockquote">> | _] = Context, Html) ->
    {Context, [<<" ">> | Html]};
block_transition(blockquote, Context, Html) ->
    switch_block(<<"blockquote">>, [<<"pre">>, <<"p">>], Context, Html);
block_transition(deep_ul, [<<"li">> | _] = Context, Html) ->
    {[<<"li">>, <<"ul">> | Context], [<<"<li>">>, <<"<ul>">> | Html]};
block_transition(deep_ol, [<<"li">> | _] = Context, Html) ->
    {[<<"li">>, <<"ol">> | Context], [<<"<li>">>, <<"<ol>">> | Html]};
block_transition(ol, [<<"li">> | _] = Context, Html) ->
    {Context, [<<"<li>">>, <<"</li>">> | Html]};
block_transition(ol, Context, Html) ->
    {[<<"li">>, <<"ol">> | Context], [<<"<li>">>, <<"<ol>">> | Html]};
block_transition(ul, [<<"li">> | _] = Context, Html) ->
    {Context, [<<"<li>">>, <<"</li>">> | Html]};
block_transition(ul, Context, Html) ->
    {[<<"li">>, <<"ul">> | Context], [<<"<li>">>, <<"<ul>">> | Html]};
block_transition(_Type, Context, Html) ->
    {Context, Html}.

%% Replace the innermost block with New when it is one of Replaceable,
%% closing the old tag and opening the new one; otherwise leave the
%% stack and the output untouched.
switch_block(New, Replaceable, [Tag | Rest] = Context, Html) ->
    case lists:member(Tag, Replaceable) of
        true ->
            {[New | Rest], [<<"<", New/binary, ">">>, <<"</", Tag/binary, ">">> | Html]};
        false ->
            {Context, Html}
    end.

%%
%% Single Line Entities (headers, em, strong, links, code)
%%

%% @doc Consume inline markup up to the end of the line or document.
%% Takes the same five arguments as line_start/5.
%% NOTE(review): heading tags opened here are only closed by the
%% wrap-up clause at end of document; nothing closes them at end of line.

%% Wrapup clause, called at end of document: closes every tag that is
%% still open (inline tags first, then block tags, innermost first)
%% and joins the fragments into the final binary.
single_line(<<>>, OpenTags, Acc, _LinkContext, MultiContext) ->
    Remaining = OpenTags ++ MultiContext,
    ?DEBUG_LOGGER("remaining_tags: ~p~n", [lists:reverse(Remaining)]),
    ClosedTags = lists:foldl(fun(Tag, Html) ->
                                     [<<"</", Tag/binary, ">">> | Html]
                             end, Acc, Remaining),
    % markdown is gathered in reverse order, so reverse exactly once;
    % the previous reverse-then-foldr-append emitted the document backwards
    iolist_to_binary(lists:reverse(ClosedTags));

%% Pass control to multi-line entity handler when
%% encountering new-line.
%% NOTE(review): the " \n" literal is copied as it appears in the
%% source under review; the usual Markdown hard break is two spaces
%% before the newline -- confirm against the canonical file.
single_line(<<" \n", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(Rest, OpenTags, [<<"<br>">> | Acc], LinkContext, MultiContext);
single_line(<<"\n", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(Rest, OpenTags, Acc, LinkContext, MultiContext);
%% Headings: longest marker first so "#####" is not read as five h1s.
single_line(<<"#####", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    open_heading(<<"h5">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"####", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    open_heading(<<"h4">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"###", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    open_heading(<<"h3">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"##", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    open_heading(<<"h2">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"#", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    open_heading(<<"h1">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
%% Emphasis and code: "**" must be tried before "*", "``" before "`".
single_line(<<"**", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    toggle_inline(<<"strong">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"*", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    toggle_inline(<<"em">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"``", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    toggle_inline(<<"code">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
single_line(<<"`", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    toggle_inline(<<"code">>, Rest, OpenTags, Acc, LinkContext, MultiContext);
%% Images share the link syntax, prefixed with "!".
single_line(<<"![", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    case parse_link(<<"[", Rest/binary>>, LinkContext) of
        {link, Rest2, Href, Text, <<>>} ->
            % titles are binaries, so "no title" is the empty binary
            Img = <<"<img src=\"", Href/binary, "\" alt=\"", Text/binary, "\">">>,
            single_line(Rest2, OpenTags, [Img | Acc], LinkContext, MultiContext);
        {link, Rest2, Href, Text, Title} ->
            Img = <<"<img src=\"", Href/binary, "\" alt=\"", Text/binary, "\" title=\"", Title/binary, "\">">>,
            single_line(Rest2, OpenTags, [Img | Acc], LinkContext, MultiContext);
        _NotAnImage ->
            % not a well-formed image: keep the marker as literal text
            single_line(Rest, OpenTags, [<<"![">> | Acc], LinkContext, MultiContext)
    end;
single_line(<<"[", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    case parse_link(<<"[", Rest/binary>>, LinkContext) of
        {link, Rest2, Href, Text, <<>>} ->
            Link = <<"<a href=\"", Href/binary, "\">", Text/binary, "</a>">>,
            single_line(Rest2, OpenTags, [Link | Acc], LinkContext, MultiContext);
        {link, Rest2, Href, Text, Title} ->
            Link = <<"<a href=\"", Href/binary, "\" title=\"", Title/binary, "\">", Text/binary, "</a>">>,
            single_line(Rest2, OpenTags, [Link | Acc], LinkContext, MultiContext);
        {context, Rest2, LinkContext2} ->
            % a link definition produces no output, only a richer context
            single_line(Rest2, OpenTags, Acc, LinkContext2, MultiContext);
        {syntax_error, _Reason} ->
            % not a well-formed link: keep the bracket as literal text
            single_line(Rest, OpenTags, [<<"[">> | Acc], LinkContext, MultiContext)
    end;
%% Anything else is copied through one byte at a time.
single_line(<<B:1/binary, Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    single_line(Rest, OpenTags, [B | Acc], LinkContext, MultiContext).

%% Open heading Tag unless it is already open, then continue the line.
open_heading(Tag, Rest, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(Tag, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext).

%% Open inline Tag, or close it when already open, then continue the line.
toggle_inline(Tag, Rest, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = toggle_tag(Tag, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext).

%%
%% Utility functions (parsing links, managing tags, etc)
%%


%% @doc sub-parser for handling links.
%% Handles formats:
%%   [This is a test](http://test.com/ "The title")
%%   [This is a test][test]
%%
%% For the second format, you'll need to have previously
%% specified the link using the format
%%   [test]: http://test.com/ "Test Title"
%%
%% Binary must start at the opening "[" of the link text.
%%
%% @spec parse_link(binary(), proplist()) -> link_components() | new_context() | syntax_error()
%%       new_context = {context, binary(), proplist()}
%%       link_components = {link, binary(), href(), text(), title()}
%%       syntax_error = {syntax_error, atom()}
%%       href = binary()
%%       text = binary()
%%       title = binary()  (empty when no title was given)
%%       proplist = [{binary(), {href(), title()}}]
parse_link(Binary, LinkContext) ->
    case parse_link_text(Binary, []) of
        {syntax_error, _Reason} = Error ->
            % must be checked first: it has the same arity as {Rest, Text}
            Error;
        {Rest, Text} ->
            parse_link_target(Rest, Text, LinkContext)
    end.

%% Dispatch on the character following the closing "]" of the link text.
parse_link_target(<<"(", Rest/binary>>, Text, _LinkContext) ->
    {Rest2, Link, Title} = parse_link_remainder(Rest, <<")">>),
    {link, Rest2, Link, Text, Title};
parse_link_target(<<"[", _/binary>> = Rest, Text, LinkContext) ->
    case parse_link_text(Rest, []) of
        {syntax_error, _Reason} = Error ->
            Error;
        {Rest2, Reference} ->
            case proplists:get_value(Reference, LinkContext) of
                {Link, Title} ->
                    {link, Rest2, Link, Text, Title};
                _ ->
                    {syntax_error, reference_to_undeclared_link_definition}
            end
    end;
parse_link_target(<<":", Rest/binary>>, Text, LinkContext) ->
    {Rest2, Link, Title} = parse_link_remainder(Rest, <<"\n">>),
    {context, Rest2, [{Text, {Link, Title}} | LinkContext]};
parse_link_target(_Other, _Text, _LinkContext) ->
    % also covers the input ending right after "]"
    {syntax_error, expected_paren_bracket_or_colon}.

%% @doc parse the text portion of a link.
%% For example, parse "test" from [test][this].
%% Every "[" encountered is skipped, so it never ends up in the text.
%% Returns {Rest, Text} where Rest is the input after the closing "]",
%% or {syntax_error, Reason} when a newline or the end of the input is
%% reached before the closing "]".
parse_link_text(<<>>, _Acc) ->
    {syntax_error, unexpected_end_of_link};
parse_link_text(<<"\n", _Rest/binary>>, _Acc) ->
    {syntax_error, unexpected_newline_in_link};
parse_link_text(<<"[", Rest/binary>>, Acc) ->
    parse_link_text(Rest, Acc);
parse_link_text(<<"]", Rest/binary>>, Acc) ->
    % Acc holds the text bytes most recent first
    {Rest, iolist_to_binary(lists:reverse(Acc))};
parse_link_text(<<Char:1/binary, Rest/binary>>, Acc) ->
    parse_link_text(Rest, [Char | Acc]).

%% @doc Parse the link and title out of Markdown link remainder.
%% 'http:/test/ "this"' has link of "http:/test/" and title of "this"
%%
%% EndChar is the single-byte binary that terminates the remainder
%% (")" for inline links, "\n" for link definitions). The end of the
%% input is accepted as a terminator too, so a definition on the last
%% line of a document does not need a trailing newline.
%% Returns {Rest, Link, Title}; Title is the empty binary when absent.
%%
%% Example:
%%    {Binary4, Link, Title} = parse_link_remainder(Binary3, <<")">>),
parse_link_remainder(<<Binary/binary>>, <<EndChar:1/binary>>) ->
    parse_link_remainder(Binary, EndChar, [], [], link).

%% The last argument is the parser state:
%%   link  - collecting the href (spaces are skipped)
%%   title - inside the double-quoted title
%%   done  - after the closing quote, waiting for the terminator
parse_link_remainder(<<>>, _EndChar, LinkAcc, TitleAcc, _State) ->
    finish_link_remainder(<<>>, LinkAcc, TitleAcc);
parse_link_remainder(<<EndChar:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, _State) ->
    % the terminator wins in every state, even inside a title
    finish_link_remainder(Rest, LinkAcc, TitleAcc);
parse_link_remainder(<<"\"", Rest/binary>>, EndChar, LinkAcc, TitleAcc, title) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, done);
parse_link_remainder(<<"\"", Rest/binary>>, EndChar, LinkAcc, [], link) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, [], title);
parse_link_remainder(<<" ", Rest/binary>>, EndChar, LinkAcc, [], link) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, [], link);
parse_link_remainder(<<Char:1/binary, Rest/binary>>, EndChar, LinkAcc, [], link) ->
    parse_link_remainder(Rest, EndChar, [Char | LinkAcc], [], link);
parse_link_remainder(<<Char:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, title) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, [Char | TitleAcc], title);
parse_link_remainder(<<_Ignored:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, done) ->
    % anything between the closing quote and the terminator is dropped
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, done).

%% Build the result triple from the reversed byte accumulators.
finish_link_remainder(Rest, LinkAcc, TitleAcc) ->
    {Rest,
     iolist_to_binary(lists:reverse(LinkAcc)),
     iolist_to_binary(lists:reverse(TitleAcc))}.

%% @doc remove at most Max leading spaces from a line, returns the
%% trimmed binary and the count of spaces removed as {Binary, Count}.
%% Only the space character is trimmed.
trim_whitespace(<<Binary/binary>>, Max) ->
    trim_whitespace(Binary, 0, Max).

trim_whitespace(<<Binary/binary>>, Max, Max) ->
    % limit reached: whatever follows is significant
    {Binary, Max};
trim_whitespace(<<" ", Binary/binary>>, Offset, Max) ->
    trim_whitespace(Binary, Offset+1, Max);
trim_whitespace(<<Binary/binary>>, Offset, _Max) ->
    {Binary, Offset}.

%% @doc close a tag if it is in the open tags stack,
%% otherwise open it.
%% @spec toggle_tag(tag(), tag_stack(), html()) -> {tag_stack(), html()}
%%       tag = binary()
%%       tag_stack = [tag()]
%%       html = [binary()]
toggle_tag(Tag, OpenTags, Acc) ->
    case lists:member(Tag, OpenTags) of
        true ->
            {lists:delete(Tag, OpenTags), [<<"</",Tag/binary,">">> | Acc]};
        false ->
            {[Tag | OpenTags], [<<"<",Tag/binary,">">> | Acc]}
    end.

%% @doc insert tag IFF it isn't already on the
%% stack of open tags.
%% @spec exclusive_insert_tag(tag(), tag_stack(), html()) -> {tag_stack(), html()}
%%       tag = binary()
%%       tag_stack = [tag()]
%%       html = [binary()]
exclusive_insert_tag(Tag, OpenTags, Acc) ->
    case lists:member(Tag, OpenTags) of
        true ->
            {OpenTags, Acc};
        false ->
            {[Tag | OpenTags], [<<"<",Tag/binary,">">> | Acc]}
    end.

%% @doc consume an entire line as is without modification.
%% Every byte up to and including the newline is pushed onto Acc
%% unchanged, then control returns to line_start/5 for the next line
%% (or for the empty remainder at end of input).
preserve_line(<<>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(<<>>, OpenTags, Acc, LinkContext, MultiContext);
preserve_line(<<"\n", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(Rest, OpenTags, [<<"\n">> | Acc], LinkContext, MultiContext);
preserve_line(<<Char:1/binary, Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    preserve_line(Rest, OpenTags, [Char | Acc], LinkContext, MultiContext).

%% @doc skip remainder of line.
%% Returns the input following the first newline, or the empty binary
%% when there is no newline.
start_of_next_line(<<>>) ->
    <<>>;
start_of_next_line(<<"\n", Rest/binary>>) ->
    Rest;
start_of_next_line(<<_Char:1/binary, Rest/binary>>) ->
    start_of_next_line(Rest).

%% @doc determine if line starts with a number.
%% Returns {true, Rest} when the line starts with one or more decimal
%% digits followed by ". " (Rest is what follows that prefix), and
%% false otherwise -- including when the input ends after the digits.
starts_with_number(<<Binary/binary>>) ->
    ?DEBUG_LOGGER("starts_with_number: ~p~n", [Binary]),
    starts_with_number(Binary, []).

%% Acc collects the digits seen so far; only its emptiness matters.
starts_with_number(<<". ", Rest/binary>>, [_ | _]) ->
    {true, Rest};
starts_with_number(<<Digit, Rest/binary>>, Acc) when Digit >= $0, Digit =< $9 ->
    starts_with_number(Rest, [Digit | Acc]);
starts_with_number(_Other, _Acc) ->
    % no digits before the dot, a non-digit, or end of input
    false.

%% @doc remove the first occurrence of any of the tags in ToRemove.
%% Tags is a stack with the innermost open tag first, so the first
%% occurrence is the innermost one. Its closing tag is pushed onto
%% Html. Returns {Tags, Html} unchanged when nothing matches.
remove_top_tag(ToRemove, Tags, Html) ->
    NotWanted = fun(Tag) -> not lists:member(Tag, ToRemove) end,
    case lists:splitwith(NotWanted, Tags) of
        {_All, []} ->
            {Tags, Html};
        {Above, [Tag | Below]} ->
            {Above ++ Below, [<<"</", Tag/binary, ">">> | Html]}
    end.