PageRenderTime 47ms CodeModel.GetById 10ms app.highlight 29ms RepoModel.GetById 1ms app.codeStats 0ms

/src/markdown.erl

http://github.com/lethain/erlang_markdown
Erlang | 397 lines | 299 code | 24 blank | 74 comment | 1 complexity | a40d72353bd9962a6b499c148617e202 MD5 | raw file
  1%% Copyright (c) 2009 Will Larson <lethain@gmail.com>
  2%% <insert MIT License here>
  3%% @todo support for horizontal rule
  4%% @todo support for secondary title syntax "Title\n====="
  5%% @todo support for multi-level indentation
  6-module(markdown).
  7-author("Will Larson <lethain@gmail.com>").
  8-version("0.0.2").
  9-export([markdown/1]).
 10-export([line_start/5, single_line/5]).
 11-export([trim_whitespace/2, preserve_line/5, start_of_next_line/1, starts_with_number/1, remove_top_tag/3]).
 12-export([identify_line_type/1]).
 13-export([toggle_tag/3, exclusive_insert_tag/3]).
 14-export([parse_link/2, parse_link_text/2, parse_link_remainder/2]).
 15
 16-define(DEBUG_LOGGER, fun(_X,_Y) -> ok end).
 17%-define(DEBUG_LOGGER, fun(X,Y) -> io:format(X,Y) end).			      
 18
 19%%
 20%% Primary Interface
 21%%
 22
 23markdown(Text) when is_list(Text) ->
 24    markdown(list_to_binary(Text));
 25markdown(Binary) when is_binary(Binary) ->
 26    line_start(Binary, [], [], [], []).
 27
 28
 29%%
 30%% Multi-line Entities
 31%%
 32
 33identify_line_type(<<"">>) -> {empty_line, <<"">>};
 34identify_line_type(<<"\n", Binary/binary>>) -> {empty_line, Binary};
 35%identify_line_type(<<"- -", Binary/binary>>) ->  {hr, Binary};
 36%identify_line_type(<<"--", Binary/binary>>) ->  {hr, Binary};
 37identify_line_type(<<"- ", Binary/binary>>) -> {ul, Binary};
 38identify_line_type(<<"    - ", Binary/binary>>) -> {deep_ul, Binary};
 39identify_line_type(<<"* ", Binary/binary>>) -> {ul, Binary};
 40identify_line_type(<<"    * ", Binary/binary>>) -> {deep_ul, Binary};
 41identify_line_type(<<">> ", Binary/binary>>) -> {blockquote, Binary};
 42identify_line_type(<<"> ", Binary/binary>>) -> {blockquote, Binary};
 43identify_line_type(<<"    ", Binary/binary>>) ->
 44    case starts_with_number(Binary) of    
 45	{true, Binary2} ->
 46	    {deep_ol, Binary2};
 47	false ->
 48	    {pre, Binary}
 49    end;
 50identify_line_type(<<Binary/binary>>) ->
 51    case starts_with_number(Binary) of    
 52	{true, Binary2} ->
 53	    {ol, Binary2};
 54	false ->
 55	    {p, Binary}
 56    end.
 57
 58%% Manages closing multi-line entities.
 59line_start(<<Binary/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->   
 60    ?DEBUG_LOGGER("line_start: ~p~n",[Binary]),
 61    % calculate the expected indentation depth based on current stack
 62    IndentDepth0 = lists:foldr(fun(Elem, Depth) ->
 63				       case Elem of
 64					   <<"ol">> -> Depth + 1;
 65					   <<"ul">> -> Depth + 1;
 66					   _ -> Depth
 67				       end end, 0, MultiContext),
 68    IndentDepth = erlang:max(IndentDepth0-1,0),
 69    % trim whitespace based on indent depth, 4 spaces per indentation depth
 70    % restrict trimming to avoid capturing pre blocks and signifigant whitespace
 71    % from within pre blocks
 72    {Binary2, Offset} = trim_whitespace(Binary, 4*IndentDepth),
 73    ?DEBUG_LOGGER("~p => ~p, IndentDepth = ~p, offset ~p~n", [Binary, Binary2, IndentDepth, Offset]),
 74    % close an appropriate number of ol/ul blocks when
 75    % the amount of trimmed whitespace is inadequate for
 76    % the current indentation depth
 77    CloseDepthBy = IndentDepth - trunc(Offset/4),
 78    {MultiContext2, Acc3} = lists:foldr(fun(_, {Tags, Acc0}) ->
 79						{Tags2, Acc2} = remove_top_tag([<<"li">>], Tags, Acc0),
 80						remove_top_tag([<<"ul">>, <<"ol">>], Tags2, Acc2)
 81					end, {MultiContext, Acc}, lists:seq(1,CloseDepthBy)),
 82    {Type, Binary3} = identify_line_type(Binary2),
 83    ?DEBUG_LOGGER("type (~p) and stack (~p) for ~p~n", [Type, MultiContext2, Binary]),
 84    {MultiContext3, Acc4} = case {Type, MultiContext2} of
 85				{empty_line, [<<"p">> | RestTags]} ->
 86				    {RestTags, [<<"</p>">> | Acc3]};
 87				{empty_line, [<<"pre">> | RestTags]} ->
 88				    {RestTags, [<<"</pre>">> | Acc3]};
 89				{empty_line, [<<"blockquote">> | RestTags]} ->
 90				    {RestTags, [<<"</blockquote>">> | Acc3]};
 91				{empty_line, [<<"li">>, <<"ol">> | RestTags]} ->
 92				    {RestTags, [<<"</ol>">>, <<"</li>">> | Acc3]};
 93				{empty_line, [<<"li">>, <<"ul">> | RestTags]} ->
 94				    {RestTags, [<<"</ul>">>, <<"</li>">> | Acc3]};
 95				{p, []} ->
 96				    {[<<"p">>], [<<"<p>">> | Acc3]};
 97				{p, [<<"p">> | RestTags]} ->
 98				    {[<<"p">> | RestTags], [<<" ">> | Acc3]};
 99				{p, [<<"li">> | RestTags]} ->
100				    {[<<"li">> | RestTags], [<<" ">> | Acc3]};
101				{p, [Tag | RestTags]} ->
102				    case lists:member(Tag, [<<"pre">>, <<"blockquote">>]) of
103					true ->
104					    {[<<"p">> | RestTags], [<<"<p>">>,<<"</",Tag/binary,">">> | Acc3]};
105					false ->
106					    {[Tag | RestTags], Acc3}
107				    end;
108				{pre, []} ->
109				    {[<<"pre">>], [<<"<pre>">> | Acc3]};
110				{pre, [<<"pre">> | RestTags]} ->
111				    {[<<"pre">> | RestTags], [<<"\n">> | Acc3]};
112				{pre, [Tag | RestTags]} ->
113				    case lists:member(Tag, [<<"p">>, <<"blockquote">>]) of
114					true ->
115					    {[<<"pre">> | RestTags], [<<"<pre>">>,<<"</",Tag/binary,">">> | Acc3]};
116					false ->
117					    {[Tag | RestTags], Acc3}
118				    end;
119				{blockquote, []} ->
120				    {[<<"blockquote">>], [<<"<blockquote>">> | Acc3]};
121				{blockquote, [<<"blockquote">> | RestTags]} ->
122				    {[<<"blockquote">> | RestTags], [<<" ">> | Acc3]};
123				{blockquote, [Tag | RestTags]} ->
124				    case lists:member(Tag, [<<"pre">>, <<"p">>]) of
125					true ->
126					    {[<<"blockquote">> | RestTags], [<<"<blockquote>">>,<<"</",Tag/binary,">">> | Acc3]};
127					false ->
128					    {[Tag | RestTags], Acc3}
129				    end;
130				{deep_ul, [<<"li">> | RestTags]} ->
131				    {[ <<"li">>,  <<"ul">>, <<"li">> | RestTags], [<<"<li>">>, <<"<ul>">> | Acc3]};
132				{deep_ol, [<<"li">> | RestTags]} ->
133				    {[ <<"li">>,  <<"ol">>, <<"li">> | RestTags], [<<"<li>">>, <<"<ol>">> | Acc3]};
134				{ol, [<<"li">> | RestTags]} ->
135				    {[<<"li">> | RestTags], [<<"<li>">>, <<"</li>">> | Acc3]};
136				{ol, RestTags} ->
137				    {[<<"li">>, <<"ol">> | RestTags], [<<"<li>">>, <<"<ol>">> | Acc3]};
138				{ul, [<<"li">> | RestTags]} ->
139				    {[<<"li">> | RestTags], [<<"<li>">>, <<"</li>">> | Acc3]};
140				{ul, RestTags} ->
141				    {[<<"li">>, <<"ul">> | RestTags], [<<"<li>">>, <<"<ul>">> | Acc3]};
142				_ ->
143				    {MultiContext2, Acc3}
144			    end,
145    case {Type, Binary3} of
146	{empty_line, <<"">>} ->
147	    single_line(Binary3, OpenTags, Acc4, LinkContext, MultiContext3);
148	{empty_line, _} ->
149	    line_start(Binary3, OpenTags, Acc4, LinkContext, MultiContext3);
150	_ ->
151	    single_line(Binary3, OpenTags, Acc4, LinkContext, MultiContext3)
152    end.
153
154%%
155%% Single Line Entities (headers, em, strong, links, code)
156%%
157
%% @doc Inline parser: consumes the content of the current line and emits
%%      HTML for headers, strong/em, code spans, images and links; any
%%      other byte is copied to the output unchanged. A newline hands
%%      control back to line_start/5, and the end of the input produces
%%      the final document.
%%
%%      OpenTags is the stack of open inline tags, Acc the output so far
%%      (newest piece first), LinkContext the reference-link definitions
%%      seen so far and MultiContext the stack of open block tags.
%%
%%      NOTE(review): header tags (h1..h5) are pushed onto OpenTags and are
%%      only closed by the end-of-document clause, not at the end of their
%%      line - confirm whether that is intended.
%% @spec single_line(binary(), [binary()], [binary()], list(), [binary()]) -> binary()

%% Wrapup clause, reached at end of document: close whatever is still open
%% (inline tags first, then block tags, innermost first) and assemble the
%% result.
single_line(<<"">>, OpenTags, Acc, _LinkContext, MultiContext) ->
    Open = lists:reverse(lists:append([OpenTags, MultiContext])),
    ?DEBUG_LOGGER("remaining_tags: ~p~n", [Open]),
    ClosedTags = lists:foldr(fun(Tag, Acc2) ->
                                     [<<"</",Tag/binary,">">> | Acc2]
                             end, Acc, Open),
    %% The output is gathered newest-first, so reverse it exactly once to
    %% get document order. (Reversing and then folding from the right, as
    %% was done before, reversed it twice and emitted the pieces backwards.)
    iolist_to_binary(lists:reverse(ClosedTags));

%% Two trailing spaces force a line break; every newline returns control to
%% the block-level handler.
single_line(<<"  \n", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(Rest, OpenTags, [<<"<br>">> | Acc], LinkContext, MultiContext);
single_line(<<"\n", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(Rest, OpenTags, Acc, LinkContext, MultiContext);

%% Headers: longest marker first so "###" is not read as "#" three times.
single_line(<<"#####", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(<<"h5">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"####", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(<<"h4">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"###", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(<<"h3">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"##", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(<<"h2">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"#", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = exclusive_insert_tag(<<"h1">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);

%% Emphasis and code spans toggle: the first marker opens, the next closes.
single_line(<<"**", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = toggle_tag(<<"strong">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"*", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = toggle_tag(<<"em">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"``", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = toggle_tag(<<"code">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);
single_line(<<"`", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    {OpenTags2, Acc2} = toggle_tag(<<"code">>, OpenTags, Acc),
    single_line(Rest, OpenTags2, Acc2, LinkContext, MultiContext);

%% Images share the link syntax, prefixed with "!".
single_line(<<"![", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    case parse_link(<<"[", Rest/binary>>, LinkContext) of
        %% the title is a binary, so "no title" is <<"">> (not [])
        {link, Rest2, Href, Text, <<"">>} ->
            Img = <<"<img src=\"", Href/binary, "\" alt=\"", Text/binary, "\">">>,
            single_line(Rest2, OpenTags, [Img | Acc], LinkContext, MultiContext);
        {link, Rest2, Href, Text, Title} ->
            Img = <<"<img src=\"", Href/binary, "\" alt=\"", Text/binary, "\" title=\"", Title/binary, "\">">>,
            single_line(Rest2, OpenTags, [Img | Acc], LinkContext, MultiContext);
        {context, _Rest2, _LinkContext2} ->
            %% "![x]: ..." is not an image; keep the marker as plain text
            single_line(Rest, OpenTags, [<<"![">> | Acc], LinkContext, MultiContext);
        {syntax_error, _Reason} ->
            %% not a well-formed image; keep the marker as plain text
            single_line(Rest, OpenTags, [<<"![">> | Acc], LinkContext, MultiContext)
    end;

%% Inline links, reference links and reference definitions.
single_line(<<"[", Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    case parse_link(<<"[" , Rest/binary>>, LinkContext) of
        {link, Rest2, Href, Text, <<"">>} ->
            Link = <<"<a href=\"", Href/binary, "\">", Text/binary, "</a>">>,
            single_line(Rest2, OpenTags, [Link | Acc], LinkContext, MultiContext);
        {link, Rest2, Href, Text, Title} ->
            Link = <<"<a href=\"", Href/binary, "\" title=\"", Title/binary, "\">", Text/binary, "</a>">>,
            single_line(Rest2, OpenTags, [Link | Acc], LinkContext, MultiContext);
        {context, Rest2, LinkContext2} ->
            %% a definition produces no output, it only extends the context
            single_line(Rest2, OpenTags, Acc, LinkContext2, MultiContext);
        {syntax_error, _Reason} ->
            %% a bracket that does not start a link is ordinary text
            single_line(Rest, OpenTags, [<<"[">> | Acc], LinkContext, MultiContext)
    end;

%% Anything else is copied through one byte at a time.
single_line(<<B:1/binary, Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    single_line(Rest, OpenTags, [B | Acc], LinkContext, MultiContext).
226
227%%
228%% Utility functions (parsing links, managing tags, etc)
229%%
230
231
%% @doc Sub-parser for links; Binary must start at the opening "[".
%%      Handles the formats:
%%        [This is a test](http://test.com/ "The title")
%%        [This is a test][test]
%%        [test]: http://test.com/ "Test Title"
%%
%%      The second format only resolves when the reference was defined
%%      earlier in the document using the third format; the third format
%%      returns an extended link context instead of a link.
%%
%%      Malformed input (a newline or the end of input inside the
%%      brackets, an unknown reference, or nothing link-like after the
%%      closing bracket) is reported as {syntax_error, Reason} rather than
%%      raising.
%%
%% @spec parse_link(binary(), proplist()) ->
%%           link_components() | new_context() | syntax_error()
%%       new_context = {context, binary(), proplist()}
%%       link_components = {link, binary(), href(), text(), title()}
%%       syntax_error = {syntax_error, atom()}
%%       href = binary()
%%       text = binary()
%%       title = binary()
%%       proplist = [{binary(), {href(), title()}}]
%%      title() is the empty binary when no title was given.
parse_link(Binary, LinkContext) ->
    case parse_link_text(Binary, []) of
        {syntax_error, _Reason} = Error ->
            Error;
        {Rest, Text} ->
            parse_link_target(Rest, Text, LinkContext)
    end.

%% Decide what kind of link this is from what follows the closing bracket.
parse_link_target(<<"(", Rest/binary>>, Text, _LinkContext) ->
    {Rest2, Link, Title} = parse_link_remainder(Rest, <<")">>),
    {link, Rest2, Link, Text, Title};
parse_link_target(<<"[", _/binary>> = Rest, Text, LinkContext) ->
    case parse_link_text(Rest, []) of
        {syntax_error, _Reason} = Error ->
            Error;
        {Rest2, Reference} ->
            resolve_reference(Reference, Rest2, Text, LinkContext)
    end;
parse_link_target(<<":", Rest/binary>>, Text, LinkContext) ->
    %% a definition runs to the end of its line
    {Rest2, Link, Title} = parse_link_remainder(Rest, <<"\n">>),
    {context, Rest2, [{Text, {Link, Title}} | LinkContext]};
parse_link_target(<<_/binary>>, _Text, _LinkContext) ->
    %% includes the end of input directly after the closing bracket
    {syntax_error, expected_paren_bracket_or_colon}.

%% Look a reference name up among the definitions seen so far.
resolve_reference(Reference, Rest, Text, LinkContext) ->
    case proplists:get_value(Reference, LinkContext) of
        {Link, Title} ->
            {link, Rest, Link, Text, Title};
        _ ->
            {syntax_error, reference_to_undeclared_link_definition}
    end.
268
%% @doc Parse the bracketed text portion of a link.
%%      For example, parses "test" out of "[test][this]" and returns it
%%      together with the input that follows the closing bracket.
%%      Opening brackets are skipped; Acc holds the bytes collected so far,
%%      newest first (callers start with []).
%%      A newline or the end of the input before the closing bracket is
%%      reported as {syntax_error, Reason}.
%% @spec parse_link_text(binary(), [binary()]) ->
%%           {binary(), binary()} | {syntax_error, atom()}
parse_link_text(<<>>, _Acc) ->
    %% ran out of input without ever seeing "]"
    {syntax_error, unterminated_link_text};
parse_link_text(<<"\n", _Rest/binary>>, _Acc) ->
    {syntax_error, unexpected_newline_in_link};
parse_link_text(<<"[", Rest/binary>>, Acc) ->
    parse_link_text(Rest, Acc);
parse_link_text(<<"]", Rest/binary>>, Acc) ->
    {Rest, iolist_to_binary(lists:reverse(Acc))};
parse_link_text(<<Char:1/binary, Rest/binary>>, Acc) ->
    parse_link_text(Rest, [Char | Acc]).
281
%% @doc Parse the link and optional title out of a Markdown link remainder.
%%      'http:/test/ "this"' has a link of "http:/test/" and a title of
%%      "this". Parsing stops at EndChar (a one-byte binary such as
%%      <<")">> or <<"\n">>) or at the end of the input, whichever comes
%%      first. Returns {Rest, Link, Title}; Title is the empty binary when
%%      none was given.
%%
%%      Example:
%%          {Binary4, Link, Title} = parse_link_remainder(Binary3, <<")">>),
%%
%%      Note that EndChar also terminates parsing when it appears inside
%%      the quoted title.
%% @spec parse_link_remainder(binary(), binary()) -> {binary(), binary(), binary()}
parse_link_remainder(<<Binary/binary>>, <<EndChar:1/binary>>) ->
    parse_link_remainder(Binary, EndChar, [], [], link).

%% State machine over the states link -> title -> done.
%%
%% End of input counts as a terminator, so a definition on the last line of
%% a document does not need a trailing newline.
parse_link_remainder(<<>>, _EndChar, LinkAcc, TitleAcc, _State) ->
    link_remainder_result(<<>>, LinkAcc, TitleAcc);
parse_link_remainder(<<Char:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, _State)
  when Char =:= EndChar ->
    link_remainder_result(Rest, LinkAcc, TitleAcc);
%% link: a quote starts the title, spaces are dropped, the rest is the URL
parse_link_remainder(<<"\"", Rest/binary>>, EndChar, LinkAcc, TitleAcc, link) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, title);
parse_link_remainder(<<" ", Rest/binary>>, EndChar, LinkAcc, TitleAcc, link) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, link);
parse_link_remainder(<<Char:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, link) ->
    parse_link_remainder(Rest, EndChar, [Char | LinkAcc], TitleAcc, link);
%% title: everything up to the closing quote
parse_link_remainder(<<"\"", Rest/binary>>, EndChar, LinkAcc, TitleAcc, title) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, done);
parse_link_remainder(<<Char:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, title) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, [Char | TitleAcc], title);
%% done: ignore whatever sits between the closing quote and the terminator
parse_link_remainder(<<_:1/binary, Rest/binary>>, EndChar, LinkAcc, TitleAcc, done) ->
    parse_link_remainder(Rest, EndChar, LinkAcc, TitleAcc, done).

%% Both accumulators are collected newest-first.
link_remainder_result(Rest, LinkAcc, TitleAcc) ->
    {Rest,
     iolist_to_binary(lists:reverse(LinkAcc)),
     iolist_to_binary(lists:reverse(TitleAcc))}.
309
%% @doc Strip leading spaces from a binary, but never more than Max of
%%      them. Returns the trimmed binary together with the number of
%%      spaces that were actually removed.
%% @spec trim_whitespace(binary(), integer()) -> {binary(), integer()}
trim_whitespace(<<Binary/binary>>, Max) ->
    trim_whitespace(Binary, 0, Max).

%% Count is the number of spaces removed so far; stop at the limit or at
%% the first byte that is not a space.
trim_whitespace(<<" ", Rest/binary>>, Count, Limit) when Count =/= Limit ->
    trim_whitespace(Rest, Count + 1, Limit);
trim_whitespace(<<Rest/binary>>, Count, _Limit) ->
    {Rest, Count}.
320
%% @doc Toggle an inline tag: when Tag is on the stack of open tags, remove
%%      its first occurrence and emit the closing tag; otherwise push it
%%      and emit the opening tag.
%% @spec toggle_tag(tag(), tag_stack(), html()) -> {tag_stack(), html()}
%%       tag = binary()
%%       tag_stack = [tag()]
%%       html = [binary()]
toggle_tag(Tag, OpenTags, Acc) ->
    toggle_tag(lists:member(Tag, OpenTags), Tag, OpenTags, Acc).

%% First argument: is the tag currently open?
toggle_tag(true, Tag, OpenTags, Acc) ->
    Closing = <<"</", Tag/binary, ">">>,
    {lists:delete(Tag, OpenTags), [Closing | Acc]};
toggle_tag(false, Tag, OpenTags, Acc) ->
    Opening = <<"<", Tag/binary, ">">>,
    {[Tag | OpenTags], [Opening | Acc]}.
334
%% @doc Open Tag unless it is already open: push it onto the stack and emit
%%      the opening tag only when the stack does not contain it yet;
%%      otherwise return stack and output unchanged.
%% @spec exclusive_insert_tag(tag(), tag_stack(), html()) -> {tag_stack(), html()}
%%       tag = binary()
%%       tag_stack = [tag()]
%%       html = [binary()]
exclusive_insert_tag(Tag, OpenTags, Acc) ->
    exclusive_insert_tag(lists:member(Tag, OpenTags), Tag, OpenTags, Acc).

%% First argument: is the tag already open?
exclusive_insert_tag(true, _Tag, OpenTags, Acc) ->
    {OpenTags, Acc};
exclusive_insert_tag(false, Tag, OpenTags, Acc) ->
    Opening = <<"<", Tag/binary, ">">>,
    {[Tag | OpenTags], [Opening | Acc]}.
348
%% @doc Copy the rest of the current line to the output byte for byte,
%%      including its newline, without interpreting any Markdown, then
%%      continue with line_start/5 on whatever follows. At the end of the
%%      input line_start/5 is called with an empty binary.
preserve_line(<<>>, OpenTags, Acc, LinkContext, MultiContext) ->
    line_start(<<>>, OpenTags, Acc, LinkContext, MultiContext);
preserve_line(<<Byte, Rest/binary>>, OpenTags, Acc, LinkContext, MultiContext) ->
    Copied = [<<Byte>> | Acc],
    case Byte of
        $\n ->
            line_start(Rest, OpenTags, Copied, LinkContext, MultiContext);
        _ ->
            preserve_line(Rest, OpenTags, Copied, LinkContext, MultiContext)
    end.
356
%% @doc Skip the remainder of the current line: returns everything after
%%      the first newline, or the empty binary when there is no newline.
%% @spec start_of_next_line(binary()) -> binary()
start_of_next_line(<<Binary/binary>>) ->
    case binary:split(Binary, <<"\n">>) of
        [_CurrentLine, Following] ->
            Following;
        [_NoNewline] ->
            <<"">>
    end.
364
%% @doc Determine whether a line starts with an ordered-list marker, i.e.
%%      one or more ASCII digits followed by a dot and a space ("12. ").
%%      Returns {true, Rest} with the marker removed, or false.
%% @spec starts_with_number(binary()) -> {true, binary()} | false
starts_with_number(<<Binary/binary>>) ->
    ?DEBUG_LOGGER("starts_with_number: ~p~n", [Binary]),
    starts_with_number(Binary, 0).

%% Digits is the number of digits consumed so far. Every outcome is an
%% explicit clause: no exception is raised (or swallowed) for input that
%% ends early or contains a non-digit, and the recursion is a tail call.
starts_with_number(<<Digit, Rest/binary>>, Digits) when Digit >= $0, Digit =< $9 ->
    starts_with_number(Rest, Digits + 1);
starts_with_number(<<". ", Rest/binary>>, Digits) when Digits > 0 ->
    {true, Rest};
starts_with_number(<<_/binary>>, _Digits) ->
    %% no digits at all, digits without ". " after them, or end of input
    false.
383
%% @doc Remove the topmost occurrence of any of the tags in ToRemove from
%%      the tag stack (the head of Tags is the top, i.e. the most recently
%%      opened tag) and emit its closing tag onto Html. When none of the
%%      tags is open, stack and output are returned unchanged.
%%
%%      The search runs from the head of the list so that the innermost
%%      open tag is the one that gets closed; scanning from the other end
%%      would close the outermost match instead, which picks the wrong
%%      list type when an ol is nested in a ul or vice versa.
%% @spec remove_top_tag([tag()], tag_stack(), html()) -> {tag_stack(), html()}
%%       tag = binary()
%%       tag_stack = [tag()]
%%       html = [binary()]
remove_top_tag(ToRemove, Tags, Html) ->
    IsKept = fun(Tag) -> not lists:member(Tag, ToRemove) end,
    case lists:splitwith(IsKept, Tags) of
        {_All, []} ->
            {Tags, Html};
        {Above, [Tag | Below]} ->
            {Above ++ Below, [<<"</", Tag/binary, ">">> | Html]}
    end.
397