PageRenderTime 37ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/entities.erl

http://github.com/basho/mochiweb
Erlang | 45 lines | 35 code | 7 blank | 3 comment | 2 complexity | b00b74e3ca077fa3fdb78cf07f47bfae MD5 | raw file
Possible License(s): MIT
  1. #!/usr/bin/env escript
  2. %% -*- mode: erlang -*-
  3. -export([main/1]).
  4. %% @doc Script used to generate mochiweb_charref.erl table.
  5. main(_) ->
  6. application:start(inets),
  7. code:add_patha("ebin"),
  8. {ok, {_, _, HTML}} = httpc:request("http://www.w3.org/TR/html5/named-character-references.html"),
  9. print(lists:sort(search(mochiweb_html:parse(HTML)))).
  10. print([F | T]) ->
  11. io:put_chars([clause(F), ";\n"]),
  12. print(T);
  13. print([]) ->
  14. io:put_chars(["entity(_) -> undefined.\n"]),
  15. ok.
  16. clause({Title, [Codepoint]}) ->
  17. ["entity(\"", Title, "\") -> 16#", Codepoint];
  18. clause({Title, [First | Rest]}) ->
  19. ["entity(\"", Title, "\") -> [16#", First,
  20. [[", 16#", Codepoint] || Codepoint <- Rest],
  21. "]"].
  22. search(Elem) ->
  23. search(Elem, []).
  24. search({<<"tr">>, [{<<"id">>, <<"entity-", _/binary>>} | _], Children}, Acc) ->
  25. %% HTML5 charrefs can have more than one code point(!)
  26. [{<<"td">>, _, [{<<"code">>, _, [TitleSemi]}]},
  27. {<<"td">>, [], [RawCPs]} | _] = Children,
  28. L = byte_size(TitleSemi) - 1,
  29. <<Title:L/binary, $;>> = TitleSemi,
  30. {match, Matches} = re:run(RawCPs, "(?:\\s*U\\+)([a-fA-F0-9]+)",
  31. [{capture, all, binary}, global]),
  32. [{Title, [CP || [_, CP] <- Matches]} | Acc];
  33. search({Tag, Attrs, [H | T]}, Acc) ->
  34. search({Tag, Attrs, T}, search(H, Acc));
  35. search({_Tag, _Attrs, []}, Acc) ->
  36. Acc;
  37. search(<<_/binary>>, Acc) ->
  38. Acc.