PageRenderTime 322ms CodeModel.GetById 51ms RepoModel.GetById 2ms app.codeStats 0ms

/dylan/parser.dylan

http://github.com/cgay/wiki
Unknown | 306 lines | 283 code | 23 blank | 0 comment | 0 complexity | 3754e883b65c1b3e2930217fc835a80f MD5 | raw file
  1. Module: %wiki
  2. Synopsis: An ad-hoc parser for wiki markup
  3. Author: Carl Gay
  4. Copyright: This code is in the public domain.
  5. define variable *wiki-link-url* = wiki-url("/page/view/");
  6. // This table maps the leading character of any markup that can occur
  7. // top-level (i.e., anywhere in the wiki page) to a function that
  8. // parses that kind of markup. Once we've dispatched to the correct
  9. // parser based on the first character, that parser might dispatch
  10. // further based on subsequent characters.
  11. define table $markup-top-level
  12. = { '[' => parse-link,
  13. // TODO: '/' => parse-comment,
  14. '\n' => parse-newline,
  15. '<' => parse-less-than,
  16. '&' => parse-ampersand
  17. };
  18. // Markup that may occur after a newline (plus optional whitespace) has
  19. // just been seen.
  20. define table $markup-after-newline
  21. = { '=' => parse-header,
  22. '#' => parse-numbered-list,
  23. '*' => parse-bulleted-list,
  24. '|' => parse-table,
  25. '-' => parse-horizontal-line,
  26. '\n' => parse-newline-newline
  27. };
  28. define method wiki-markup-to-html
  29. (markup :: <string>, #key start :: <integer> = 0)
  30. => (html :: <string>)
  31. with-output-to-string (html-stream)
  32. parse-markup(html-stream, markup, start, $markup-top-level);
  33. end;
  34. end;
  35. define method parse-markup
  36. (out :: <stream>, markup :: <string>, start :: <integer>, parser-table :: <table>)
  37. let leading-chars = key-sequence(parser-table);
  38. iterate loop (start :: <integer> = start)
  39. // find first occurrance of a "markup leading character"...
  40. if (start < markup.size)
  41. let markup-index = find(markup, rcurry(member?, leading-chars), start: start);
  42. //log-debug("start = %s, markup-index = %s", start, markup-index);
  43. if (~markup-index)
  44. write(out, markup, start: start);
  45. else
  46. write(out, markup, start: start, end: markup-index);
  47. let dispatch-char = markup[markup-index];
  48. let parser = parser-table[dispatch-char];
  49. let end-pos = parser(out, markup, markup-index);
  50. if (end-pos)
  51. // successful parse
  52. loop(end-pos);
  53. else
  54. // unsuccessful parse
  55. write-element(out, dispatch-char);
  56. loop(markup-index + 1);
  57. end;
  58. end if;
  59. end if;
  60. end iterate;
  61. end method parse-markup;
  62. define method parse-table
  63. (out :: <stream>, markup :: <string>, start :: <integer>)
  64. => (end-pos :: false-or(<integer>))
  65. // TODO
  66. end;
  67. // The parser has just encountered a newline in the markup...
  68. define method parse-newline
  69. (out :: <stream>, markup :: <string>, start :: <integer>)
  70. => (end-pos :: false-or(<integer>))
  71. let index = find(markup, method (c) ~member?(c, " \t\r") end, start: start + 1);
  72. if (index)
  73. let parser = element($markup-after-newline, markup[index], default: #f);
  74. if (parser)
  75. parser(out, markup, index)
  76. elseif (start + 1 < markup.size & markup[start + 1] == ' ')
  77. // lines preceded by space are preformatted...
  78. // Find next line with no leading whitespace...
  79. let (epos, #rest xs) = regex-position(compile-regex("\n\\S"), markup, start: start + 1) | markup.size;
  80. write(out, "<pre>");
  81. //XXX more speed
  82. let raw-text = copy-sequence(markup, start: start, end: epos);
  83. write(out, escape-xml(raw-text));
  84. write(out, "</pre>");
  85. epos
  86. end
  87. end // note returning #f or end-pos of parser
  88. end method parse-newline;
  89. define method parse-newline-newline
  90. (out :: <stream>, markup :: <string>, start :: <integer>)
  91. => (end-pos :: false-or(<integer>))
  92. write(out, "<p/>\n");
  93. // Note that this leaves the SECOND newline in the input stream.
  94. // This is kind of a kludge to make sure that if the following
  95. // markup must be preceded by a newline it'll still parse correctly.
  96. // Otherwise we'd return start + 1 here.
  97. start
  98. end method parse-newline-newline;
  99. define method find
  100. (text :: <string>, fun :: <function>,
  101. #key start :: <integer> = 0, end: epos)
  102. => (pos :: false-or(<integer>))
  103. block (return)
  104. for (i :: <integer> from start below epos | text.size)
  105. if (fun(text[i]))
  106. return(i);
  107. end;
  108. end;
  109. end;
  110. end method find;
  111. define method find
  112. (text :: <string>, char :: <character>,
  113. #key start :: <integer> = 0, end: epos = #f)
  114. => (pos :: false-or(<integer>))
  115. find(text, curry(\==, char), start: start, end: epos)
  116. end;
  117. // Parse == heading == markup
  118. define method parse-header
  119. (out :: <stream>, markup :: <string>, start :: <integer>)
  120. => (end-pos :: false-or(<integer>))
  121. let newline = find(markup, '\n', start: start) | markup.size;
  122. // let (#rest idxs) = regex-position(compile-regex("(==+)([^=\n]+)(==+)\\s*(\n|$)"), markup,
  123. let regex = compile-regex("(==+)([^=\n]+)(==+|$)");
  124. let (#rest idxs) = regex-position(regex, markup,
  125. start: start, end: newline);
  126. if (idxs.size > 1)
  127. let tag = copy-sequence(markup, start: idxs[2], end: idxs[3]);
  128. let header = copy-sequence(markup, start: idxs[4], end: idxs[5]);
  129. format(out, "<h%d>%s</h%d>\n", tag.size, header, tag.size);
  130. idxs[7]
  131. end
  132. end method parse-header;
  133. // Parse [[wiki-title]] or [url|label] markup.
  134. // @param start points to the initial '[' char.
  135. define method parse-link
  136. (out :: <stream>, markup :: <string>, start :: <integer>)
  137. => (end-pos :: false-or(<integer>))
  138. // links can't span multiple lines
  139. let close = find(markup, method (x) x == ']' | x == '\n' end, start: start);
  140. //format(out, "start: %d, close: %d\n", start, close);
  141. if (close)
  142. if(markup[close] == ']')
  143. close := make-link(out, markup, start, close);
  144. elseif(markup[close] == '\n')
  145. // note-form-message("The link %s is invalid wiki markup.",
  146. // copy-sequence(markup, start: start, end: close));
  147. close := make-link(out, markup, start, close);
  148. close := close - 1;
  149. end if;
  150. close;
  151. end if;
  152. end method parse-link;
  153. define method make-link
  154. (out :: <stream>, markup :: <string>, start :: <integer>, close :: <integer>)
  155. if (close)
  156. let wiki-link? = (markup[start + 1] == '[');
  157. if (wiki-link?)
  158. if (markup[close] == ']')
  159. close := close + 1;
  160. end;
  161. let title = copy-sequence(markup, start: start + 2, end: close - 1);
  162. format(out, "<a href=\"%s%s\">%s%s</a>",
  163. *wiki-link-url*,
  164. title,
  165. if (find-or-load-page(title)) "" else "[?]" end,
  166. title);
  167. else
  168. let bar = find(markup, '|', start: start, end: close);
  169. let url = copy-sequence(markup, start: start + 1, end: bar | close);
  170. let label = bar & copy-sequence(markup, start: bar + 1, end: close);
  171. format(out, "<a href=\"%s\">%s</a>", url, label | url);
  172. end if;
  173. end if;
  174. close + 1;
  175. end method make-link;
  176. define method parse-bulleted-list
  177. (out :: <stream>, markup :: <string>, start :: <integer>)
  178. => (end-pos :: false-or(<integer>))
  179. generate-list(out, markup, start, '*', "ul")
  180. end;
  181. define method parse-numbered-list
  182. (out :: <stream>, markup :: <string>, start :: <integer>)
  183. => (end-pos :: false-or(<integer>))
  184. generate-list(out, markup, start, '#', "ol")
  185. end;
  186. define method generate-list
  187. (stream, markup, start, bullet-char, tag)
  188. => (end-pos :: false-or(<integer>))
  189. let regex1 = compile-regex(format-to-string("\n\\s*[^%s]", bullet-char));
  190. let (list-end, #rest xs) = regex-position(regex1, markup, start: start);
  191. let lines = split(copy-sequence(markup,
  192. start: start,
  193. end: list-end | markup.size),
  194. "\n", trim?: #t);
  195. // write(stream, "<p>\n");
  196. let depth :: <integer> = 0;
  197. let regex2 = compile-regex(format-to-string("^\\s*([%s]+)", bullet-char));
  198. for (line in lines)
  199. let (#rest indexes) = regex-position(regex2, line);
  200. if (indexes.size > 1)
  201. let bullet-start = indexes[2];
  202. let bullet-end = indexes[3];
  203. let num-bullets = bullet-end - bullet-start;
  204. let item-html = wiki-markup-to-html(line, start: bullet-end);
  205. item-html := copy-sequence(item-html, start: 0, end: max(item-html.size - 1, 0));
  206. case
  207. depth = 0 =>
  208. format(stream, "<%s>\n<li>%s", tag, item-html);
  209. inc!(depth);
  210. num-bullets < depth =>
  211. format(stream, "</li>\n</%s></li>\n<li>%s", tag, item-html);
  212. dec!(depth);
  213. num-bullets = depth =>
  214. format(stream, "</li>\n<li>%s", item-html);
  215. num-bullets > depth =>
  216. format(stream, "\n<%s>\n<li>%s", tag, item-html);
  217. inc!(depth);
  218. end case;
  219. end if;
  220. end for;
  221. for (i from 0 below depth)
  222. format(stream, "</li>\n</%s>\n", tag);
  223. end;
  224. // write(stream, "</p>\n");
  225. list-end | markup.size
  226. end method generate-list;
  227. define method parse-horizontal-line
  228. (out :: <stream>, markup :: <string>, start :: <integer>)
  229. => (end-pos :: false-or(<integer>))
  230. let non-hyphen = find(markup, method (c) c ~== '-' end, start: start) | markup.size;
  231. if (non-hyphen - start >= 4)
  232. write(out, "<hr/>\n");
  233. non-hyphen
  234. end
  235. end method parse-horizontal-line;
  236. define method parse-less-than
  237. (out :: <stream>, markup :: <string>, start :: <integer>)
  238. => (end-pos :: false-or(<integer>))
  239. // don't search past newline...
  240. let close = find(markup, method (c) c == '>' | c == '\n' end, start: start);
  241. if (close & markup[close] == '>')
  242. let word = as-lowercase(copy-sequence(markup, start: start + 1, end: close));
  243. select (word by \=)
  244. "br", "br/"
  245. => write(out, "<br/>");
  246. close + 1;
  247. "p", "p/"
  248. => write(out, "<p/>");
  249. close + 1;
  250. // add more paired elements here...
  251. "center", "center/"
  252. => write(out, markup, start: start, end: close + 1);
  253. close + 1;
  254. "nowiki"
  255. // TODO: allow nested nowiki elements.
  256. => let epos = regex-position(compile-regex("</nowiki>"), markup,
  257. case-sensitive: #f,
  258. start: close)
  259. | markup.size;
  260. write(out, markup, start: start + "<nowiki>".size, end: epos);
  261. epos + "</nowiki>".size;
  262. otherwise
  263. => write(out, "&lt;");
  264. start + 1;
  265. end select
  266. else
  267. write(out, "&lt;");
  268. start + 1
  269. end
  270. end method parse-less-than;
  271. define method parse-ampersand
  272. (out :: <stream>, markup :: <string>, start :: <integer>)
  273. => (end-pos :: false-or(<integer>))
  274. write(out, "&amp;");
  275. start + 1
  276. end method parse-ampersand;
  277. /* TODO...
  278. define wiki-markup raw-url
  279. regex: "(http|ftp|gopher|mailto|news|nntp|telnet|wais|file|prospero)://[^ \t\r\n)]+";
  280. (stream, all, #rest ignore)
  281. format(stream, "<a href=\"%s\">%s</a>", all, all);
  282. end;
  283. */