/dylan/monday-parser.dylan

http://github.com/cgay/wiki · Unknown · 271 lines · 215 code · 56 blank · 0 comment · 0 complexity · c298154362cbce66a403f4472ebd6589 MD5 · raw file

  1. module: %wiki
  2. author: Hannes Mehnert <hannes@mehnert.org>
  3. synopsis: markup definition for dylan wiki
  4. define function extract-action
  5. (token-string :: <byte-string>,
  6. token-start :: <integer>,
  7. token-end :: <integer>)
  8. => (result :: <byte-string>);
  9. copy-sequence(token-string, start: token-start, end: token-end);
  10. end;
  11. define function count-chars
  12. (string :: <byte-string>,
  13. tstart :: <integer>,
  14. tend :: <integer>)
  15. => (res :: <integer>)
  16. tend - tstart
  17. end;
  18. define constant $base-url = "/wiki/view.dsp?title=";
  19. define constant $wiki-tokens
  20. = simple-lexical-definition
  21. token EOF;
  22. inert "([ ])+";
  23. token LBRACKET = "\\[";
  24. token RBRACKET = "\\]";
  25. token EQUALS = "(=)+",
  26. semantic-value-function: count-chars;
  27. token TILDES = "(~)+",
  28. semantic-value-function: count-chars;
  29. token TICKS = "(')+",
  30. semantic-value-function: count-chars;
  31. token AMPERSAND = "&";
  32. token HASHMARK = "#";
  33. token STAR = "*";
  34. token FOUR-DASHES = "----", priority: 3;
  35. token PIPE = "\\|";
  36. token SMALLER = "<";
  37. token GREATER = ">";
  38. token CLIST = "(\n|\r|\r\n)(\\*|#)", priority: 3;
  39. token PREFORMATTED = "(\r|\n|\r\n) ", priority: 3;
  40. token NEWLINE = "(\n|\r|\r\n)";
  41. //todo: ignore spaces?!
  42. token TEXT = "[a-zA-Z_-0-9\\.]+",
  43. semantic-value-function: extract-action;
  44. token URL = "(http|ftp|https)://",
  45. semantic-value-function: extract-action;
  46. end;
  47. define constant $wiki-productions
  48. = simple-grammar-productions
  49. production description :: false-or(<string>) => [TEXT] (data)
  50. if (TEXT.size = 0) #f else TEXT end;
  51. production wiki-page-name :: <string> => [TEXT] (data)
  52. TEXT;
  53. production myurl :: <string> => [URL TEXT] (data)
  54. concatenate(URL, TEXT);
  55. production external-link :: xml/<element> => [LBRACKET myurl RBRACKET] (data)
  56. with-xml() a(myurl, href => myurl) end;
  57. production external-link :: xml/<element> => [LBRACKET myurl description RBRACKET] (data)
  58. with-xml() a(description, href => myurl) end;
  59. production internal-link :: xml/<element> => [LBRACKET LBRACKET wiki-page-name RBRACKET RBRACKET] (data)
  60. with-xml() a(wiki-page-name, href => concatenate($base-url, wiki-page-name)) end;
  61. production internal-link :: xml/<element> => [LBRACKET LBRACKET wiki-page-name PIPE description RBRACKET RBRACKET] (data)
  62. with-xml() a(description, href => concatenate($base-url, wiki-page-name)) end;
  63. production header :: xml/<element> => [EQUALS more-wiki-text EQUALS], action:
  64. method (p :: <simple-parser>, data, s, e)
  65. let heading = max(p[0], p[2]);
  66. unless (p[0] = p[2])
  67. format-out("Unbalanced number of '=' in header %s, left: %d right: %d, using %d\n",
  68. p[1], p[0], p[2], heading);
  69. end;
  70. make(xml/<element>,
  71. name: concatenate("h", integer-to-string(heading)),
  72. children: p[1]);
  73. end;
  74. production unnumbered-list :: xml/<element> => [STAR list-elements] (data)
  75. format-out("Hit unnumbered-list %=\n", list-elements);
  76. make(xml/<element>, name: "ul", children: list-elements);
  77. production numbered-list :: xml/<element> => [HASHMARK list-elements] (data)
  78. make(xml/<element>, name: "ol", children: list-elements);
  79. production list-elements :: <collection> => [list-element more-list-elements] (data)
  80. format-out("Hit list-elements\n");
  81. add!(more-list-elements, list-element);
  82. production more-list-elements :: <collection> => [CLIST list-element more-list-elements] (data)
  83. format-out("Hit more-list-elements\n");
  84. add!(more-list-elements | #(), list-element);
  85. production more-list-elements :: <collection> => [] (data)
  86. format-out("Hit more-list-elements, empty\n");
  87. #();
  88. production list-element :: xml/<element> => [wiki-text] (data)
  89. format-out("Hit list-element %=\n", wiki-text);
  90. make(xml/<element>, name: "li", children: wiki-text);
  91. production simple-format :: xml/<xml> => [TICKS TEXT TICKS], action:
  92. method (p :: <simple-parser>, data, s, e)
  93. let ticks = max(p[0], p[2]);
  94. unless (p[0] = p[2])
  95. format-out("Unbalanced number of ' in TICKS %s, left: %d right: %d, using %d\n",
  96. p[1], p[0], p[2], ticks);
  97. end;
  98. let str = list(make(xml/<char-string>, text: p[1]));
  99. if (ticks = 5)
  100. make(xml/<element>, name: "b", children: list(make(xml/<element>, name: "i", children: str)));
  101. else
  102. let ele-name = if (ticks = 2) "i" elseif (ticks = 3) "b" end;
  103. if (ele-name)
  104. make(xml/<element>, name: ele-name, children: str);
  105. else
  106. str[0]
  107. end;
  108. end;
  109. end;
  110. production wiki-text :: <collection> => [TEXT more-wiki-text] (data)
  111. add!(more-wiki-text, with-xml() text(TEXT) end);
  112. production wiki-text :: <collection> => [internal-link more-wiki-text] (data)
  113. add!(more-wiki-text, internal-link);
  114. production wiki-text :: <collection> => [external-link more-wiki-text] (data)
  115. add!(more-wiki-text, external-link);
  116. production wiki-text :: <collection> => [simple-format more-wiki-text] (data)
  117. add!(more-wiki-text, simple-format);
  118. production more-wiki-text :: <collection> => [wiki-text] (data)
  119. wiki-text;
  120. production more-wiki-text :: <collection> => [] (data)
  121. #();
  122. production horizontal-line :: xml/<element> => [FOUR-DASHES] (data)
  123. with-xml() hr end;
  124. production preformat :: xml/<element> => [PREFORMATTED TEXT more-preformat] (data)
  125. let pre-string = concatenate("\n ", TEXT, more-preformat);
  126. make(xml/<element>,
  127. name: "pre",
  128. children: list(make(xml/<char-string>, text: pre-string)));
  129. production more-preformat :: <string> => [TEXT more-preformat] (data)
  130. concatenate(" ", TEXT, more-preformat);
  131. production more-preformat :: <string> => [PREFORMATTED more-preformat] (data)
  132. concatenate("\n", more-preformat);
  133. production more-preformat :: <string> => [NEWLINE] (data)
  134. "\n";
  135. production line :: <collection> => [wiki-text] (data)
  136. wiki-text;
  137. production line :: <collection> => [header] (data)
  138. list(header);
  139. production line :: <collection> => [unnumbered-list] (data)
  140. list(unnumbered-list);
  141. production line :: <collection> => [numbered-list] (data)
  142. list(numbered-list);
  143. production line :: <collection> => [horizontal-line] (data)
  144. list(horizontal-line);
  145. production lines => [] (data)
  146. production lines => [preformat lines] (data)
  147. add!(data.my-real-data, preformat);
  148. production lines => [line NEWLINE NEWLINE lines] (data)
  149. add!(data.my-real-data, with-xml() p end);
  150. do(curry(add!, data.my-real-data), line);
  151. production lines => [line NEWLINE lines] (data)
  152. do(curry(add!, data.my-real-data), line);
  153. end;
  154. define constant $wiki-parser-automaton
  155. = simple-parser-automaton($wiki-tokens, $wiki-productions,
  156. #[#"lines"]);
  157. define function consume-token
  158. (consumer-data,
  159. token-number :: <integer>,
  160. token-name :: <object>,
  161. semantic-value :: <object>,
  162. start-position :: <integer>,
  163. end-position :: <integer>)
  164. => ();
  165. //let srcloc
  166. // = range-source-location(consumer-data, start-position, end-position);
  167. format-out("%d - %d: token %d: %= value %=\n",
  168. start-position,
  169. end-position,
  170. token-number,
  171. token-name,
  172. semantic-value);
  173. simple-parser-consume-token(consumer-data, token-number, token-name, semantic-value, start-position, end-position);
  174. end function;
  175. define sealed class <my-data> (<object>)
  176. slot my-real-data = make(<stretchy-vector>);
  177. end;
  178. define function parse-wiki-markup (input :: <string>)
  179. let rangemap = make(<source-location-rangemap>);
  180. rangemap-add-line(rangemap, 0, 1);
  181. if(input[0] = ' ')
  182. input := concatenate("\n", input);
  183. end;
  184. unless(input[input.size - 1] = '\n')
  185. input := add!(input, '\n')
  186. end;
  187. let scanner = make(<simple-lexical-scanner>,
  188. definition: $wiki-tokens,
  189. rangemap: rangemap);
  190. let data = make(<my-data>);
  191. let parser = make(<simple-parser>,
  192. automaton: $wiki-parser-automaton,
  193. start-symbol: #"lines",
  194. rangemap: rangemap,
  195. consumer-data: data);
  196. format-out("before scan-tokens, input: %s\n", input);
  197. scan-tokens(scanner,
  198. simple-parser-consume-token,
  199. //consume-token,
  200. parser,
  201. input,
  202. end: input.size,
  203. partial?: #f);
  204. let end-position = scanner.scanner-source-position;
  205. format-out("before consuming EOF at %d\n", end-position);
  206. simple-parser-consume-token(parser, 0, #"EOF", parser, end-position, end-position);
  207. format-out("data (%d) is %=\n", data.my-real-data.size, data.my-real-data);
  208. reduce1(concatenate, (map(curry(as, <string>), reverse(data.my-real-data))));
  209. end;
  210. /*
  211. begin
  212. parse-wiki-markup(" one\n two\n three\n foo");
  213. parse-wiki-markup(" this is pre-text\n and another line");
  214. end;
  215. */