PageRenderTime 12ms CodeModel.GetById 2ms app.highlight 5ms RepoModel.GetById 2ms app.codeStats 0ms

/dylan/parser.dylan

http://github.com/cgay/wiki
Unknown | 306 lines | 283 code | 23 blank | 0 comment | 0 complexity | 3754e883b65c1b3e2930217fc835a80f MD5 | raw file
  1Module: %wiki
  2Synopsis: An ad-hoc parser for wiki markup
  3Author: Carl Gay
  4Copyright: This code is in the public domain.
  5
  6
  7define variable *wiki-link-url* = wiki-url("/page/view/");
  8
  9// This table maps the leading character of any markup that can occur
 10// top-level (i.e., anywhere in the wiki page) to a function that
 11// parses that kind of markup.  Once we've dispatched to the correct
 12// parser based on the first character, that parser might dispatch
 13// further based on subsequent characters.
 14define table $markup-top-level
 15  = { '[' => parse-link,
 16      // TODO: '/' => parse-comment,
 17      '\n' => parse-newline,
 18      '<' => parse-less-than,
 19      '&' => parse-ampersand
 20      };
 21
 22// Markup that may occur after a newline (plus optional whitespace) has
 23// just been seen.
 24define table $markup-after-newline
 25  = { '=' => parse-header,
 26      '#' => parse-numbered-list,
 27      '*' => parse-bulleted-list,
 28      '|' => parse-table,
 29      '-' => parse-horizontal-line,
 30      '\n' => parse-newline-newline
 31      };
 32
 33define method wiki-markup-to-html
 34    (markup :: <string>, #key start :: <integer> = 0)
 35 => (html :: <string>)
 36  with-output-to-string (html-stream)
 37    parse-markup(html-stream, markup, start, $markup-top-level);
 38  end;
 39end;
 40
 41define method parse-markup
 42    (out :: <stream>, markup :: <string>, start :: <integer>, parser-table :: <table>)
 43  let leading-chars = key-sequence(parser-table);
 44  iterate loop (start :: <integer> = start)
 45    // find first occurrance of a "markup leading character"...
 46    if (start < markup.size)
 47      let markup-index = find(markup, rcurry(member?, leading-chars), start: start);
 48      //log-debug("start = %s, markup-index = %s", start, markup-index);
 49      if (~markup-index)
 50        write(out, markup, start: start);
 51      else
 52        write(out, markup, start: start, end: markup-index);
 53        let dispatch-char = markup[markup-index];
 54        let parser = parser-table[dispatch-char];
 55        let end-pos = parser(out, markup, markup-index);
 56        if (end-pos)
 57          // successful parse
 58          loop(end-pos);
 59        else
 60          // unsuccessful parse
 61          write-element(out, dispatch-char);
 62          loop(markup-index + 1);
 63        end;
 64      end if;
 65    end if;
 66  end iterate;
 67end method parse-markup;
 68
 69define method parse-table
 70    (out :: <stream>, markup :: <string>, start :: <integer>)
 71 => (end-pos :: false-or(<integer>))
 72  // TODO
 73end;
 74
 75// The parser has just encountered a newline in the markup...
 76define method parse-newline
 77    (out :: <stream>, markup :: <string>, start :: <integer>)
 78 => (end-pos :: false-or(<integer>))
 79  let index = find(markup, method (c) ~member?(c, " \t\r") end, start: start + 1);
 80  if (index)
 81    let parser = element($markup-after-newline, markup[index], default: #f);
 82    if (parser)
 83      parser(out, markup, index)
 84    elseif (start + 1 < markup.size & markup[start + 1] == ' ')
 85      // lines preceded by space are preformatted...
 86      // Find next line with no leading whitespace...
 87      let (epos, #rest xs) = regex-position(compile-regex("\n\\S"), markup, start: start + 1) | markup.size;
 88      write(out, "<pre>");
 89      //XXX more speed
 90      let raw-text = copy-sequence(markup, start: start, end: epos);
 91      write(out, escape-xml(raw-text));
 92      write(out, "</pre>");
 93      epos
 94    end
 95  end 	// note returning #f or end-pos of parser
 96end method parse-newline;
 97
 98define method parse-newline-newline
 99    (out :: <stream>, markup :: <string>, start :: <integer>)
100 => (end-pos :: false-or(<integer>))
101  write(out, "<p/>\n");
102  // Note that this leaves the SECOND newline in the input stream.
103  // This is kind of a kludge to make sure that if the following
104  // markup must be preceded by a newline it'll still parse correctly.
105  // Otherwise we'd return start + 1 here.
106  start
107end method parse-newline-newline;
108
109define method find
110    (text :: <string>, fun :: <function>,
111     #key start :: <integer> = 0, end: epos)
112 => (pos :: false-or(<integer>))
113  block (return)
114    for (i :: <integer> from start below epos | text.size)
115      if (fun(text[i]))
116        return(i);
117      end;
118    end;
119  end;
120end method find;
121
122define method find
123    (text :: <string>, char :: <character>,
124     #key start :: <integer> = 0, end: epos = #f)
125 => (pos :: false-or(<integer>))
126  find(text, curry(\==, char), start: start, end: epos)
127end;
128
129// Parse == heading == markup
130define method parse-header
131    (out :: <stream>, markup :: <string>, start :: <integer>)
132 => (end-pos :: false-or(<integer>))
133  let newline = find(markup, '\n', start: start) | markup.size;
134  // let (#rest idxs) = regex-position(compile-regex("(==+)([^=\n]+)(==+)\\s*(\n|$)"), markup,
135  let regex = compile-regex("(==+)([^=\n]+)(==+|$)");
136  let (#rest idxs) = regex-position(regex, markup,
137                                    start: start, end: newline);
138  if (idxs.size > 1)
139    let tag = copy-sequence(markup, start: idxs[2], end: idxs[3]);
140    let header = copy-sequence(markup, start: idxs[4], end: idxs[5]);
141    format(out, "<h%d>%s</h%d>\n", tag.size, header, tag.size);
142    idxs[7]
143  end
144end method parse-header;
145
146// Parse [[wiki-title]] or [url|label] markup.
147// @param start points to the initial '[' char.
148define method parse-link
149    (out :: <stream>, markup :: <string>, start :: <integer>)
150 => (end-pos :: false-or(<integer>))
151  // links can't span multiple lines
152  let close = find(markup, method (x) x == ']' | x == '\n' end, start: start);
153  //format(out, "start: %d, close: %d\n", start, close);
154  if (close)
155    if(markup[close] == ']')
156      close := make-link(out, markup, start, close);
157    elseif(markup[close] == '\n')
158//      note-form-message("The link %s is invalid wiki markup.",
159//        copy-sequence(markup, start: start, end: close));
160      close := make-link(out, markup, start, close);
161      close := close - 1;
162    end if;
163    close;
164  end if;
165end method parse-link;
166
167define method make-link
168      (out :: <stream>, markup :: <string>, start :: <integer>, close :: <integer>)
169  if (close)
170    let wiki-link? = (markup[start + 1] == '[');
171    if (wiki-link?)
172      if (markup[close] == ']')
173        close := close + 1;
174      end;
175      let title = copy-sequence(markup, start: start + 2, end: close - 1);
176      format(out, "<a href=\"%s%s\">%s%s</a>",
177             *wiki-link-url*,
178             title,
179             if (find-or-load-page(title)) "" else "[?]" end,
180             title);
181    else
182      let bar = find(markup, '|', start: start, end: close);
183      let url = copy-sequence(markup, start: start + 1, end: bar | close);
184      let label = bar & copy-sequence(markup, start: bar + 1, end: close);
185      format(out, "<a href=\"%s\">%s</a>", url, label | url);
186    end if;
187  end if;
188  close + 1;
189end method make-link;
190
191define method parse-bulleted-list
192    (out :: <stream>, markup :: <string>, start :: <integer>)
193 => (end-pos :: false-or(<integer>))
194  generate-list(out, markup, start, '*', "ul")
195end;
196
197define method parse-numbered-list
198    (out :: <stream>, markup :: <string>, start :: <integer>)
199 => (end-pos :: false-or(<integer>))
200  generate-list(out, markup, start, '#', "ol")
201end;
202
203define method generate-list
204    (stream, markup, start, bullet-char, tag)
205 => (end-pos :: false-or(<integer>))
206  let regex1 = compile-regex(format-to-string("\n\\s*[^%s]", bullet-char));
207  let (list-end, #rest xs) = regex-position(regex1, markup, start: start);
208  let lines = split(copy-sequence(markup,
209                                  start: start,
210                                  end: list-end | markup.size),
211                    "\n", trim?: #t);
212//  write(stream, "<p>\n");
213  let depth :: <integer> = 0;
214  let regex2 = compile-regex(format-to-string("^\\s*([%s]+)", bullet-char));
215  for (line in lines)
216    let (#rest indexes) = regex-position(regex2, line);
217    if (indexes.size > 1)
218      let bullet-start = indexes[2];
219      let bullet-end = indexes[3];
220      let num-bullets = bullet-end - bullet-start;
221      let item-html = wiki-markup-to-html(line, start: bullet-end);
222      item-html := copy-sequence(item-html, start: 0, end: max(item-html.size - 1, 0));
223      case
224        depth = 0 => 
225          format(stream, "<%s>\n<li>%s", tag, item-html);
226          inc!(depth);
227        num-bullets < depth =>
228          format(stream, "</li>\n</%s></li>\n<li>%s", tag, item-html);
229          dec!(depth);
230        num-bullets = depth =>
231          format(stream, "</li>\n<li>%s", item-html);
232        num-bullets > depth =>
233          format(stream, "\n<%s>\n<li>%s", tag, item-html);
234          inc!(depth);
235      end case;
236    end if;
237  end for;
238  for (i from 0 below depth)
239    format(stream, "</li>\n</%s>\n", tag);
240  end;
241//  write(stream, "</p>\n");
242  list-end | markup.size
243end method generate-list;
244
245define method parse-horizontal-line
246    (out :: <stream>, markup :: <string>, start :: <integer>)
247 => (end-pos :: false-or(<integer>))
248  let non-hyphen = find(markup, method (c) c ~== '-' end, start: start) | markup.size;
249  if (non-hyphen - start >= 4)
250    write(out, "<hr/>\n");
251    non-hyphen
252  end
253end method parse-horizontal-line;
254
255define method parse-less-than
256    (out :: <stream>, markup :: <string>, start :: <integer>)
257 => (end-pos :: false-or(<integer>))
258  // don't search past newline...
259  let close = find(markup, method (c) c == '>' | c == '\n' end, start: start);
260  if (close & markup[close] == '>')
261    let word = as-lowercase(copy-sequence(markup, start: start + 1, end: close));
262    select (word by \=)
263      "br", "br/"
264        => write(out, "<br/>");
265           close + 1;
266      "p", "p/"
267        => write(out, "<p/>");
268           close + 1;
269      // add more paired elements here...
270      "center", "center/"
271        => write(out, markup, start: start, end: close + 1);
272           close + 1;
273      "nowiki"
274        // TODO: allow nested nowiki elements.
275        => let epos = regex-position(compile-regex("</nowiki>"), markup,
276                                     case-sensitive: #f,
277                                     start: close)
278                        | markup.size;
279           write(out, markup, start: start + "<nowiki>".size, end: epos);
280           epos + "</nowiki>".size;
281      otherwise
282        => write(out, "&lt;");
283           start + 1;
284    end select
285  else
286    write(out, "&lt;");
287    start + 1
288  end
289end method parse-less-than;
290
291define method parse-ampersand
292    (out :: <stream>, markup :: <string>, start :: <integer>)
293 => (end-pos :: false-or(<integer>))
294  write(out, "&amp;");
295  start + 1
296end method parse-ampersand;
297
298/* TODO...
299define wiki-markup raw-url
300    regex: "(http|ftp|gopher|mailto|news|nntp|telnet|wais|file|prospero)://[^ \t\r\n)]+";
301    (stream, all, #rest ignore)
302  format(stream, "<a href=\"%s\">%s</a>", all, all);
303end;
304
305*/
306