/dylan/parser.dylan
Unknown | 306 lines | 283 code | 23 blank | 0 comment | 0 complexity | 3754e883b65c1b3e2930217fc835a80f MD5 | raw file
1Module: %wiki 2Synopsis: An ad-hoc parser for wiki markup 3Author: Carl Gay 4Copyright: This code is in the public domain. 5 6 7define variable *wiki-link-url* = wiki-url("/page/view/"); 8 9// This table maps the leading character of any markup that can occur 10// top-level (i.e., anywhere in the wiki page) to a function that 11// parses that kind of markup. Once we've dispatched to the correct 12// parser based on the first character, that parser might dispatch 13// further based on subsequent characters. 14define table $markup-top-level 15 = { '[' => parse-link, 16 // TODO: '/' => parse-comment, 17 '\n' => parse-newline, 18 '<' => parse-less-than, 19 '&' => parse-ampersand 20 }; 21 22// Markup that may occur after a newline (plus optional whitespace) has 23// just been seen. 24define table $markup-after-newline 25 = { '=' => parse-header, 26 '#' => parse-numbered-list, 27 '*' => parse-bulleted-list, 28 '|' => parse-table, 29 '-' => parse-horizontal-line, 30 '\n' => parse-newline-newline 31 }; 32 33define method wiki-markup-to-html 34 (markup :: <string>, #key start :: <integer> = 0) 35 => (html :: <string>) 36 with-output-to-string (html-stream) 37 parse-markup(html-stream, markup, start, $markup-top-level); 38 end; 39end; 40 41define method parse-markup 42 (out :: <stream>, markup :: <string>, start :: <integer>, parser-table :: <table>) 43 let leading-chars = key-sequence(parser-table); 44 iterate loop (start :: <integer> = start) 45 // find first occurrance of a "markup leading character"... 46 if (start < markup.size) 47 let markup-index = find(markup, rcurry(member?, leading-chars), start: start); 48 //log-debug("start = %s, markup-index = %s", start, markup-index); 49 if (~markup-index) 50 write(out, markup, start: start); 51 else 52 write(out, markup, start: start, end: markup-index); 53 let dispatch-char = markup[markup-index]; 54 let parser = parser-table[dispatch-char]; 55 let end-pos = parser(out, markup, markup-index); 56 if (end-pos) 57 // successful parse 58 loop(end-pos); 59 else 60 // unsuccessful parse 61 write-element(out, dispatch-char); 62 loop(markup-index + 1); 63 end; 64 end if; 65 end if; 66 end iterate; 67end method parse-markup; 68 69define method parse-table 70 (out :: <stream>, markup :: <string>, start :: <integer>) 71 => (end-pos :: false-or(<integer>)) 72 // TODO 73end; 74 75// The parser has just encountered a newline in the markup... 76define method parse-newline 77 (out :: <stream>, markup :: <string>, start :: <integer>) 78 => (end-pos :: false-or(<integer>)) 79 let index = find(markup, method (c) ~member?(c, " \t\r") end, start: start + 1); 80 if (index) 81 let parser = element($markup-after-newline, markup[index], default: #f); 82 if (parser) 83 parser(out, markup, index) 84 elseif (start + 1 < markup.size & markup[start + 1] == ' ') 85 // lines preceded by space are preformatted... 86 // Find next line with no leading whitespace... 87 let (epos, #rest xs) = regex-position(compile-regex("\n\\S"), markup, start: start + 1) | markup.size; 88 write(out, "<pre>"); 89 //XXX more speed 90 let raw-text = copy-sequence(markup, start: start, end: epos); 91 write(out, escape-xml(raw-text)); 92 write(out, "</pre>"); 93 epos 94 end 95 end // note returning #f or end-pos of parser 96end method parse-newline; 97 98define method parse-newline-newline 99 (out :: <stream>, markup :: <string>, start :: <integer>) 100 => (end-pos :: false-or(<integer>)) 101 write(out, "<p/>\n"); 102 // Note that this leaves the SECOND newline in the input stream. 103 // This is kind of a kludge to make sure that if the following 104 // markup must be preceded by a newline it'll still parse correctly. 105 // Otherwise we'd return start + 1 here. 106 start 107end method parse-newline-newline; 108 109define method find 110 (text :: <string>, fun :: <function>, 111 #key start :: <integer> = 0, end: epos) 112 => (pos :: false-or(<integer>)) 113 block (return) 114 for (i :: <integer> from start below epos | text.size) 115 if (fun(text[i])) 116 return(i); 117 end; 118 end; 119 end; 120end method find; 121 122define method find 123 (text :: <string>, char :: <character>, 124 #key start :: <integer> = 0, end: epos = #f) 125 => (pos :: false-or(<integer>)) 126 find(text, curry(\==, char), start: start, end: epos) 127end; 128 129// Parse == heading == markup 130define method parse-header 131 (out :: <stream>, markup :: <string>, start :: <integer>) 132 => (end-pos :: false-or(<integer>)) 133 let newline = find(markup, '\n', start: start) | markup.size; 134 // let (#rest idxs) = regex-position(compile-regex("(==+)([^=\n]+)(==+)\\s*(\n|$)"), markup, 135 let regex = compile-regex("(==+)([^=\n]+)(==+|$)"); 136 let (#rest idxs) = regex-position(regex, markup, 137 start: start, end: newline); 138 if (idxs.size > 1) 139 let tag = copy-sequence(markup, start: idxs[2], end: idxs[3]); 140 let header = copy-sequence(markup, start: idxs[4], end: idxs[5]); 141 format(out, "<h%d>%s</h%d>\n", tag.size, header, tag.size); 142 idxs[7] 143 end 144end method parse-header; 145 146// Parse [[wiki-title]] or [url|label] markup. 147// @param start points to the initial '[' char. 148define method parse-link 149 (out :: <stream>, markup :: <string>, start :: <integer>) 150 => (end-pos :: false-or(<integer>)) 151 // links can't span multiple lines 152 let close = find(markup, method (x) x == ']' | x == '\n' end, start: start); 153 //format(out, "start: %d, close: %d\n", start, close); 154 if (close) 155 if(markup[close] == ']') 156 close := make-link(out, markup, start, close); 157 elseif(markup[close] == '\n') 158// note-form-message("The link %s is invalid wiki markup.", 159// copy-sequence(markup, start: start, end: close)); 160 close := make-link(out, markup, start, close); 161 close := close - 1; 162 end if; 163 close; 164 end if; 165end method parse-link; 166 167define method make-link 168 (out :: <stream>, markup :: <string>, start :: <integer>, close :: <integer>) 169 if (close) 170 let wiki-link? = (markup[start + 1] == '['); 171 if (wiki-link?) 172 if (markup[close] == ']') 173 close := close + 1; 174 end; 175 let title = copy-sequence(markup, start: start + 2, end: close - 1); 176 format(out, "<a href=\"%s%s\">%s%s</a>", 177 *wiki-link-url*, 178 title, 179 if (find-or-load-page(title)) "" else "[?]" end, 180 title); 181 else 182 let bar = find(markup, '|', start: start, end: close); 183 let url = copy-sequence(markup, start: start + 1, end: bar | close); 184 let label = bar & copy-sequence(markup, start: bar + 1, end: close); 185 format(out, "<a href=\"%s\">%s</a>", url, label | url); 186 end if; 187 end if; 188 close + 1; 189end method make-link; 190 191define method parse-bulleted-list 192 (out :: <stream>, markup :: <string>, start :: <integer>) 193 => (end-pos :: false-or(<integer>)) 194 generate-list(out, markup, start, '*', "ul") 195end; 196 197define method parse-numbered-list 198 (out :: <stream>, markup :: <string>, start :: <integer>) 199 => (end-pos :: false-or(<integer>)) 200 generate-list(out, markup, start, '#', "ol") 201end; 202 203define method generate-list 204 (stream, markup, start, bullet-char, tag) 205 => (end-pos :: false-or(<integer>)) 206 let regex1 = compile-regex(format-to-string("\n\\s*[^%s]", bullet-char)); 207 let (list-end, #rest xs) = regex-position(regex1, markup, start: start); 208 let lines = split(copy-sequence(markup, 209 start: start, 210 end: list-end | markup.size), 211 "\n", trim?: #t); 212// write(stream, "<p>\n"); 213 let depth :: <integer> = 0; 214 let regex2 = compile-regex(format-to-string("^\\s*([%s]+)", bullet-char)); 215 for (line in lines) 216 let (#rest indexes) = regex-position(regex2, line); 217 if (indexes.size > 1) 218 let bullet-start = indexes[2]; 219 let bullet-end = indexes[3]; 220 let num-bullets = bullet-end - bullet-start; 221 let item-html = wiki-markup-to-html(line, start: bullet-end); 222 item-html := copy-sequence(item-html, start: 0, end: max(item-html.size - 1, 0)); 223 case 224 depth = 0 => 225 format(stream, "<%s>\n<li>%s", tag, item-html); 226 inc!(depth); 227 num-bullets < depth => 228 format(stream, "</li>\n</%s></li>\n<li>%s", tag, item-html); 229 dec!(depth); 230 num-bullets = depth => 231 format(stream, "</li>\n<li>%s", item-html); 232 num-bullets > depth => 233 format(stream, "\n<%s>\n<li>%s", tag, item-html); 234 inc!(depth); 235 end case; 236 end if; 237 end for; 238 for (i from 0 below depth) 239 format(stream, "</li>\n</%s>\n", tag); 240 end; 241// write(stream, "</p>\n"); 242 list-end | markup.size 243end method generate-list; 244 245define method parse-horizontal-line 246 (out :: <stream>, markup :: <string>, start :: <integer>) 247 => (end-pos :: false-or(<integer>)) 248 let non-hyphen = find(markup, method (c) c ~== '-' end, start: start) | markup.size; 249 if (non-hyphen - start >= 4) 250 write(out, "<hr/>\n"); 251 non-hyphen 252 end 253end method parse-horizontal-line; 254 255define method parse-less-than 256 (out :: <stream>, markup :: <string>, start :: <integer>) 257 => (end-pos :: false-or(<integer>)) 258 // don't search past newline... 259 let close = find(markup, method (c) c == '>' | c == '\n' end, start: start); 260 if (close & markup[close] == '>') 261 let word = as-lowercase(copy-sequence(markup, start: start + 1, end: close)); 262 select (word by \=) 263 "br", "br/" 264 => write(out, "<br/>"); 265 close + 1; 266 "p", "p/" 267 => write(out, "<p/>"); 268 close + 1; 269 // add more paired elements here... 270 "center", "center/" 271 => write(out, markup, start: start, end: close + 1); 272 close + 1; 273 "nowiki" 274 // TODO: allow nested nowiki elements. 275 => let epos = regex-position(compile-regex("</nowiki>"), markup, 276 case-sensitive: #f, 277 start: close) 278 | markup.size; 279 write(out, markup, start: start + "<nowiki>".size, end: epos); 280 epos + "</nowiki>".size; 281 otherwise 282 => write(out, "<"); 283 start + 1; 284 end select 285 else 286 write(out, "<"); 287 start + 1 288 end 289end method parse-less-than; 290 291define method parse-ampersand 292 (out :: <stream>, markup :: <string>, start :: <integer>) 293 => (end-pos :: false-or(<integer>)) 294 write(out, "&"); 295 start + 1 296end method parse-ampersand; 297 298/* TODO... 299define wiki-markup raw-url 300 regex: "(http|ftp|gopher|mailto|news|nntp|telnet|wais|file|prospero)://[^ \t\r\n)]+"; 301 (stream, all, #rest ignore) 302 format(stream, "<a href=\"%s\">%s</a>", all, all); 303end; 304 305*/ 306