monday-parser.dylan | searchcode

/dylan/monday-parser.dylan

http://github.com/cgay/wiki · Unknown · 271 lines · 215 code · 56 blank · 0 comment · 0 complexity · c298154362cbce66a403f4472ebd6589 MD5 · raw file

module: %wiki
author: Hannes Mehnert <hannes@mehnert.org>
synopsis: markup definition for dylan wiki

define function extract-action
    (token-string :: <byte-string>,
     token-start :: <integer>, 	 
     token-end :: <integer>) 	 
 => (result :: <byte-string>); 	 
  copy-sequence(token-string, start: token-start, end: token-end);
end;

define function count-chars
  (string :: <byte-string>,
   tstart :: <integer>,
   tend :: <integer>)
 => (res :: <integer>)
  tend - tstart
end;

define constant $base-url = "/wiki/view.dsp?title=";

define constant $wiki-tokens
  = simple-lexical-definition
      token EOF;

      inert "([ ])+";

      token LBRACKET = "\\[";
      token RBRACKET = "\\]";

      token EQUALS = "(=)+",
        semantic-value-function: count-chars;
      token TILDES = "(~)+",
        semantic-value-function: count-chars;
      token TICKS = "(')+",
        semantic-value-function: count-chars;

      token AMPERSAND = "&";

      token HASHMARK = "#";
      token STAR = "*";

      token FOUR-DASHES = "----", priority: 3;

      token PIPE = "\\|";

      token SMALLER = "<";
      token GREATER = ">";

      token CLIST = "(\n|\r|\r\n)(\\*|#)", priority: 3;
      token PREFORMATTED = "(\r|\n|\r\n) ", priority: 3;

      token NEWLINE = "(\n|\r|\r\n)";
      //todo: ignore spaces?!

      token TEXT = "[a-zA-Z_-0-9\\.]+",
        semantic-value-function: extract-action;

      token URL = "(http|ftp|https)://",
        semantic-value-function: extract-action;
end;

define constant $wiki-productions
  = simple-grammar-productions

     production description :: false-or(<string>) => [TEXT] (data)
       if (TEXT.size = 0) #f else TEXT end;

     production wiki-page-name :: <string> => [TEXT] (data)
       TEXT;

     production myurl :: <string> => [URL TEXT] (data)
       concatenate(URL, TEXT);

     production external-link :: xml/<element> => [LBRACKET myurl RBRACKET] (data)
       with-xml() a(myurl, href => myurl) end;

     production external-link :: xml/<element> => [LBRACKET myurl description RBRACKET] (data)
       with-xml() a(description, href => myurl) end;

     production internal-link :: xml/<element> => [LBRACKET LBRACKET wiki-page-name RBRACKET RBRACKET] (data)
       with-xml() a(wiki-page-name, href => concatenate($base-url, wiki-page-name)) end;

     production internal-link :: xml/<element> => [LBRACKET LBRACKET wiki-page-name PIPE description RBRACKET RBRACKET] (data)
       with-xml() a(description, href => concatenate($base-url, wiki-page-name)) end;

     production header :: xml/<element> => [EQUALS more-wiki-text EQUALS], action:
       method (p :: <simple-parser>, data, s, e)
         let heading = max(p[0], p[2]);
         unless (p[0] = p[2])
           format-out("Unbalanced number of '=' in header %s, left: %d right: %d, using %d\n",
                      p[1], p[0], p[2], heading);
         end;
         make(xml/<element>,
              name: concatenate("h", integer-to-string(heading)),
              children: p[1]);
       end;

     production unnumbered-list :: xml/<element> => [STAR list-elements] (data)
       format-out("Hit unnumbered-list %=\n", list-elements);
       make(xml/<element>, name: "ul", children: list-elements);

     production numbered-list :: xml/<element> => [HASHMARK list-elements] (data)
       make(xml/<element>, name: "ol", children: list-elements);

     production list-elements :: <collection> => [list-element more-list-elements] (data)
       format-out("Hit list-elements\n");
       add!(more-list-elements, list-element);

     production more-list-elements :: <collection> => [CLIST list-element more-list-elements] (data)
       format-out("Hit more-list-elements\n");
       add!(more-list-elements | #(), list-element);

     production more-list-elements :: <collection> => [] (data)
       format-out("Hit more-list-elements, empty\n");
       #();

     production list-element :: xml/<element> => [wiki-text] (data)
       format-out("Hit list-element %=\n", wiki-text);
       make(xml/<element>, name: "li", children: wiki-text);

     production simple-format :: xml/<xml> => [TICKS TEXT TICKS], action:
       method (p :: <simple-parser>, data, s, e)
         let ticks = max(p[0], p[2]);
         unless (p[0] = p[2])
           format-out("Unbalanced number of ' in TICKS %s, left: %d right: %d, using %d\n",
                      p[1], p[0], p[2], ticks);
         end;
         let str = list(make(xml/<char-string>, text: p[1]));
         if (ticks = 5)
           make(xml/<element>, name: "b", children: list(make(xml/<element>, name: "i", children: str)));
         else
           let ele-name = if (ticks = 2) "i" elseif (ticks = 3) "b" end;
           if (ele-name)
             make(xml/<element>, name: ele-name, children: str);
           else
             str[0]
           end;
         end;
       end;

     production wiki-text :: <collection> => [TEXT more-wiki-text] (data)
       add!(more-wiki-text, with-xml() text(TEXT) end);

     production wiki-text :: <collection> => [internal-link more-wiki-text] (data)
       add!(more-wiki-text, internal-link);

     production wiki-text :: <collection> => [external-link more-wiki-text] (data)
       add!(more-wiki-text, external-link);

     production wiki-text :: <collection> => [simple-format more-wiki-text] (data)
       add!(more-wiki-text, simple-format);

     production more-wiki-text :: <collection> => [wiki-text] (data)
       wiki-text;

     production more-wiki-text :: <collection> => [] (data)
       #();

     production horizontal-line :: xml/<element> => [FOUR-DASHES] (data)
       with-xml() hr end;

     production preformat :: xml/<element> => [PREFORMATTED TEXT more-preformat] (data)
        let pre-string = concatenate("\n ", TEXT, more-preformat);
        make(xml/<element>,
             name: "pre",
             children: list(make(xml/<char-string>, text: pre-string)));

     production more-preformat :: <string> => [TEXT more-preformat] (data)
       concatenate(" ", TEXT, more-preformat);

     production more-preformat :: <string> => [PREFORMATTED more-preformat] (data)
       concatenate("\n", more-preformat);

     production more-preformat :: <string> => [NEWLINE] (data)
       "\n";

     production line :: <collection> => [wiki-text] (data)
       wiki-text;

     production line :: <collection> => [header] (data)
       list(header);

     production line :: <collection> => [unnumbered-list] (data)
       list(unnumbered-list);

     production line :: <collection> => [numbered-list] (data)
       list(numbered-list);

     production line :: <collection> => [horizontal-line] (data)
       list(horizontal-line);

     production lines => [] (data)

     production lines => [preformat lines] (data)
       add!(data.my-real-data, preformat);

     production lines => [line NEWLINE NEWLINE lines] (data)
       add!(data.my-real-data, with-xml() p end);
       do(curry(add!, data.my-real-data), line);

     production lines => [line NEWLINE lines] (data)
       do(curry(add!, data.my-real-data), line);
end;

define constant $wiki-parser-automaton
  = simple-parser-automaton($wiki-tokens, $wiki-productions,
                            #[#"lines"]);

define function consume-token 	 
    (consumer-data,
     token-number :: <integer>,
     token-name :: <object>,
     semantic-value :: <object>,
     start-position :: <integer>,
     end-position :: <integer>)
 => ();
  //let srcloc
  //  = range-source-location(consumer-data, start-position, end-position);
  format-out("%d - %d: token %d: %= value %=\n",
             start-position,
             end-position,
             token-number,
             token-name,
             semantic-value);
  simple-parser-consume-token(consumer-data, token-number, token-name, semantic-value, start-position, end-position);
end function;

define sealed class <my-data> (<object>)
  slot my-real-data = make(<stretchy-vector>);
end;

define function parse-wiki-markup (input :: <string>)
  let rangemap = make(<source-location-rangemap>);
  rangemap-add-line(rangemap, 0, 1);
  if(input[0] = ' ')
    input := concatenate("\n", input);
  end;
  unless(input[input.size - 1] = '\n')
    input := add!(input, '\n')
  end;
  let scanner = make(<simple-lexical-scanner>,
                     definition: $wiki-tokens,
                     rangemap: rangemap);
  let data = make(<my-data>);
  let parser = make(<simple-parser>,
                    automaton: $wiki-parser-automaton,
                    start-symbol: #"lines",
                    rangemap: rangemap,
                    consumer-data: data);
  format-out("before scan-tokens, input: %s\n", input);
  scan-tokens(scanner,
              simple-parser-consume-token,
              //consume-token,
              parser,
              input,
              end: input.size,
              partial?: #f);
  let end-position = scanner.scanner-source-position;
  format-out("before consuming EOF at %d\n", end-position);
  simple-parser-consume-token(parser, 0, #"EOF", parser, end-position, end-position);
  format-out("data (%d) is %=\n", data.my-real-data.size, data.my-real-data);
  reduce1(concatenate, (map(curry(as, <string>), reverse(data.my-real-data))));
end;
/*
begin
  parse-wiki-markup(" one\n two\n three\n foo");
  parse-wiki-markup(" this is pre-text\n and another line");
end;
*/