PageRenderTime 191ms CodeModel.GetById 41ms app.highlight 105ms RepoModel.GetById 35ms app.codeStats 1ms

/parsec.ss

http://github.com/yinwang0/ydiff
Scheme | 1046 lines | 645 code | 269 blank | 132 comment | 1 complexity | 1bdd78ec6e37658f186985ab25e059d3 MD5 | raw file
   1;; ydiff - a language-aware tool for comparing programs
   2;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com)
   3
   4;; This program is free software: you can redistribute it and/or modify
   5;; it under the terms of the GNU General Public License as published by
   6;; the Free Software Foundation, either version 3 of the License, or
   7;; (at your option) any later version.
   8
   9;; This program is distributed in the hope that it will be useful,
  10;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12;; GNU General Public License for more details.
  13
  14;; You should have received a copy of the GNU General Public License
  15;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17
  18
  19(load "utils.ss")
  20
  21(define *left-recur-detection* #f)
  22
  23
  24
  25
  26;-------------------------------------------------------------
  27;                 parser combinator library
  28;-------------------------------------------------------------
  29
  30;; s-expression settings
  31;; please override for other languages.
  32(define *delims* (list "("  ")"  "["  "]"  "{"  "}" "'"  "`"  ","))
  33(define *line-comment* (list ";"))
  34(define *comment-start* "")
  35(define *comment-end* "")
  36(define *operators*  '())
  37(define *quotation-marks* '(#\" #\'))
  38(define *lisp-char* (list "#\\" "?\\"))
  39(define *significant-whitespaces* '())
  40
  41
  42
  43
  44;-------------------------------------------------------------
  45;                       data types
  46;-------------------------------------------------------------
  47(struct Node    (start end)        #:transparent)
  48(struct Expr     Node (type elts)  #:transparent)
  49(struct Token    Node (text)       #:transparent)
  50(struct Comment  Node (text)       #:transparent)
  51(struct Str      Node (text)       #:transparent)
  52(struct Char     Node (text)       #:transparent)
  53(struct Newline  Node ()           #:transparent)
  54(struct Phantom  Node ()           #:transparent)
  55
  56
  57(define node-type
  58  (lambda (node)
  59    (and (Expr? node) (Expr-type node))))
  60
  61
  62(define get-start
  63  (lambda (node)
  64    (Node-start node)))
  65
  66
  67(define get-end
  68  (lambda (node)
  69    (Node-end node)))
  70
  71
  72(define get-symbol
  73  (lambda (node)
  74    (cond
  75     [(Token? node)
  76      (string->symbol (Token-text node))]
  77     [else #f])))
  78
  79
  80(define get-tag
  81  (lambda (e tag)
  82    (let ([matches (filter (lambda (x)
  83                             (and (Expr? x)
  84                                  (eq? (Expr-type x) tag)))
  85                           (Expr-elts e))])
  86      (cond
  87       [(null? matches) #f]
  88       [else (car matches)]))))
  89
  90
  91(define match-tags
  92  (lambda (e tags)
  93    (cond
  94     [(not (Expr? e)) #f]
  95     [(null? tags) e]
  96     [else
  97      (match-tags (get-tag e (car tags)) (cdr tags))])))
  98
  99
 100
 101
 102;-------------------------------------------------------------
 103;                          scanner
 104;-------------------------------------------------------------
 105
 106(define whitespace?  char-whitespace?)
 107(define alpha?       char-alphabetic?)
 108(define digit?       char-numeric?)
 109
 110
 111; Is char c a delimeter?
 112(define delim?
 113  (lambda (c)
 114    (member (char->string c) *delims*)))
 115
 116
 117(define id?
 118  (lambda (s)
 119    (cond
 120     [(= 0 (string-length s)) #f]
 121     [(or (alpha? (string-ref s 0))
 122          (eq? #\_ (string-ref s 0)))
 123      (let loop ([i 1])
 124        (cond
 125         [(>= i (string-length s)) #t]
 126         [else
 127          (let ([c (string-ref s i)])
 128            (cond
 129             [(alpha? c) (loop (add1 i))]
 130             [(digit? c) (loop (add1 i))]
 131             [(char=? c #\_) (loop (add1 i))]
 132             [else #f]))]))]
 133     [else #f])))
 134
 135
 136(define numeral?
 137  (lambda (s)
 138    (cond
 139     [(= 0 (string-length s)) #f]
 140     [(digit? (string-ref s 0)) #t
 141      ;; (let loop ([i 1])
 142      ;;   (cond
 143      ;;    [(>= i (string-length s)) #t]
 144      ;;    [else
 145      ;;     (let ([c (string-ref s i)])
 146      ;;         (cond
 147      ;;          [(digit? c) (loop (add1 i))]
 148      ;;          [(char=? c #\.) (loop (add1 i))]
 149      ;;          [else #f]))]))
 150]
 151     [else #f])))
 152
 153
 154
 155(define start-with
 156  (lambda (s start prefix)
 157    (let* ([prefix-str (if (char? prefix)
 158                           (char->string prefix)
 159                           prefix)]
 160           [len (string-length prefix-str)])
 161      (cond
 162       [(= len 0) #f]
 163       [(< (string-length s) (+ start len)) #f]
 164       [(string=? (substring s start (+ start len)) prefix-str)
 165        prefix]
 166       [else #f]))))
 167
 168
 169
 170(define start-with-one-of
 171  (lambda (s start prefixes)
 172    (cond
 173     [(null? prefixes) #f]
 174     [(start-with s start (car prefixes))
 175      (car prefixes)]
 176     [else
 177      (start-with-one-of s start (cdr prefixes))])))
 178
 179; (start-with-one-of "+>>=" 0 (list ">" #\+))
 180
 181
 182
 183(define find-next
 184  (lambda (s start pred)
 185    (cond
 186     [(<= (string-length s) start) #f]
 187     [(pred s start) start]
 188     [else
 189      (find-next s (add1 start) pred)])))
 190
 191
 192
 193; Find the first delim that match the start of s
 194(define find-delim
 195  (lambda (s start)
 196    (start-with-one-of s start *delims*)))
 197
 198
 199
 200(define find-operator
 201  (lambda (s start)
 202    (start-with-one-of s start *operators*)))
 203
 204; (find-operator ">> x" 0)
 205
 206
 207
 208(define scan
 209  (lambda (s)
 210    (define scan1
 211      (lambda (s start)
 212        (cond
 213         [(= start (string-length s)) (values 'eof start)]
 214
 215         [(start-with-one-of s start *significant-whitespaces*)
 216          (values (Newline start (add1 start)) (add1 start))]
 217
 218         [(whitespace? (string-ref s start))
 219          (scan1 s (add1 start))]
 220
 221         [(start-with-one-of s start *line-comment*) ; line comment
 222          (let ([line-end (find-next s start
 223                                     (lambda (s start)
 224                                       (eq? (string-ref s start) #\newline)))])
 225            (values (Comment start (add1 line-end) (substring s start line-end))
 226                    line-end))]
 227
 228         [(start-with s start *comment-start*) ; block comment
 229          (let* ([line-end (find-next s start
 230                                      (lambda (s start)
 231                                        (start-with s start *comment-end*)))]
 232                 [end (+ line-end (string-length *comment-end*))])
 233            (values (Comment start end (substring s start end)) end))]
 234
 235         [(find-delim s start) =>
 236          (lambda (delim)
 237            (let ([end (+ start (string-length delim))])
 238              (values (Token start end delim) end)))]
 239
 240         [(find-operator s start) =>
 241          (lambda (op)
 242            (let ([end (+ start (string-length op))])
 243              (values (Token start end op) end)))]
 244
 245         [(start-with-one-of s start *quotation-marks*)   ; string
 246          (let ([reg-match (or (regexp-match (regexp "^\"(\\\\.|[^\"])*\"")
 247                                             s start)
 248                               (regexp-match (regexp "^\'(\\\\.|[^\'])*\'")
 249                                             s start))])
 250            (cond
 251             [(not reg-match)
 252              (fatal 'scan "string match error")]
 253             [else
 254              (let* ([len (string-length (car reg-match))]
 255                     [end (+ start len)])
 256                (values (Str start end (car reg-match)) end))]))]
 257
 258         ;; => (lambda (q) (scan-string s start q))
 259
 260         [(start-with-one-of s start *lisp-char*) ; scheme/elisp char
 261          (cond
 262           [(<= (string-length s) (+ 2 start))
 263            (error 'scan-string "reached EOF while scanning char")]
 264           [else
 265            (let ([end
 266                   (let loop ([end (+ 3 start)])
 267                     (cond
 268                      [(or (whitespace? (string-ref s end))
 269                           (delim? (string-ref s end)))
 270                       end]
 271                      [else (loop (add1 end))]))])
 272              (values (Char start end (string-ref s (sub1 end))) end))])]
 273
 274         [else                        ; identifier or number
 275          (let loop ([pos start] [chars '()])
 276            (cond
 277             [(or (<= (string-length s) pos)
 278                  (whitespace? (string-ref s pos))
 279                  (find-delim s pos)
 280                  (find-operator s pos))
 281              (let ([text (list->string (reverse chars))])
 282                (values (Token start pos text) pos))]
 283             [else
 284              (loop (add1 pos) (cons (string-ref s pos) chars))]))])))
 285
 286    (let loop ([start 0] [toks '()])
 287      (letv ([(tok newstart) (scan1 s start)])
 288        (cond
 289         [(eq? tok 'eof)
 290          (reverse toks)]
 291         [else
 292          (loop newstart (cons tok toks))])))))
 293
 294
 295
 296
 297
 298
 299;-------------------------------------------------------------
 300;                           parser
 301;-------------------------------------------------------------
 302
 303(define onstack?
 304  (lambda (u v stk)
 305    (let loop ([stk stk] [trace '()])
 306      (cond
 307       [(null? stk) #f]
 308       [(and (eq? u (car (car stk)))
 309             (eq? v (cdr (car stk))))
 310        (reverse (cons (car stk) trace))]
 311       [else
 312        (loop (cdr stk) (cons (car stk) trace))]))))
 313
 314
 315
 316(define ext
 317  (lambda (u v stk)
 318    (cond
 319     [(not *left-recur-detection*) stk]
 320     [else
 321      (cons (cons u v) stk)])))
 322
 323
 324
 325(define stack->string
 326  (lambda (stk)
 327    (let ([ps (map
 328               (lambda (x) (format "~a" (car x)))
 329               stk)])
 330      (string-join ps "\n"))))
 331
 332; (display (stack->string (onstack? 'x 'y '((u . v) (x . y) (w . t)))))
 333
 334
 335
 336;; apply parser on toks, check for left-recurson if
 337;; *left-recur-detection* is enabled.
 338(define apply-check
 339  (lambda (parser toks stk ctx)
 340    (cond
 341     [(and *left-recur-detection*
 342           (onstack? parser toks stk))
 343      => (lambda (t)
 344           (fatal 'apply-check
 345                  "left-recursion detected     \n"
 346                  "parser: " parser           "\n"
 347                  "start token: " (car toks)  "\n"
 348                  "stack trace: " (stack->string t)))]
 349     [else
 350      ((parser) toks (ext parser toks stk) ctx)])))
 351
 352
 353
 354
 355;------------------ parser combinators --------------------
 356(define @seq
 357  (lambda ps
 358    (lambda ()
 359      (lambda (toks stk ctx)
 360        (let loop ([ps ps] [toks toks] [nodes '()])
 361          (cond
 362           [(null? ps)
 363            (values (apply append (reverse nodes)) toks)]
 364           [else
 365            (letv ([(t r) (apply-check (car ps) toks stk ctx)])
 366              (cond
 367               [(not t)
 368                (values #f #f)]
 369               [else
 370                (loop (cdr ps) r (cons t nodes))]))]))))))
 371
 372
 373
 374;; removes phantoms
 375(define @...
 376  (lambda ps
 377    (let ([parser ((apply @seq ps))])
 378      (lambda ()
 379        (lambda (toks stk ctx)
 380          (letv ([(t r) (parser toks stk ctx)])
 381            (cond
 382             [(not t) (values #f #f)]
 383             [else
 384              (values (filter (negate Phantom?) t) r)])))))))
 385
 386
 387; (((@seq)) (scan "ok"))
 388
 389
 390
 391(define @or
 392  (lambda ps
 393    (lambda ()
 394      (lambda (toks stk ctx)
 395        (let loop ([ps ps])
 396             (cond
 397              [(null? ps)
 398               (values #f #f)]
 399              [else
 400               (letv ([(t r) (apply-check (car ps) toks stk ctx)])
 401                 (cond
 402                  [(not t)
 403                   (loop (cdr ps))]
 404                  [else
 405                   (values t r)]))]))))))
 406
 407
 408; (((@or ($$ "foo") ($$ "bar"))) (scan "bar foo"))
 409
 410
 411
 412(define @=
 413  (lambda (type . ps)
 414    (let ([parser ((apply @seq ps))])
 415      (lambda ()
 416        (lambda (toks stk ctx)
 417          (letv ([(t r) (parser toks stk ctx)])
 418            (cond
 419             [(not t) (values #f #f)]
 420             [(not type)
 421              (values (filter (negate Phantom?) t) r)]
 422             [(null? t)
 423              (values (list (Expr (get-start (car toks))
 424                                  (get-start (car toks))
 425                                  type '()))
 426                      r)]
 427             [else
 428              (values (list (Expr (get-start (car t))
 429                                  (get-end (last t))
 430                                  type
 431                                  (filter (negate Phantom?) t)))
 432                      r)])))))))
 433
 434
 435
 436(define @*
 437  (lambda ps
 438    (let ([parser ((apply @... ps))])
 439      (lambda ()
 440        (lambda (toks stk ctx)
 441          (let loop ([toks toks] [nodes '()])
 442            (cond
 443             [(null? toks)
 444              (values (apply append (reverse nodes)) '())]
 445             [else
 446              (letv ([(t r) (parser toks stk ctx)])
 447                (cond
 448                 [(not t)
 449                  (values (apply append (reverse nodes)) toks)]
 450                 [else
 451                  (loop r (cons t nodes))]))])))))))
 452
 453
 454; ($eval (@* ($$ "ok")) (scan "ok ok ok"))
 455
 456
 457;; similar to @*, but takes only one parser and will not
 458;; make a sequence by invoking @seq
 459(define @*^
 460  (lambda (p)
 461    (lambda ()
 462      (lambda (toks stk ctx)
 463        (let loop ([toks toks] [nodes '()])
 464          (cond
 465           [(null? toks)
 466            (values (apply append (reverse nodes)) '())]
 467           [else
 468            (letv ([(t r) ((p) toks stk ctx)])
 469              (cond
 470               [(not t)
 471                (values (apply append (reverse nodes)) toks)]
 472               [else
 473                (loop r (cons t nodes))]))]))))))
 474
 475
 476(define @+
 477  (lambda (p)
 478    (@... p (@* p))))
 479
 480; (((@+ ($$ "ok"))) (scan "ok ok ok"))
 481
 482
 483(define @?
 484  (lambda ps
 485    (@or (apply @... ps) $none)))
 486
 487
 488; (((@? ($$ "x"))) (scan "x y z"))
 489
 490
 491;; negation - will fail if ps parses successfully.
 492(define @!
 493  (lambda ps
 494    (let ([parser ((apply @... ps))])
 495      (lambda ()
 496        (lambda (toks stk ctx)
 497          (letv ([(t r) (parser toks stk ctx)])
 498            (cond
 499             [(not t) (values (list (car toks)) (cdr toks))]
 500             [else (values #f #f)])))))))
 501
 502
 503;; similar to @!, but takes only one parser and will not
 504;; make a sequence by invoking @seq
 505(define @!^
 506  (lambda (p)
 507    (lambda ()
 508      (lambda (toks stk ctx)
 509        (letv ([(t r) ((p) toks stk ctx)])
 510          (cond
 511           [(not t) (values (list (car toks)) (cdr toks))]
 512           [else (values #f #f)]))))))
 513
 514
 515
 516
 517(define @and
 518  (lambda ps
 519    (lambda ()
 520      (lambda (toks stk ctx)
 521        (let loop ([ps ps] [res '()])
 522          (cond
 523           [(null? ps)
 524            (let ([r1 (car res)])
 525                (values (car r1) (cadr r1)))]
 526           [else
 527            (letv ([(t r) (apply-check (car ps) toks stk ctx)])
 528              (cond
 529               [(not t)
 530                (values #f #f)]
 531               [else
 532                (loop (cdr ps) (cons (list t r) res))]))]))))))
 533
 534
 535
 536; (((@and (@or ($$ "[") ($$ "{")) (@! ($$ "{")))) (scan "["))
 537
 538
 539
 540;; parses the parsers ps normally, but "globs" the parses and doesn't
 541;; put them into the output.
 542(define $glob
 543  (lambda ps
 544    (let ([parser ((apply @... ps))])
 545      (lambda ()
 546        (lambda (toks stk ctx)
 547          (letv ([(t r) (parser toks stk ctx)])
 548            (cond
 549             [(not t) (values #f #f)]
 550             [else
 551              (values '() r)])))))))
 552
 553; (($glob ($$ "foo")) (scan "foo bar"))
 554
 555
 556
 557;; similar to $glob, but takes only one parser and will not
 558;; make a sequence by invoking @seq
 559(define $glob^
 560  (lambda (p)
 561    (lambda ()
 562      (lambda (toks stk ctx)
 563        (letv ([(t r) ((p) toks stk ctx)])
 564          (cond
 565           [(not t) (values #f #f)]
 566           [else
 567            (values '() r)]))))))
 568
 569
 570
 571;; A phantom is something that takes space but invisible. It is useful
 572;; for something whose position is important, but is meaningless to
 573;; show up in the AST. It is used mostly for delimeters. $phantom is
 574;; seldom used directly. The helper @~ creates a phantom from strings.
 575(define $phantom
 576  (lambda ps
 577    (let ([parser ((apply @... ps))])
 578      (lambda ()
 579        (lambda (toks stk ctx)
 580          (letv ([(t r) (parser toks stk ctx)])
 581            (cond
 582             [(not t) (values #f #f)]
 583             [else
 584              (cond
 585               [(null? t)
 586                (values '() r)]
 587               [else
 588                (values (list (Phantom (get-start (car t))
 589                                       (get-end (last t))))
 590                        r)])])))))))
 591
 592
 593
 594
 595;------------------------ parsers ---------------------------
 596
 597(define $fail
 598  (lambda ()
 599    (lambda (toks stk ctx)
 600      (values #f #f))))
 601
 602
 603(define $none
 604  (lambda ()
 605    (lambda (toks stk ctx)
 606      (values '() toks))))
 607
 608
 609;; succeeds if the predicate 'proc' returns true for the first token.
 610(define $pred
 611  (lambda (proc)
 612    (lambda ()
 613      (lambda (toks stk ctx)
 614        (cond
 615         [(null? toks) (values #f #f)]
 616         [(proc (car toks))
 617          (values (list (car toks)) (cdr toks))]
 618         [else
 619          (values #f #f)])))))
 620
 621
 622(define $eof
 623  ($glob ($pred (lambda (t) (eq? t 'eof)))))
 624
 625
 626;; literal parser for tokens. for example ($$ "for")
 627(define $$
 628  (lambda (s)
 629    ($pred
 630     (lambda (x)
 631       (and (Token? x) (string=? (Token-text x) s))))))
 632
 633
 634(define @_
 635  (lambda (s)
 636    ($glob ($$ s))))
 637
 638
 639(define @~
 640  (lambda (s)
 641    ($phantom ($$ s))))
 642
 643
 644(define join
 645  (lambda (ps sep)
 646    (cond
 647     [(null? (cdr ps)) ps]
 648     [else
 649      (cons (car ps) (cons sep (join (cdr ps) sep)))])))
 650
 651
 652;; a list of parser p separated by sep
 653(define @.@
 654  (lambda (p sep)
 655    (@... p (@* (@... sep p)))))
 656
 657
 658
 659;; ($eval (@.@ ($$ "foo") ($$ ","))
 660;;        (scan "foo, foo, foo"))
 661
 662
 663
 664
 665
 666;-------------------------------------------------------------
 667;                  expression parser combinators
 668;-------------------------------------------------------------
 669
 670;; helper for constructing left-associative infix expression
 671(define constr-exp-l
 672  (lambda (type fields)
 673    (let loop ([fields (cdr fields)] [ret (car fields)])
 674         (cond
 675          [(null? fields) ret]
 676          [else
 677           (let ([e (Expr (get-start ret)
 678                          (get-end (cadr fields))
 679                          type (list ret (car fields) (cadr fields)))])
 680             (loop (cddr fields) e))]))))
 681
 682
 683;; helper for constructing right-associative infix expression
 684(define constr-exp-r
 685  (lambda (type fields)
 686    (let ([fields (reverse fields)])
 687      (let loop ([fields (cdr fields)] [ret (car fields)])
 688           (cond
 689            [(null? fields) ret]
 690            [else
 691             (let ([e (Expr (get-start (cadr fields))
 692                            (get-end ret)
 693                            type (list (cadr fields) (car fields) ret))])
 694               (loop (cddr fields) e))])))))
 695
 696
 697
 698;; helper for creating infix operator parser. used by @infix-left and
 699;; @infix-right
 700(define @infix
 701  (lambda (type p op associativity)
 702    (lambda ()
 703      (lambda (toks stk ctx)
 704        (let loop ([rest toks] [ret '()])
 705             (letv ([(tp rp) (((@seq p)) rest stk ctx)])
 706               (cond
 707                [(not tp)
 708                 (cond
 709                  [(< (length ret) 3)
 710                   (values #f #f)]
 711                  [else
 712                   (let ([fields (reverse (cdr ret))]
 713                         [constr (if (eq? associativity 'left)
 714                                     constr-exp-l
 715                                     constr-exp-r)])
 716                     (values (list (constr type fields))
 717                             (cons (car ret) rest)))])]
 718                [else
 719                 (letv ([(top rop) (((@seq op)) rp stk ctx)])
 720                   (cond
 721                    [(not top)
 722                     (cond
 723                      [(< (length ret) 2)
 724                       (values #f #f)]
 725                      [else
 726                       (let ([fields (reverse (append tp ret))]
 727                             [constr (if (eq? associativity 'left)
 728                                         constr-exp-l
 729                                         constr-exp-r)])
 730                         (values (list (constr type fields))
 731                                 rp))])]
 732                    [else
 733                     (loop rop (append (append top tp) ret))]))])))))))
 734
 735
 736(define @infix-left
 737  (lambda (type p op)
 738    (@infix type p op 'left)))
 739
 740
 741(define @infix-right
 742  (lambda (type p op)
 743    (@infix type p op 'right)))
 744
 745
 746
 747;; ($eval (@infix-right 'binop $multiplicative-expression $additive-operator)
 748;;        (scan "x + y + z"))
 749
 750
 751
 752
 753(define @postfix
 754  (lambda (type p op)
 755    (lambda ()
 756      (lambda (toks stk ctx)
 757        (letv ([(t r) (((@... p (@+ op))) toks stk ctx)])
 758          (cond
 759           [(not t)
 760            (values #f #f)]
 761           [else
 762            (values (list (make-postfix type t)) r)]))))))
 763
 764
 765;; ($eval (@postfix 'ok ($$ "foo") (@= 'bar ($$ "bar")) 'ok)
 766;;        (scan "foo bar bar"))
 767
 768
 769(define make-postfix
 770  (lambda (type ls)
 771    (let loop ([ls (cdr ls)] [ret (car ls)])
 772         (cond
 773          [(null? ls) ret]
 774          [else
 775           (let ([e (Expr (get-start ret)
 776                          (get-end (car ls))
 777                          type
 778                          (list ret (car ls)))])
 779             (loop (cdr ls) e))]))))
 780
 781
 782(define @prefix
 783  (lambda (type p op)
 784    (lambda ()
 785      (lambda (toks stk ctx)
 786        (letv ([(t r) (((@... (@+ op) p)) toks stk ctx)])
 787          (cond
 788           [(not t)
 789            (values #f #f)]
 790           [else
 791            (values (list (make-prefix type t)) r)]))))))
 792
 793
 794(define make-prefix
 795  (lambda (type ls)
 796    (cond
 797     [(null? (cdr ls)) (car ls)]
 798     [else
 799      (let ([tail (make-prefix type (cdr ls))])
 800        (Expr (get-start (car ls))
 801              (get-end tail)
 802              type
 803              (list (car ls) tail)))])))
 804
 805
 806;; ($eval (@prefix 'prefix $primary-expression $prefix-operator)
 807;;        (scan "-1"))
 808
 809
 810
 811;-------------------------------------------------------------
 812;                   syntactic extensions
 813;-------------------------------------------------------------
 814
 815(define *parse-hash* (make-hasheq))
 816
 817
 818;; define an unnamed parser
 819(define-syntax ::
 820  (syntax-rules ()
 821    [(_ name expr)
 822     (define name
 823       (lambda ()
 824         (lambda (toks stk ctx)
 825           (cond
 826            [(hash-get *parse-hash* name toks)
 827             => (lambda (p)
 828                  (values (car p) (cdr p)))]
 829            [else
 830             (letv ([(t r) ((expr) toks stk ctx)])
 831               (hash-put! *parse-hash* name toks (cons t r))
 832               (values t r))]))))]))
 833
 834
 835
 836;; define a named parser
 837(define-syntax ::=
 838  (syntax-rules ()
 839    [(_ name type expr ...)
 840     (define name
 841       (cond
 842        [(symbol? type)
 843         (lambda ()
 844           (lambda (toks stk ctx)
 845             (cond
 846              [(hash-get *parse-hash* name toks)
 847               => (lambda (p)
 848                    (values (car p) (cdr p)))]
 849              [else
 850               (letv ([parser (@= type expr ...)]
 851                      [(t r) ((parser) toks stk (cons 'name ctx))])
 852                 (hash-put! *parse-hash* name toks (cons t r))
 853                 (values t r))])))]
 854        [else
 855         (fatal '::= "type must be a symbol, but got: " type)]))]))
 856
 857
 858
 859
 860
 861;;---------------- context sensitive parsing ----------------
 862
 863;; succeed only in certain context
 864(define-syntax ::?
 865  (syntax-rules ()
 866    [(_ name effective-ctx expr)
 867     (define name
 868       (lambda ()
 869         (lambda (toks stk ctx)
 870           (cond
 871            [(not (memq 'effective-ctx ctx))
 872             (values #f #f)]
 873            [(hash-get *parse-hash* name toks)
 874             => (lambda (p)
 875                  (values (car p) (cdr p)))]
 876            [else
 877             (letv ([(t r) ((expr) toks stk (cons 'name ctx))])
 878               (hash-put! *parse-hash* name toks t r)
 879               (values t r))]))))]))
 880
 881
 882
 883;; succeed only in a context that is NOT avoid-ctx
 884(define-syntax ::!
 885  (syntax-rules ()
 886    [(_ name avoid-ctx expr)
 887     (define name
 888       (lambda ()
 889         (lambda (toks stk ctx)
 890           (cond
 891            [(memq 'avoid-ctx ctx)
 892             (values #f #f)]
 893            [(hash-get *parse-hash* name toks)
 894             => (lambda (p)
 895                  (values (car p) (cdr p)))]
 896            [else
 897             (letv ([(t r) ((expr) toks stk (cons 'name ctx))])
 898               (hash-put! *parse-hash* name toks t r)
 899               (values t r))]))))]))
 900
 901
 902;; EXAMPLES:
 903
 904;; (::= $foo
 905;;      (@= 'foo (@... $bar ($$ "foo"))))
 906
 907;; (::? $bar $baz
 908;;      ($$ "bar"))
 909
 910;; (::= $baz
 911;;      (@= 'baz (@... $bar ($$ "baz"))))
 912
 913
 914;; ($eval $bar (scan "bar foo"))
 915;; ($eval $foo (scan "bar foo"))
 916;; ($eval $baz (scan "bar baz"))           ; only this one succeeds
 917
 918
 919;; (::! $avoid-foo $foo
 920;;      (@= 'avoid-foo ($$ "avoid-foo")))
 921
 922;; (::= $foo
 923;;      (@= 'foo (@... $avoid-foo ($$ "foo"))))
 924
 925;; (::= $not-foo
 926;;      (@= 'not-foo (@... $avoid-foo ($$ "not-foo"))))
 927
 928
 929;; ($eval $foo (scan "avoid-foo foo"))     ; $avoid-foo fails only in foo
 930;; ($eval $not-foo (scan "avoid-foo not-foo"))
 931
 932
 933
 934;; execuate parser p on the input tokens
 935(define $eval
 936  (lambda (p toks)
 937    (set! *parse-hash* (make-hasheq))
 938    (letv ([(t r) ((p) toks '() '())])
 939      (set! *parse-hash* (make-hasheq))
 940      (values t r))))
 941
 942
 943(define parse1
 944  (lambda (p s)
 945    (letv ([(t r) ($eval p (filter (lambda (x) (not (Comment? x)))
 946                                   (scan s)))])
 947      t)))
 948
 949
 950
 951
 952
 953
 954;-------------------------------------------------------------
 955;                    testing facilities
 956;-------------------------------------------------------------
 957
 958(define test-string
 959  (lambda (s)
 960    (letv ([(t r) ($eval $program
 961                         (filter (lambda (x) (not (Comment? x)))
 962                                 (scan s)))])
 963      (cond
 964       [(null? r) #t]
 965       [(not r) #f]
 966       [else (car r)]))))
 967
 968
 969
 970
 971
 972(define test-file
 973  (lambda files
 974    (define test1
 975      (lambda (file)
 976        (printf "testing file: ~a ... " file)
 977        (let ([start (current-seconds)])
 978          (flush-output)
 979          (let ([res (test-string (read-file file))])
 980            (cond
 981             [(eq? #t res)
 982              (printf "succeed.~ntime used: ~a seconds~n"
 983                      (- (current-seconds) start))
 984              (flush-output)]
 985             [else
 986              (printf "failed at token: ~a~n" res)
 987              (flush-output)])))))
 988    (for-each test1 files)))
 989
 990
 991
 992
 993
 994;-------------------------- examples ---------------------------
 995
 996; a parser for s-expressions
 997
 998(:: $open
 999     (@or (@~ "(") (@~ "[")))
1000
1001(:: $close
1002     (@or (@~ ")") (@~ "]")))
1003
1004(:: $non-parens
1005     (@and (@! $open) (@! $close)))
1006
1007(::= $parens 'sexp
1008     (@seq $open (@* $sexp) $close))
1009
1010(:: $sexp
1011    (@+ (@or $parens $non-parens)))
1012
1013(:: $program $sexp)
1014
1015
1016(define parse-sexp
1017  (lambda (s)
1018    (first-val ($eval $program (scan s)))))
1019
1020
1021;; (parse-sexp "(lambda (x) x)")
1022;; (parse-sexp (read-file "paredit20.el"))
1023
1024
1025
1026
1027;;-------------- direct left recursion test ---------------
1028;;
1029;; (::= $left 'left
1030;;   (@or (@seq $left ($$ "ok"))
1031;;        ($$ "ok")))
1032
1033;; ($eval $left (scan "ok"))
1034
1035
1036;;---------- indirect left-recursion -------------
1037;;
1038;; (::= $left1 'left1
1039;;   (@seq $left2 ($$ "ok")))
1040
1041;; (::= $left2 'left2
1042;;   (@or (@seq $left1 ($$ "ok"))
1043;;        ($$ "ok")))
1044
1045;; ($eval $left1 (scan "ok ok"))
1046