PageRenderTime 262ms CodeModel.GetById 61ms app.highlight 117ms RepoModel.GetById 59ms app.codeStats 1ms

/parse-cpp.ss

http://github.com/yinwang0/ydiff
Scheme | 873 lines | 503 code | 277 blank | 93 comment | 0 complexity | 8b3fdadce0c129b47e460caa6744216b MD5 | raw file
  1;; ydiff - a language-aware tool for comparing programs
  2;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com)
  3
  4;; This program is free software: you can redistribute it and/or modify
  5;; it under the terms of the GNU General Public License as published by
  6;; the Free Software Foundation, either version 3 of the License, or
  7;; (at your option) any later version.
  8
  9;; This program is distributed in the hope that it will be useful,
 10;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 11;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12;; GNU General Public License for more details.
 13
 14;; You should have received a copy of the GNU General Public License
 15;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
 16
 17
 18(load "parsec.ss")
 19
 20
 21
 22;-------------------------------------------------------------
 23;                     scanner settings
 24;-------------------------------------------------------------
 25
 26(define *delims* (list "("  ")"  "["  "]"  "{"  "}" ","  "`"  ";" "#"))
 27
 28(define *operators*
 29  (list
 30   ">>>="
 31   "<<="  ">>="  ">>>"  "->*"  "..."
 32   "&&"  "||"  ">>"  "<<"  "++"  "--"
 33   "=="  "!="  ">="  "<="  "+="  "-="  "*="  "/="  "^="  "&="  "|="
 34   "->"  ".*"  "::"
 35   "="  "+"  "-"  "*"  "/"  "%"  "<"  ">"  "!"  ":"  "?"  "."
 36   "^"  "|"  "&"  "~"
 37))
 38
 39(define *line-comment*  (list "//"))
 40(define *comment-start*  "/*")
 41(define *comment-end*    "*/")
 42(define *quotation-marks*  '(#\" #\'))
 43(define *significant-whitespaces*
 44  (list #\newline #\linefeed #\u2028 #\u2029))
 45
 46
 47
 48
 49;-------------------------------------------------------------
 50;                          parsers
 51;-------------------------------------------------------------
 52
 53;; literals
 54(:: $id
 55    ($pred
 56     (lambda (t)
 57       (and (Token? t)
 58            (id? (Token-text t))))))
 59
 60
 61(::= $identifier 'identifier
 62     (@? ($$ "::"))
 63     (@* $id (@* $type-parameter) ($$ "::"))
 64     (@= 'id (@? ($$ "~")) $id))
 65
 66
 67;; (::= $identifier 'identifier
 68;;      (@? ($$ "::")) $scope-resolution (@? ($$ "~")) $id)
 69
 70
 71(:: $numeral-literal
 72    ($pred
 73     (lambda (t)
 74       (and (Token? t)
 75            (numeral? (Token-text t))))))
 76
 77(:: $char-literal ($pred Char?))
 78(:: $string-literal ($pred Str?))
 79(:: $newline ($pred Newline?))
 80(:: $comment ($pred Comment?))
 81
 82
 83;; delimeters
 84(::  |,|   (@_ ","))
 85(::  |;|   (@~ ";"))
 86(::  |:|   (@_ ":"))
 87(::  |(|   (@~ "("))
 88(::  |)|   (@~ ")"))
 89(::  |[|   (@~ "["))
 90(::  |]|   (@~ "]"))
 91(::  |{|   (@~ "{"))
 92(::  |}|   (@~ "}"))
 93
 94(::  |\n|  ($glob^ (@*^ $newline)))
 95(::  |;\n| (@or |;| |\n|))
 96
 97
 98(define old-seq @seq)
 99
100(define @seq
101  (lambda ps
102    (let ([psj (join ps |\n|)])
103      (apply old-seq `(,|\n| ,@psj ,|\n|)))))
104
105
106
107;; a hacky definition for macros
108;; will fix later
109(::= $macro-defintion 'macro
110     (@~ "#")
111     (@*^ (old-seq (@*^ (@and (@!^ ($$ "\\")) (@!^ $newline))) ($$ "\\") (@*^ $newline)))
112     (old-seq (@*^ (@!^ $newline)) ($glob^ $newline) ($glob^ (@*^ $newline)))
113)
114
115
116(:: $directive
117    (@or ($$ "ifdef")
118         ($$ "define")
119         ($$ "undef")
120         ($$ "endif")))
121
122
123;;------------------ starting point -----------------
124(::= $program 'program
125     (@* $statement)
126)
127
128
129
130(:: $statement
131    (@or $macro-defintion
132         $empty-statement
133         $access-label
134         $statement-block
135
136         $if-statement
137         $switch-statement
138         $do-while-statement
139         $while-statement
140         $for-statement
141         $continue-statement
142         $break-statement
143
144         $return-statement
145         $labelled-statement
146         $try-statement
147
148         $namespace-definition
149         $using-namespace
150
151         $class-definition
152         $function-definition
153         $function-declaration
154         $variable-definition
155         $enum-declaration
156
157         $extended-assembly
158         $inline-assembly
159
160         $expression-statement
161))
162
163
164(:: $empty-statement |;|)
165
166
167(::= $enum-declaration 'enum
168     (@~ "enum") (@? $identifier)
169     |{|
170       (@? (@.@  (@= 'name-value $identifier  (@? $initializer))  |,|))
171     |}|
172     |;|
173)
174
175
176(::= $access-label 'access-label
177     $access-specifier (@~ ":"))
178
179
180(::= $statement-block 'block
181     |{|  (@* $statement)  |}|
182)
183
184
185(::= $namespace-definition 'namespace
186     ($$ "namespace") $identifier
187     |{|  (@* $statement)  |}|
188)
189
190(::= $using-namespace 'using-namespace
191     ($$ "using") ($$ "namespace") $identifier)
192
193
194
195;;--------------------------------------------
196(::= $class-definition 'class
197
198     (@or ($$ "class")
199          ($$ "struct")
200          ($$ "union"))
201
202     (@* (@= 'declspec
203             (@or ($$ "_declspec") ($$ "__declspec")) |(|  $expression  |)|))
204
205     (@or (@= 'name $identifier |;| )
206
207          (@...
208           (@= 'name (@? $identifier)) (@? (@... (@_ ":") $base-clause))
209           (@= 'body  |{|  (@* $statement)  |}|) )
210          ))
211
212
213(::= $base-clause 'bases
214     (@.@ $base-specifier |,|)
215)
216
217
218(::= $base-specifier 'base-specifier
219     (@? $access-specifier) $identifier)
220
221
222(::= $access-specifier 'access-specifier
223     (@or ($$ "public")
224          ($$ "protected")
225          ($$ "private")
226          ($$ "virtual")))
227
228
229;;---------- function definition and declaration ------------
230
231(::= $function-declaration 'function-declaration
232     (@? ($$ "typedef"))
233     (@? $access-specifier) (@? $modifiers) (@? $type)
234     (@= 'name (@or $identifier
235                    (@... |(| ($$ "*") $identifier |)|)) )
236     $formal-parameter-list
237     (@? ($$ "const"))
238     (@? $initializer)
239)
240
241
242(::= $function-definition 'function
243     (@or (@... (@? $modifiers) $type
244                (@= 'name $identifier ) $formal-parameter-list)
245
246          (@... (@= 'name $identifier ) $formal-parameter-list))
247     (@? $initializer)
248     $function-body)
249
250
251(::= $type 'type
252     (@? $modifiers) (@or $primitive-type
253                          $ctype
254                          $identifier)
255     (@* $type-parameter) (@* $ptr-suffix))
256
257
258(::= $type-parameter 'type-parameter
259     (@~ "<") (@.@ (@or $type $numeral-literal) |,|) (@~ ">"))
260
261
262(::= $ptr-suffix 'suffix
263     (@or ($$ "*")
264          ($$ "&")))
265
266
267(::= $formal-parameter-list 'parameters
268     |(|  (@? (@.@ $type-variable |,|)) (@? |,| ($$ "..."))  |)|
269)
270
271
272(::= $type-variable 'type-variable
273     (@? $modifiers) $type (@? $identifier) (@? $array-suffix))
274
275
276(::= $function-body 'body
277     |{|  (@* $statement)  |}|
278)
279
280
281
282(::= $variable-definition 'variable-definition
283     $variable-declaration-list |;|
284)
285
286
287(:: $variable-declaration-list
288    (@... (@? $modifiers) $type (@.@ $variable-declaration |,|)))
289
290
291(::= $variable-declaration 'variable-declaration
292     $identifier (@? $variable-declaration-suffix)
293     (@? $initializer))
294
295
296(::= $modifiers 'modifiers
297     (@+ (@or ($$ "const")
298              ($$ "static")
299              ($$ "inline"))))
300
301(:: $primitive-type
302     (@or (@...
303           (@or ($$ "signed")
304                ($$ "unsigned"))
305           (@or ($$ "int")
306                ($$ "char")
307                ($$ "long")
308                ($$ "double")
309                ($$ "float")))
310          (@or ($$ "signed")
311               ($$ "unsigned"))))
312
313(::= $ctype 'ctype
314     (@or ($$ "struct")
315          ($$ "enum"))
316     $identifier
317)
318
319
320(::= $variable-declaration-suffix 'suffix
321     (@or (@... |[|  $expression  |]|))
322)
323
324(::= $initializer 'initializer
325     (@or (@... (@_ "=") $expression)
326          (@... (@_ ":") $expression)
327          (@... (@_ "(") $expression (@_ ")"))))
328
329
330
331
332(::= $if-statement 'if
333     ($$ "if")  (@= 'test |(| $expression |)|) $statement
334     (@? (@= 'else ($$ "else") $statement))
335)
336
337
338
339(::= $do-while-statement 'do-while
340    ($$ "do") $statement
341    (@= 'while-do ($$ "while")  (@= 'test |(| $expression |)|   ))
342    |;|
343)
344
345
346(::= $while-statement 'while
347     ($$ "while")  (@= 'test |(| $expression |)|   )
348     $statement
349)
350
351
352(::= $for-statement 'for
353     ($$ "for") (@= 'iter
354                    |(| (@? $for-initaliser) |;|
355                    (@? $expression)     |;|
356                    (@? $expression)
357                    |)|
358                    )
359     $statement
360)
361
362
363(::= $for-initaliser 'for-initializer
364     (@or (@= 'variable-declaration
365              $variable-declaration-list)
366
367          $expression
368))
369
370
371(::= $continue-statement 'continue
372     ($$ "continue") (@= 'label (@? $identifier)) |;|
373)
374
375
376(::= $break-statement 'break
377     ($$ "break") (@= 'label (@? $identifier)) |;|
378)
379
380
381(::= $return-statement 'return
382     ($$ "return") (@= 'value (@? $expression)) |;|
383)
384
385
386
387(::= $labelled-statement 'labelled-statement
388     $identifier |:| $statement
389)
390
391
392(::= $switch-statement 'switch
393     ($$ "switch")  |(| $expression |)|
394     |{|  (@* $case-clause)
395          (@? (@... $default-clause
396                    (@* $case-clause)))
397     |}|
398)
399
400
401(::= $case-clause 'case-clause
402     ($$ "case") $expression |:| (@* $statement)
403)
404
405
406(::= $default-clause 'default-clause
407    ($$ "default") |:| (@* $statement)
408)
409
410
411;; throw is an expression in C++
412;; (::= $throw-statement 'throw
413;;      ($$ "throw") $expression  |;|
414;; )
415
416
417(::= $try-statement 'try
418     ($$ "try") $statement-block
419     (@or $finally-clause
420          (@... $catch-clause (@? $finally-clause))))
421
422
423(::= $catch-clause 'catch
424     ($$ "catch") |(| $identifier |)| $statement-block)
425
426
427(::= $finally-clause 'finally
428     ($$ "finally") $statement-block)
429
430
431(::= $expression-statement 'expression-statement
432     $expression |;|)
433
434
435
436
437;-------------------------------------------------------------
438;                       expressions
439;-------------------------------------------------------------
440
441;; utility for constructing operators
442(define op
443  (lambda (s)
444    (@= 'op ($$ s))))
445
446
447(:: $expression
448    $comma-expression
449    )
450
451
452
453;; 18. comma
454;;--------------------------------------------
455(::= $comma-expression 'comma
456     (@.@ $assignment-expression |,|))
457
458
459
460;; 17. throw
461;;--------------------------------------------
462(::= $throw-expression 'throw
463     (@or (@... (@~ "throw")) $expression
464          $assignment-expression)
465)
466
467
468;; 16. assignment
469;;--------------------------------------------
470(:: $assignment-expression
471    (@or (@= 'assignment
472             $conditional-expression
473             $assignment-operator
474             $assignment-expression)
475
476         $conditional-expression
477         ))
478
479
480(:: $assignment-operator
481     (@or (op "=")
482          (op "*=")
483          (op "/=")
484          (op "%=")
485          (op "+=")
486          (op "-=")
487          (op "<<=")
488          (op ">>=")
489          (op ">>>=")
490          (op "&=")
491          (op "^=")
492          (op "|=")))
493
494
495
496;; 15.	?:	 Ternary conditional
497;;--------------------------------------------
498(:: $conditional-expression
499    (@or (@= 'conditional-expression
500             (@= 'test $logical-or-expression)
501             (@~ "?") (@= 'then $conditional-expression)
502             (@~ ":") (@= 'else $conditional-expression))
503
504         $logical-or-expression
505         ))
506
507
508; ($eval $conditional-expression (scan "x > 0? x-1 : x"))
509
510
511
512
513;; 14.	||	 Logical OR
514;;--------------------------------------------
515(:: $logical-or-expression
516     (@or (@infix-left 'binop
517                       $logical-and-expression
518                       (op "||"))
519
520          $logical-and-expression
521          ))
522
523
524
525;; 13.	&&	 Logical AND
526;;--------------------------------------------
527(:: $logical-and-expression
528     (@or (@infix-left 'binop
529                       $bitwise-or-expression
530                       (op "&&"))
531
532          $bitwise-or-expression
533          ))
534
535
536
537;; 12.	|	 Bitwise OR (inclusive or)
538;;--------------------------------------------
539(:: $bitwise-or-expression
540     (@or (@infix-left 'binop
541                       $bitwise-xor-expression
542                       (op "|"))
543
544          $bitwise-xor-expression
545          ))
546
547
548
549;; 11.	^	 Bitwise XOR (exclusive or)
550;;--------------------------------------------
551(:: $bitwise-xor-expression
552     (@or (@infix-left 'binop
553                       $bitwise-and-expression
554                       (op "^"))
555
556          $bitwise-and-expression
557          ))
558
559
560
561;; 10.	&	 Bitwise AND
562;;--------------------------------------------
563(:: $bitwise-and-expression
564     (@or (@infix-left 'binop
565                       $equality-expression
566                       (op "&"))
567
568       $equality-expression
569       ))
570
571
572
573;; 9. equality
574;;--------------------------------------------
575(:: $equality-expression
576     (@or (@infix-left 'binop
577                       $relational-expression
578                       $equality-operator)
579
580          $relational-expression
581          ))
582
583(:: $equality-operator
584     (@or (op "==")
585          (op "!=")
586          (op "===")
587          (op "!==")
588))
589
590
591
592;; 8. relational
593;;--------------------------------------------
594(:: $relational-expression
595     (@or (@infix-left 'binop
596                       $bitwise-shift-expression
597                       $relational-operator)
598
599          $bitwise-shift-expression
600          ))
601
602(:: $relational-operator
603     (@or (op "<")
604          (op "<=")
605          (op ">")
606          (op ">=")
607          (op "instanceof")
608          (op "in")
609          ))
610
611
612
613;; 7. bitwise shift
614;;--------------------------------------------
615(:: $bitwise-shift-expression
616    (@or (@infix-left 'binop
617                      $additive-expression
618                      $bitwise-shift-operator)
619
620         $additive-expression
621))
622
623
624(:: $bitwise-shift-operator
625    (@or (op "<<")
626         (op ">>")
627         (op ">>>")
628         ))
629
630
631
632;; 6. additive
633;;--------------------------------------------
634(:: $additive-expression
635    (@or (@infix-left 'binop
636                      $multiplicative-expression
637                      $additive-operator)
638
639         $multiplicative-expression
640))
641
642
643(:: $additive-operator
644    (@or (op "+")
645         (op "-")))
646
647
648;; ($eval $additive-expression (scan "x + y + z"))
649
650
651
652
653;; 5. multiplicative
654;;--------------------------------------------
655(:: $multiplicative-expression
656    (@or (@infix-left 'binop
657                      $unary-expression
658                      $multiplicative-operator)
659
660         $unary-expression))
661
662(:: $multiplicative-operator
663    (@or (op "*")
664         (op "/")
665         (op "%")))
666
667
668
669
670;; unary =
671;; 3. prefix
672;; 2. postfix
673;;--------------------------------------------
674(:: $unary-expression
675    $prefix-expression)
676
677
678
679;; 3. prefix
680;;--------------------------------------------
681(:: $prefix-expression
682    (@or (@prefix 'prefix
683                  $postfix-expression
684                  $prefix-operator)
685         $postfix-expression))
686
687
688(:: $prefix-operator
689     (@or (@= 'new (op "new") (@? $array-suffix))
690          (@= 'delete (op "delete") (@? $array-suffix))
691          (@= 'cast |(|  $type  |)| )
692          (op "void")
693          (op "sizeof")
694          (op "++")
695          (op "--")
696          (op "+")
697          (op "-")
698          (op "~")
699          (op "!")
700          (op "*")                      ; indirection
701          (op "&")                      ; address of
702          (op "::")
703))
704
705
706(::= $array-suffix 'array-suffix
707     |[| |]|)
708
709
710
711
712;; 2. postfix
713;;--------------------------------------------
714(:: $postfix-expression
715    (@or (@postfix 'postfix
716                   $primary-expression
717                   $postfix-operator)
718         $primary-expression))
719
720
721(:: $postfix-operator
722     (@or (op "++")
723          (op "--")
724          $index-suffix
725          $property-reference-suffix
726          $type-parameter
727          $arguments))
728
729
730(::= $arguments 'argument
731     |(|  (@? (@.@ $expression |,|))  |)|
732)
733
734
735(::= $index-suffix 'index
736    |[|  $expression  |]|
737)
738
739
740(::= $property-reference-suffix 'field-access
741     (@or (@~ ".") (@~ "->")) $identifier)
742
743
744
745;; scope resolution ::
746;---------------------------------------------
747(:: $scope-resolution
748    (@or (@infix-left 'scope
749                      $id
750                      ($$ "::"))
751
752         $primary-expression
753))
754
755
756
757;; 1. primary
758;;--------------------------------------------
759(:: $primary-expression
760    (@or (@= 'this ($$ "this"))
761         $type-cast
762         $ctype                         ; could be used in a macro argument
763         $identifier
764         $literal
765         $array-literal
766         $object-literal
767         (@= #f |(|  $expression  |)|)
768))
769
770
771(::= $type-cast 'type-cast
772     (@or ($$ "typeid")
773          ($$ "const_cast")
774          ($$ "dynamic_cast")
775          ($$ "reinterpret_cast")
776          ($$ "static_cast")))
777
778
779
780;; literal
781;;--------------------------------------------
782(:: $literal
783     (@or ($$ "null")
784          ($$ "true")
785          ($$ "false")
786          $string-concat
787          $float-literal
788          $numeral-literal
789          $string-literal
790          $char-literal))
791
792
793(::= $array-literal 'array-literal
794     |{|  (@? (@.@ $expression |,|))  |}|
795)
796
797
798(::= $object-literal 'object-literal
799     |{|  $property-name-value (@* (@... |,| $property-name-value))  |}|
800)
801
802
803(::= $property-name-value 'property-name-value
804     $property-name |:| $assignment-expression)
805
806
807(:: $property-name
808     (@or $identifier
809          $string-literal
810          $numeral-literal))
811
812
813(::= $float-literal 'float-literal
814     $numeral-literal ($$ ".") $numeral-literal)
815
816
817(::= $string-concat 'string-concat
818     $string-literal (@* (@or $string-literal $expression)))
819
820
821
822;-------------------------------------------------------------
823;                    inline assembly
824;-------------------------------------------------------------
825(::= $inline-assembly 'inline-assembly
826     (@or (@~ "asm")
827          (@~ "__asm__"))
828     (@? (@or ($$ "volatile")
829              ($$ "__volatile__")))
830     |(|   $string-concat  |)|
831     |;|
832)
833
834
835(::= $extended-assembly 'extended-assembly
836     (@or (@~ "asm")
837          (@~ "__asm__"))
838     (@? (@or ($$ "volatile")
839              ($$ "__volatile__")))
840     |(|  $string-concat
841     |:|  (@= 'output-operands (@* $string-literal |(| $identifier |)|  ))
842     |:|  (@= 'input-operands (@* $string-literal |(| $identifier |)|   ))
843     |:|  (@= 'clobbered-registers (@? (@.@ $string-literal |,|)))
844     |)|
845     |;|
846)
847
848
849
850
851
852(define parse-cpp
853  (lambda (s)
854    (first-val
855     ($eval $program
856            (filter (lambda (x) (not (Comment? x)))
857                    (scan s))))))
858
859
860
861
862;-------------------------------------------------------------
863;                          tests
864;-------------------------------------------------------------
865
866;; (test-file "tests/simulator-arm.cc"
867;;            "tests/simulator-mips.cc"
868;;            "tests/d8-3404.cc"
869;;            "tests/d8-8424.cc"
870;;            "tests/assembler-arm-2.cc"
871;;            "tests/assembler-arm-7.cc"
872;;            "tests/assembler-arm-8309.cc"
873;; )