/vm/tinyrb/grammar.leg

http://github.com/feyeleanor/RubyGoLightly · Unknown · 334 lines · 282 code · 52 blank · 0 comment · 0 complexity · 4dcd747ac6b099c7cba2309e4786b4fa MD5 · raw file

  %{
  #include <assert.h>
  #include <stdlib.h>
  #include "tr.h"
  #include "internal.h"

  /*#define YY_DEBUG 1*/
  #define YYSTYPE OBJ
  #define YYMALLOC TR_MALLOC
  #define YYREALLOC TR_REALLOC
  #define yyvm compiler->vm

  /* Parser state. The parser is not reentrant: everything lives in file-scope statics. */
  static char *charbuf;        /* next unread character of the source being parsed */
  static char *sbuf;           /* buffer collecting the literal currently being read */
  static size_t nbuf;          /* number of bytes stored in sbuf so far */
  static TrCompiler *compiler; /* compiler receiving the parse result */

  /* Feed the parser one character at a time from charbuf.
     The byte is read as unsigned char so that a 0xFF byte in the source is
     never mistaken for EOF on platforms where plain char is signed. */
  #define YY_INPUT(buf, result, max_size) { \
    int yyc; \
    if (charbuf && *charbuf != '\0') \
      yyc= (unsigned char)*charbuf++; \
    else \
      yyc= EOF; \
    result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \
  }

  /* TODO grow buffer */
  #define STRING_MAX 4096

  /* Begin collecting a new literal: allocate a fresh buffer and make it an
     empty, NUL-terminated string. */
  #define STRING_START do { \
      sbuf = TR_ALLOC_N(char, STRING_MAX); \
      nbuf = 0; \
      sbuf[0] = '\0'; \
    } while (0)

  /* Append L bytes starting at P to the literal being collected.
     The buffer is kept NUL-terminated after every push because it is later
     handed over as a plain C string. The bound leaves room for that
     terminator. NOTE: the bound is only enforced by assert, so it vanishes
     when NDEBUG is defined. L is evaluated once. */
  #define STRING_PUSH(P,L) do { \
      size_t push_len_ = (L); \
      assert(nbuf + push_len_ < STRING_MAX); \
      TR_MEMCPY_N(sbuf + nbuf, (P), char, push_len_); \
      nbuf += push_len_; \
      sbuf[nbuf] = '\0'; \
    } while (0)
  %}
  # Root: a whole program is a statement list followed by end of input.
  # The resulting ROOT node is stored in compiler->node.
  30. Root = s:Stmts EOF { compiler->node = NODE(ROOT, s) }
  # Stmts: statements separated by SEP, each optionally followed by a comment.
  # Comment-only lines between statements are skipped. The second alternative
  # accepts input made only of separators and yields an empty node list.
  31. Stmts = SEP*
  32. - head:Stmt Comment? { head = NODES(head) }
  33. ( SEP - tail:Stmt Comment? { PUSH_NODE(head, tail) }
  34. | SEP - Comment
  35. )* SEP? { $$ = head }
  36. | SEP+ { $$ = NODES_N(0) }
  # OptStmts: a statement list, or nothing but optional blanks and at most one
  # separator, in which case the result is an empty node list.
  37. OptStmts = Stmts
  38. | - SEP? { $$ = NODES_N(0) }
  # Stmt: control structures and definitions are tried before plain expressions.
  39. Stmt = While
  40. | Until
  41. | If
  42. | Unless
  43. | Def
  44. | Class
  45. | Module
  46. | Expr
  # Expr: ordered choice; the first alternative that matches wins, so the
  # order of this list is significant. Plain values are tried last.
  47. Expr = Assign
  48. | AsgnCall
  49. | UnaryOp
  50. | BinOp
  51. | SpecCall
  52. | Call
  53. | Range
  54. | Yield
  55. | Return
  56. | Break
  57. | Value
  # Comment: optional blanks, '#', then everything up to (not including) the end of line.
  58. Comment = - '#' (!EOL .)*
  # Call: a message send. An optional Value receiver is followed by zero or
  # more "message." links, each wrapping the receiver so far in a SEND node,
  # then the final message and an optional block.
  59. Call = { block = rcv = 0 }
  60. ( rcv:Value '.'
  61. )? ( rmsg:Message '.' { rcv = NODE2(SEND, rcv, rmsg) }
  62. )* msg:Message
  63. - block:Block? { $$ = NODE3(SEND, rcv, msg, block) }
  64. # TODO refactor head part w/ Call maybe eh?
  # AsgnCall: assignment through a message, "name = value" with an optional
  # receiver chain as in Call. The message symbol is built with
  # SYMCAT(msg, asg) and the value is passed as the single ARG.
  # NOTE(review): SYMCAT presumably concatenates the two symbols ("name=") - confirm.
  65. AsgnCall = { rcv = 0 }
  66. ( rcv:Value '.'
  67. )? ( rmsg:Message '.' { rcv = NODE2(SEND, rcv, rmsg) }
  68. )* msg:ID - asg:ASSIGN
  69. - val:Stmt { VM = yyvm; $$ = NODE2(SEND, rcv, NODE2(MSG, SYMCAT(msg, asg), NODES(NODE(ARG, val)))) }
  # Receiver: the left-hand side of an operator, index or range; a Call is
  # tried first, then a plain Value.
  70. Receiver = ( { rcv = 0 }
  71. rcv:Call
  72. | rcv:Value
  73. ) { $$ = rcv }
  # SpecCall: index syntax. "rcv[args] = val" sends "[]=" with val appended to
  # the argument list; "rcv[args]" sends "[]".
  74. SpecCall = rcv:Receiver '[' args:Args ']'
  75. - ASSIGN - val:Stmt { PUSH_NODE(args, NODE(ARG, val)); $$ = NODE2(SEND, rcv, NODE2(MSG, TrSymbol_new(yyvm, "[]="), args)) }
  76. | rcv:Receiver '[' args:Args ']' { $$ = NODE2(SEND, rcv, NODE2(MSG, TrSymbol_new(yyvm, "[]"), args)) }
  # BinOp: binary operators. '&&', '||', '+', '-' and '<' get dedicated node
  # types; every other BINOP token becomes a SEND of the operator with the
  # right operand as its single ARG. The right operand is parsed as a full
  # Expr, so chained operators nest to the right and there are no precedence
  # levels.
  77. BinOp = ( rcv:SpecCall | rcv:Receiver )
  78. -
  79. (
  80. '&&' - arg:Expr { $$ = NODE2(AND, rcv, arg) }
  81. | '||' - arg:Expr { $$ = NODE2(OR, rcv, arg) }
  82. | '+' - arg:Expr { $$ = NODE2(ADD, rcv, arg) }
  83. | '-' - arg:Expr { $$ = NODE2(SUB, rcv, arg) }
  84. | '<' - arg:Expr { $$ = NODE2(LT, rcv, arg) }
  85. | op:BINOP - arg:Expr { $$ = NODE2(SEND, rcv, NODE2(MSG, op, NODES(NODE(ARG, arg)))) }
  86. )
  # UnaryOp: prefix '-' builds a NEG node, prefix '!' builds a NOT node.
  87. UnaryOp = '-' rcv:Expr { $$ = NODE(NEG, rcv) }
  88. | '!' rcv:Expr { $$ = NODE(NOT, rcv) }
  # Message: a method name with optional arguments, written either in
  # parentheses (which may be empty) or after at least one space. args stays
  # 0 when no argument list is present.
  89. Message = name:ID { args = 0 }
  90. ( '(' args:Args? ')'
  91. | SPACE args:Args
  92. )? { $$ = NODE2(MSG, name, args) }
  # Args: comma-separated expressions, each wrapped in an ARG node, optionally
  # ending with "*expr" (an ARG node whose second field is 1); or a lone
  # "*expr".
  93. Args = - head:Expr - { head = NODES(NODE(ARG, head)) }
  94. ( ',' - tail:Expr - { PUSH_NODE(head, NODE(ARG, tail)) }
  95. )* ( ',' - '*' splat:Expr - { PUSH_NODE(head, NODE2(ARG, splat, 1)) }
  96. )? { $$ = head }
  97. | - '*' splat:Expr - { $$ = NODES(NODE2(ARG, splat, 1)) }
  # Block: "do ... end", with or without "|params|". The brace forms below are
  # disabled.
  98. Block = 'do' SEP
  99. - body:OptStmts -
  100. 'end' { $$ = NODE(BLOCK, body) }
  101. | 'do' - '|' params:Params '|' SEP
  102. - body:OptStmts -
  103. 'end' { $$ = NODE2(BLOCK, body, params) }
  104. # FIXME this might hang the parser and is very slow.
  105. # Clash with Hash for sure.
  106. #| '{' - body:OptStmts - '}' { $$ = NODE(BLOCK, body) }
  107. #| '{' - '|' params:Params '|'
  108. # - body:OptStmts - '}' { $$ = NODE2(BLOCK, body, params) }
  # Assign: "target = value" where the target is, in order of trial, a local
  # name (ID), a constant, an instance variable, a class variable or a global.
  # The right-hand side is a full Stmt.
  109. Assign = name:ID - ASSIGN - val:Stmt { $$ = NODE2(ASSIGN, name, val) }
  110. | name:CONST - ASSIGN - val:Stmt { $$ = NODE2(SETCONST, name, val) }
  111. | name:IVAR - ASSIGN - val:Stmt { $$ = NODE2(SETIVAR, name, val) }
  112. | name:CVAR - ASSIGN - val:Stmt { $$ = NODE2(SETCVAR, name, val) }
  113. | name:GLOBAL - ASSIGN - val:Stmt { $$ = NODE2(SETGLOBAL, name, val) }
  # While / Until: "while cond SEP body end" and "until cond SEP body end".
  # The body uses Stmts, not OptStmts.
  114. While = 'while' SPACE cond:Expr SEP
  115. body:Stmts -
  116. 'end' { $$ = NODE2(WHILE, cond, body) }
  117. Until = 'until' SPACE cond:Expr SEP
  118. body:Stmts -
  119. 'end' { $$ = NODE2(UNTIL, cond, body) }
  # If: block form with an optional else part (else_body is 0 when absent), or
  # modifier form "expr if cond", whose body is wrapped in a one-element node
  # list.
  120. If = 'if' SPACE cond:Expr SEP { else_body = 0 }
  121. body:Stmts -
  122. else_body:Else?
  123. 'end' { $$ = NODE3(IF, cond, body, else_body) }
  124. | body:Expr - 'if' - cond:Expr { $$ = NODE2(IF, cond, NODES(body)) }
  # Unless: same two forms as If, building UNLESS nodes.
  125. Unless = 'unless' SPACE cond:Expr SEP { else_body = 0 }
  126. body:Stmts -
  127. else_body:Else?
  128. 'end' { $$ = NODE3(UNLESS, cond, body, else_body) }
  129. | body:Expr -
  130. 'unless' - cond:Expr { $$ = NODE2(UNLESS, cond, NODES(body)) }
  # Else: "else SEP body"; the value is the body's statement list.
  131. Else = 'else' SEP - body:Stmts - { $$ = body }
  # Method: the name part of a definition. "id.name" wraps the identifier in a
  # receiver-less SEND node, "value.name" uses the value as receiver, and a
  # bare name has receiver 0.
  132. Method = rcv:ID '.' name:METHOD { $$ = NODE2(METHOD, NODE2(SEND, 0, NODE(MSG, rcv)), name) }
  133. | rcv:Value '.' name:METHOD { $$ = NODE2(METHOD, rcv, name) }
  134. | name:METHOD { $$ = NODE2(METHOD, 0, name) }
  # Def: "def method (params)? SEP body end". When no parameter list was
  # parsed, an empty node list is used in its place.
  135. Def = 'def' SPACE method:Method { params = 0 }
  136. (- '(' params:Params? ')')? SEP
  137. body:OptStmts -
  138. 'end' { $$ = NODE3(DEF, method, params ? params : NODES_N(0), body) }
  # Params: one or more comma-separated Param entries.
  139. Params = head:Param { head = NODES(head) }
  140. ( ',' tail:Param { PUSH_NODE(head, tail) }
  141. )* { $$ = head }
  # Param: "name = expr" (third field carries the default expression), a plain
  # name, or "*name" (second field set to 1).
  142. Param = - name:ID - '=' - def:Expr { $$ = NODE3(PARAM, name, 0, def) }
  143. | - name:ID - { $$ = NODE(PARAM, name) }
  144. | - '*' name:ID - { $$ = NODE2(PARAM, name, 1) }
  # Class: "class Name (< Super)? SEP body end"; super is 0 when omitted.
  145. Class = 'class' SPACE name:CONST { super = 0 }
  146. (- '<' - super:CONST)? SEP
  147. body:OptStmts -
  148. 'end' { $$ = NODE3(CLASS, name, super, body) }
  # Module: "module Name SEP body end"; the second field is always 0.
  149. Module = 'module' SPACE name:CONST SEP
  150. body:OptStmts -
  151. 'end' { $$ = NODE3(MODULE, name, 0, body) }
  # Range: "a..b" builds RANGE with third field 0, "a...b" with third field 1.
  # NOTE(review): the flag presumably means "exclude end" - confirm in the compiler.
  152. Range = s:Receiver - '..' - e:Expr { $$ = NODE3(RANGE, s, e, 0) }
  153. | s:Receiver - '...' - e:Expr { $$ = NODE3(RANGE, s, e, 1) }
  # Yield: with arguments after a space, with parenthesised arguments, or bare
  # (empty argument list).
  154. Yield = 'yield' SPACE args:AryItems { $$ = NODE(YIELD, args) }
  155. | 'yield' '(' args:AryItems ')' { $$ = NODE(YIELD, args) }
  156. | 'yield' { $$ = NODE(YIELD, NODES_N(0)) }
  # Return: a single expression not followed by ',' is returned as is; a
  # comma-separated list is wrapped in an ARRAY node; bare "return" carries 0.
  157. Return = 'return' SPACE arg:Expr - !',' { $$ = NODE(RETURN, arg) }
  158. | 'return' '(' arg:Expr ')' - !','{ $$ = NODE(RETURN, arg) }
  159. | 'return' SPACE args:AryItems { $$ = NODE(RETURN, NODE(ARRAY, args)) }
  160. | 'return' '(' args:AryItems ')' { $$ = NODE(RETURN, NODE(ARRAY, args)) }
  161. | 'return' { $$ = NODE(RETURN, 0) }
  # Break: the keyword alone.
  162. Break = 'break' { $$ = NODE(BREAK, 0) }
  # Value: literals, constants, the keywords nil/true/false/self, variable
  # reads, array and hash literals, and a parenthesised expression. The
  # parenthesised form has no action of its own.
  163. Value = v:NUMBER { $$ = NODE(VALUE, v) }
  164. | v:SYMBOL { $$ = NODE(VALUE, v) }
  165. | v:REGEXP { $$ = NODE(VALUE, v) }
  166. | v:STRING1 { $$ = NODE(STRING, v) }
  167. | v:STRING2 { $$ = NODE(STRING, v) }
  168. | v:CONST { $$ = NODE(CONST, v) }
  169. | 'nil' { $$ = NODE(NIL, 0) }
  170. | 'true' { $$ = NODE(BOOL, TR_TRUE) }
  171. | 'false' { $$ = NODE(BOOL, TR_FALSE) }
  172. | 'self' { $$ = NODE(SELF, 0) }
  173. | name:IVAR { $$ = NODE(GETIVAR, name) }
  174. | name:CVAR { $$ = NODE(GETCVAR, name) }
  175. | name:GLOBAL { $$ = NODE(GETGLOBAL, name) } # TODO
  176. | '[' - ']' { $$ = NODE(ARRAY, NODES_N(0)) }
  177. | '[' - items:AryItems - ']' { $$ = NODE(ARRAY, items) }
  178. | '{' - '}' { $$ = NODE(HASH, NODES_N(0)) }
  179. | '{' - items:HashItems - '}' { $$ = NODE(HASH, items) }
  180. | '(' - Expr - ')'
  # AryItems: one or more comma-separated expressions collected in a node list.
  181. AryItems = - head:Expr - { head = NODES(head) }
  182. ( ',' - tail:Expr - { PUSH_NODE(head, tail) }
  183. )* { $$ = head }
  # HashItems: "key => value" pairs separated by commas, stored as one flat
  # list in which keys and values alternate.
  184. HashItems = head:Expr - '=>' - val:Expr { head = NODES_N(2, head, val) }
  185. ( - ',' - key:Expr - { PUSH_NODE(head, key) }
  186. '=>' - val:Expr { PUSH_NODE(head, val) }
  187. )* { $$ = head }
  # KEYWORD: the reserved words of the language.
  188. KEYWORD = 'while' | 'until' | 'do' | 'end' |
  189. 'if' | 'unless' | 'else' |
  190. 'true' | 'false' | 'nil' | 'self' |
  191. 'class' | 'module' | 'def' |
  192. 'yield' | 'return' | 'break'
  # NAME: one or more letters, digits or underscores.
  193. NAME = [a-zA-Z0-9_]+
  # ID: an identifier symbol. Three alternatives, tried in order:
  #   1. a keyword other than 'self' directly followed by '.', '(' or '[';
  #   2. a keyword immediately followed by more name characters;
  #   3. a non-keyword starting with a lowercase letter or '_', optionally
  #      ending in '=' (only when '(' follows), '!' or '?'.
  194. ID = !'self' # self is special, can never be a method name
  195. < KEYWORD > &('.' | '(' | '[') { $$ = TrSymbol_new(yyvm, yytext) } # hm, there's probably a better way
  196. | < KEYWORD NAME > { $$ = TrSymbol_new(yyvm, yytext) }
  197. | !KEYWORD
  198. < [a-z_] NAME?
  199. ( '=' &'(' | '!'| '?' )? > { $$ = TrSymbol_new(yyvm, yytext) }
  # CONST: a name starting with an uppercase letter.
  200. CONST = < [A-Z] NAME? > { $$ = TrSymbol_new(yyvm, yytext) }
  # BINOP: binary operator tokens, returned as symbols.
  # PEG choice is ordered and commits to the first alternative that matches,
  # so every operator must be listed before any operator that is a prefix of
  # it: '**' before '*', '<=>' before '<=', '===' before '==', and '<=' / '>='
  # before '<' / '>'. Otherwise the longer operator can never be recognised.
  BINOP = < ( '**' | '^' | '&' | '|' | '~' |
              '+' | '-' | '*' | '/' | '%' | '<=>' |
              '<<' | '>>' | '===' | '==' | '=~' | '!=' |
              '<=' | '>=' | '<' | '>'
            ) > { $$ = TrSymbol_new(yyvm, yytext) }
  # UNOP: unary operator method names.
  206. UNOP = < ( '-@' | '!' ) > { $$ = TrSymbol_new(yyvm, yytext) }
  # METHOD: anything usable as a method name in a definition.
  # NOTE(review): UNOP is tried before BINOP, so '!' matches ahead of '!=' - confirm this is intended.
  207. METHOD = ID | UNOP | BINOP
  # ASSIGN: a single '=' that is not followed by another '='.
  208. ASSIGN = < '=' > &(!'=') { $$ = TrSymbol_new(yyvm, yytext) }
  # IVAR / CVAR / GLOBAL: names prefixed with '@', '@@' and '$'; the prefix is
  # part of the symbol text.
  209. IVAR = < '@' NAME > { $$ = TrSymbol_new(yyvm, yytext) }
  210. CVAR = < '@@' NAME > { $$ = TrSymbol_new(yyvm, yytext) }
  211. GLOBAL = < '$' NAME > { $$ = TrSymbol_new(yyvm, yytext) }
  # NUMBER: a run of decimal digits converted with atoi.
  212. NUMBER = < [0-9]+ > { $$ = TR_INT2FIX(atoi(yytext)) }
  # SYMBOL: ':' followed by a name; the colon is not part of the symbol text.
  213. SYMBOL = ':' < (NAME | KEYWORD) > { $$ = TrSymbol_new(yyvm, yytext) }
  # STRING1: single-quoted string. The text is collected with STRING_START /
  # STRING_PUSH; the only escape recognised is \' and every other character
  # is copied as is.
  214. STRING1 = '\'' { STRING_START }
  215. (
  216. '\\\'' { STRING_PUSH("'", 1) }
  217. | < [^\'] > { STRING_PUSH(yytext, yyleng) }
  218. )* '\'' { $$ = TrString_new2(yyvm, sbuf) }
  # ESC_CHAR: backslash escapes translated while collecting a literal:
  # \n \b \f \r \t \" and \\.
  219. ESC_CHAR = '\\n' { STRING_PUSH("\n", 1) }
  220. | '\\b' { STRING_PUSH("\b", 1) }
  221. | '\\f' { STRING_PUSH("\f", 1) }
  222. | '\\r' { STRING_PUSH("\r", 1) }
  223. | '\\t' { STRING_PUSH("\t", 1) }
  224. | '\\\"' { STRING_PUSH("\"", 1) }
  225. | '\\\\' { STRING_PUSH("\\", 1) }
  # STRING2: double-quoted string; escapes are handled by ESC_CHAR, any other
  # character except '"' is copied as is.
  226. STRING2 = '"' { STRING_START }
  227. (
  228. ESC_CHAR
  229. | < [^\"] > { STRING_PUSH(yytext, yyleng) } #" for higlighting
  230. )*
  231. '"' { $$ = TrString_new2(yyvm, sbuf) }
  # REGEXP: text between two '/' characters, collected like a double-quoted
  # string and passed to TrRegexp_new with 0 as the last argument.
  232. REGEXP = '/' { STRING_START }
  233. (
  234. ESC_CHAR
  235. | < [^/] > { STRING_PUSH(yytext, yyleng) }
  236. )*
  237. '/' { $$ = TrRegexp_new(yyvm, sbuf, 0) }
  # -: optional run of spaces and tabs.
  238. - = [ \t]*
  # SPACE: one or more spaces (tabs are not accepted here).
  239. SPACE = [ ]+
  # EOL: LF, CRLF or CR; each one advances the compiler's line counter.
  240. EOL = ( '\n' | '\r\n' | '\r' ) { compiler->line++ }
  # EOF: succeeds only when no input is left.
  241. EOF = !.
  # SEP: one or more statement separators, each being optional blanks, an
  # optional comment, then an end of line or ';'.
  242. SEP = ( - Comment? (EOL | ';') )+
  243. %%
  244. /* Raise a syntax error. */
  245. OBJ yyerror() {
  246. VM = yyvm;
  247. OBJ msg = tr_sprintf(vm, "SyntaxError in %s at line %d", TR_STR_PTR(compiler->filename), compiler->line);
  248. /* Stupid ugly code, just to build a string... I suck... */
  249. if (yytext[0]) TrString_push(vm, msg, tr_sprintf(vm, " near token '%s'", yytext));
  250. if (yypos < yylimit) {
  251. yybuf[yylimit]= '\0';
  252. TrString_push(vm, msg, tr_sprintf(vm, " before text \""));
  253. while (yypos < yylimit) {
  254. if ('\n' == yybuf[yypos] || '\r' == yybuf[yypos]) break;
  255. char c[2] = { yybuf[yypos++], '\0' };
  256. TrString_push(vm, msg, tr_sprintf(vm, c));
  257. }
  258. TrString_push(vm, msg, tr_sprintf(vm, "\""));
  259. }
  260. /* TODO msg should not be a String object */
  261. tr_raise(SyntaxError, TR_STR_PTR(msg));
  262. }
  263. /* Compiles code to a TrBlock.
  264. Returns NULL on error, error is stored in TR_EXCEPTION. */
  265. TrBlock *TrBlock_compile(VM, char *code, char *fn, size_t lineno) {
  266. assert(!compiler && "parser not reentrant");
  267. charbuf = code;
  268. compiler = TrCompiler_new(vm, fn);
  269. compiler->line += lineno;
  270. compiler->filename = TrString_new2(vm, fn);
  271. TrBlock *b = NULL;
  272. if (!yyparse()) {
  273. yyerror();
  274. goto error;
  275. }
  276. TrCompiler_compile(compiler);
  277. b = compiler->block;
  278. error:
  279. charbuf = 0;
  280. compiler = 0;
  281. return b;
  282. }