PageRenderTime 119ms CodeModel.GetById 36ms RepoModel.GetById 1ms app.codeStats 0ms

/shotgun/lib/grammar.y

https://github.com/reinh/rubinius
Happy | 5899 lines | 5400 code | 499 blank | 0 comment | 0 complexity | 853777cd48b8270b7b994c215120cbe7 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.1, BSD-3-Clause, GPL-2.0
  1. /**********************************************************************
  2. parse.y -
  3. $Author: matz $
  4. $Date: 2004/11/29 06:13:51 $
  5. created at: Fri May 28 18:02:42 JST 1993
  6. Copyright (C) 1993-2003 Yukihiro Matsumoto
  7. **********************************************************************/
  8. %{
  9. #define YYDEBUG 1
  10. #define YYERROR_VERBOSE 1
  11. #include <stdio.h>
  12. #include <errno.h>
  13. #include <ctype.h>
  14. #include <string.h>
  15. #include <stdbool.h>
  16. #include "shotgun/lib/grammar_internal.h"
  17. #include "shotgun/lib/grammar_runtime.h"
  18. #include "shotgun/lib/array.h"
  19. static NODE *syd_node_newnode(rb_parse_state*, enum node_type, OBJECT, OBJECT, OBJECT);
  20. #undef VALUE
  21. #ifndef isnumber
  22. #define isnumber isdigit
  23. #endif
  24. #define ISALPHA isalpha
  25. #define ISSPACE isspace
  26. #define ISALNUM(x) (isalpha(x) || isnumber(x))
  27. #define ISDIGIT isdigit
  28. #define ISXDIGIT isxdigit
  29. #define ISUPPER isupper
  30. #define ismbchar(c) (0)
  31. #define mbclen(c) (1)
  32. #define ID2SYM(i) (OBJECT)i
  33. #define string_new(ptr, len) blk2bstr(ptr, len)
  34. #define string_new2(ptr) cstr2bstr(ptr)
  35. intptr_t syd_sourceline;
  36. static char *syd_sourcefile;
  37. #define ruby_sourceline syd_sourceline
  38. #define ruby_sourcefile syd_sourcefile
  39. static int
  40. syd_yyerror(const char *, rb_parse_state*);
  41. #define yyparse syd_yyparse
  42. #define yylex syd_yylex
  43. #define yyerror(str) syd_yyerror(str, parse_state)
  44. #define yylval syd_yylval
  45. #define yychar syd_yychar
  46. #define yydebug syd_yydebug
  47. #define YYPARSE_PARAM parse_state
  48. #define YYLEX_PARAM parse_state
  49. #define ID_SCOPE_SHIFT 3
  50. #define ID_SCOPE_MASK 0x07
  51. #define ID_LOCAL 0x01
  52. #define ID_INSTANCE 0x02
  53. #define ID_GLOBAL 0x03
  54. #define ID_ATTRSET 0x04
  55. #define ID_CONST 0x05
  56. #define ID_CLASS 0x06
  57. #define ID_JUNK 0x07
  58. #define ID_INTERNAL ID_JUNK
  59. #define is_notop_id(id) ((id)>tLAST_TOKEN)
  60. #define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL)
  61. #define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL)
  62. #define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE)
  63. #define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET)
  64. #define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST)
  65. #define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS)
  66. #define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK)
  67. #define is_asgn_or_id(id) ((is_notop_id(id)) && \
  68. (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \
  69. ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
  70. ((id)&ID_SCOPE_MASK) == ID_CLASS))
  71. /* FIXME these went into the ruby_state instead of parse_state
  72. because a ton of other crap depends on it
  73. char *ruby_sourcefile; current source file
  74. int ruby_sourceline; current line no.
  75. */
  76. static int yylex();
  77. #define BITSTACK_PUSH(stack, n) (stack = (stack<<1)|((n)&1))
  78. #define BITSTACK_POP(stack) (stack >>= 1)
  79. #define BITSTACK_LEXPOP(stack) (stack = (stack >> 1) | (stack & 1))
  80. #define BITSTACK_SET_P(stack) (stack&1)
  81. #define COND_PUSH(n) BITSTACK_PUSH(vps->cond_stack, n)
  82. #define COND_POP() BITSTACK_POP(vps->cond_stack)
  83. #define COND_LEXPOP() BITSTACK_LEXPOP(vps->cond_stack)
  84. #define COND_P() BITSTACK_SET_P(vps->cond_stack)
  85. #define CMDARG_PUSH(n) BITSTACK_PUSH(vps->cmdarg_stack, n)
  86. #define CMDARG_POP() BITSTACK_POP(vps->cmdarg_stack)
  87. #define CMDARG_LEXPOP() BITSTACK_LEXPOP(vps->cmdarg_stack)
  88. #define CMDARG_P() BITSTACK_SET_P(vps->cmdarg_stack)
  89. /*
  90. static int class_nest = 0;
  91. static int in_single = 0;
  92. static int in_def = 0;
  93. static int compile_for_eval = 0;
  94. static ID cur_mid = 0;
  95. */
  96. static NODE *cond(NODE*,rb_parse_state*);
  97. static NODE *logop(enum node_type,NODE*,NODE*,rb_parse_state*);
  98. static int cond_negative(NODE**);
  99. static NODE *newline_node(rb_parse_state*,NODE*);
  100. static void fixpos(NODE*,NODE*);
  101. static int value_expr0(NODE*,rb_parse_state*);
  102. static void void_expr0(NODE *);
  103. static void void_stmts(NODE*,rb_parse_state*);
  104. static NODE *remove_begin(NODE*);
  105. #define value_expr(node) value_expr0((node) = remove_begin(node), parse_state)
  106. #define void_expr(node) void_expr0((node) = remove_begin(node))
  107. static NODE *block_append(rb_parse_state*,NODE*,NODE*);
  108. static NODE *list_append(rb_parse_state*,NODE*,NODE*);
  109. static NODE *list_concat(NODE*,NODE*);
  110. static NODE *arg_concat(rb_parse_state*,NODE*,NODE*);
  111. static NODE *arg_prepend(rb_parse_state*,NODE*,NODE*);
  112. static NODE *literal_concat(rb_parse_state*,NODE*,NODE*);
  113. static NODE *new_evstr(rb_parse_state*,NODE*);
  114. static NODE *evstr2dstr(rb_parse_state*,NODE*);
  115. static NODE *call_op(NODE*,ID,int,NODE*,rb_parse_state*);
  116. /* static NODE *negate_lit(NODE*); */
  117. static NODE *ret_args(rb_parse_state*,NODE*);
  118. static NODE *arg_blk_pass(NODE*,NODE*);
  119. static NODE *new_call(rb_parse_state*,NODE*,ID,NODE*);
  120. static NODE *new_fcall(rb_parse_state*,ID,NODE*);
  121. static NODE *new_super(rb_parse_state*,NODE*);
  122. static NODE *new_yield(rb_parse_state*,NODE*);
  123. static NODE *syd_gettable(rb_parse_state*,ID);
  124. #define gettable(i) syd_gettable(parse_state, i)
  125. static NODE *assignable(ID,NODE*,rb_parse_state*);
  126. static NODE *aryset(NODE*,NODE*,rb_parse_state*);
  127. static NODE *attrset(NODE*,ID,rb_parse_state*);
  128. static void rb_backref_error(NODE*);
  129. static NODE *node_assign(NODE*,NODE*,rb_parse_state*);
  130. static NODE *match_gen(NODE*,NODE*,rb_parse_state*);
  131. static void syd_local_push(rb_parse_state*, int cnt);
  132. #define local_push(cnt) syd_local_push(vps, cnt)
  133. static void syd_local_pop(rb_parse_state*);
  134. #define local_pop() syd_local_pop(vps)
  135. static intptr_t syd_local_cnt(rb_parse_state*,ID);
  136. #define local_cnt(i) syd_local_cnt(vps, i)
  137. static int syd_local_id(rb_parse_state*,ID);
  138. #define local_id(i) syd_local_id(vps, i)
  139. static ID *syd_local_tbl();
  140. static ID convert_op();
  141. static void tokadd(char c, rb_parse_state *parse_state);
  142. static int tokadd_string(int, int, int, int *, rb_parse_state*);
  143. #define SHOW_PARSER_WARNS 0
  144. static int _debug_print(const char *fmt, ...) {
  145. #if SHOW_PARSER_WARNS
  146. va_list ar;
  147. int i;
  148. va_start(ar, fmt);
  149. i = vprintf(fmt, ar);
  150. va_end(ar);
  151. return i;
  152. #else
  153. return 0;
  154. #endif
  155. }
  156. #define rb_warn _debug_print
  157. #define rb_warning _debug_print
  158. #define rb_compile_error _debug_print
  159. static ID rb_intern(const char *name);
  160. static ID rb_id_attrset(ID);
  161. rb_parse_state *alloc_parse_state();
  162. static unsigned long scan_oct(const char *start, int len, int *retlen);
  163. static unsigned long scan_hex(const char *start, int len, int *retlen);
  164. static void reset_block(rb_parse_state *parse_state);
  165. static NODE *extract_block_vars(rb_parse_state *parse_state, NODE* node, var_table vars);
  166. #define ruby_verbose 0
  167. #define RE_OPTION_ONCE 0x80
  168. #define RE_OPTION_IGNORECASE (1L)
  169. #define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE<<1)
  170. #define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED<<1)
  171. #define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
  172. #define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE<<1)
  173. #define RE_MAY_IGNORECASE (RE_OPTION_LONGEST<<1)
  174. #define RE_OPTIMIZE_ANCHOR (RE_MAY_IGNORECASE<<1)
  175. #define RE_OPTIMIZE_EXACTN (RE_OPTIMIZE_ANCHOR<<1)
  176. #define RE_OPTIMIZE_NO_BM (RE_OPTIMIZE_EXACTN<<1)
  177. #define RE_OPTIMIZE_BMATCH (RE_OPTIMIZE_NO_BM<<1)
  178. #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */
  179. #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */
  180. #define SIGN_EXTEND(x,n) (((1<<((n)-1))^((x)&~(~0<<(n))))-(1<<((n)-1)))
  181. #define nd_func u1.id
  182. #if SIZEOF_SHORT != 2
  183. #define nd_term(node) SIGN_EXTEND((node)->u2.id, (CHAR_BIT*2))
  184. #else
  185. #define nd_term(node) ((signed short)(node)->u2.id)
  186. #endif
  187. #define nd_paren(node) (char)((node)->u2.id >> (CHAR_BIT*2))
  188. #define nd_nest u3.id
  189. /* Older versions of Yacc set YYMAXDEPTH to a very low value by default (150,
  190. for instance). This is too low for Ruby to parse some files, such as
  191. date/format.rb, therefore bump the value up to at least Bison's default. */
  192. #ifdef OLD_YACC
  193. #ifndef YYMAXDEPTH
  194. #define YYMAXDEPTH 10000
  195. #endif
  196. #endif
  197. #define vps ((rb_parse_state*)parse_state)
  198. %}
  199. %pure-parser
  200. %union {
  201. NODE *node;
  202. ID id;
  203. int num;
  204. var_table vars;
  205. }
  206. %token kCLASS
  207. kMODULE
  208. kDEF
  209. kUNDEF
  210. kBEGIN
  211. kRESCUE
  212. kENSURE
  213. kEND
  214. kIF
  215. kUNLESS
  216. kTHEN
  217. kELSIF
  218. kELSE
  219. kCASE
  220. kWHEN
  221. kWHILE
  222. kUNTIL
  223. kFOR
  224. kBREAK
  225. kNEXT
  226. kREDO
  227. kRETRY
  228. kIN
  229. kDO
  230. kDO_COND
  231. kDO_BLOCK
  232. kRETURN
  233. kYIELD
  234. kSUPER
  235. kSELF
  236. kNIL
  237. kTRUE
  238. kFALSE
  239. kAND
  240. kOR
  241. kNOT
  242. kIF_MOD
  243. kUNLESS_MOD
  244. kWHILE_MOD
  245. kUNTIL_MOD
  246. kRESCUE_MOD
  247. kALIAS
  248. kDEFINED
  249. klBEGIN
  250. klEND
  251. k__LINE__
  252. k__FILE__
  253. %token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tXSTRING_BEG
  254. %token <node> tINTEGER tFLOAT tSTRING_CONTENT
  255. %token <node> tNTH_REF tBACK_REF
  256. %token <num> tREGEXP_END
  257. %type <node> singleton strings string string1 xstring regexp
  258. %type <node> string_contents xstring_contents string_content
  259. %type <node> words qwords word_list qword_list word
  260. %type <node> literal numeric dsym cpath
  261. %type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call
  262. %type <node> expr_value arg_value primary_value
  263. %type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure
  264. %type <node> args when_args call_args call_args2 open_args paren_args opt_paren_args
  265. %type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs
  266. %type <node> mrhs superclass block_call block_command
  267. %type <node> f_arglist f_args f_optarg f_opt f_block_arg opt_f_block_arg
  268. %type <node> assoc_list assocs assoc undef_list backref string_dvar
  269. %type <node> block_var opt_block_var brace_block cmd_brace_block do_block lhs none
  270. %type <node> mlhs mlhs_head mlhs_basic mlhs_entry mlhs_item mlhs_node
  271. %type <id> fitem variable sym symbol operation operation2 operation3
  272. %type <id> cname fname op f_rest_arg
  273. %type <num> f_norm_arg f_arg
  274. %token tUPLUS /* unary+ */
  275. %token tUMINUS /* unary- */
  276. %token tUBS /* unary\ */
  277. %token tPOW /* ** */
  278. %token tCMP /* <=> */
  279. %token tEQ /* == */
  280. %token tEQQ /* === */
  281. %token tNEQ /* != */
  282. %token tGEQ /* >= */
  283. %token tLEQ /* <= */
  284. %token tANDOP tOROP /* && and || */
  285. %token tMATCH tNMATCH /* =~ and !~ */
  286. %token tDOT2 tDOT3 /* .. and ... */
  287. %token tAREF tASET /* [] and []= */
  288. %token tLSHFT tRSHFT /* << and >> */
  289. %token tCOLON2 /* :: */
  290. %token tCOLON3 /* :: at EXPR_BEG */
  291. %token <id> tOP_ASGN /* +=, -= etc. */
  292. %token tASSOC /* => */
  293. %token tLPAREN /* ( */
  294. %token tLPAREN_ARG /* ( */
  295. %token tRPAREN /* ) */
  296. %token tLBRACK /* [ */
  297. %token tLBRACE /* { */
  298. %token tLBRACE_ARG /* { */
  299. %token tSTAR /* * */
  300. %token tAMPER /* & */
  301. %token tSYMBEG tSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG
  302. %token tSTRING_DBEG tSTRING_DVAR tSTRING_END
  303. /*
  304. * precedence table
  305. */
  306. %nonassoc tLOWEST
  307. %nonassoc tLBRACE_ARG
  308. %nonassoc kIF_MOD kUNLESS_MOD kWHILE_MOD kUNTIL_MOD
  309. %left kOR kAND
  310. %right kNOT
  311. %nonassoc kDEFINED
  312. %right '=' tOP_ASGN
  313. %left kRESCUE_MOD
  314. %right '?' ':'
  315. %nonassoc tDOT2 tDOT3
  316. %left tOROP
  317. %left tANDOP
  318. %nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
  319. %left '>' tGEQ '<' tLEQ
  320. %left '|' '^'
  321. %left '&'
  322. %left tLSHFT tRSHFT
  323. %left '+' '-'
  324. %left '*' '/' '%'
  325. %right tUMINUS_NUM tUMINUS
  326. %right tPOW
  327. %right '!' '~' tUPLUS
  328. %token tLAST_TOKEN
  329. %%
  330. program : {
  331. vps->lex_state = EXPR_BEG;
  332. vps->variables = var_table_create();
  333. class_nest = 0;
  334. }
  335. compstmt
  336. {
  337. if ($2 && !compile_for_eval) {
  338. /* last expression should not be void */
  339. if (nd_type($2) != NODE_BLOCK) void_expr($2);
  340. else {
  341. NODE *node = $2;
  342. while (node->nd_next) {
  343. node = node->nd_next;
  344. }
  345. void_expr(node->nd_head);
  346. }
  347. }
  348. vps->top = block_append(parse_state, vps->top, $2);
  349. class_nest = 0;
  350. }
  351. ;
  352. bodystmt : compstmt
  353. opt_rescue
  354. opt_else
  355. opt_ensure
  356. {
  357. $$ = $1;
  358. if ($2) {
  359. $$ = NEW_RESCUE($1, $2, $3);
  360. }
  361. else if ($3) {
  362. rb_warn("else without rescue is useless");
  363. $$ = block_append(parse_state, $$, $3);
  364. }
  365. if ($4) {
  366. $$ = NEW_ENSURE($$, $4);
  367. }
  368. fixpos($$, $1);
  369. }
  370. ;
  371. compstmt : stmts opt_terms
  372. {
  373. void_stmts($1, parse_state);
  374. $$ = $1;
  375. }
  376. ;
  377. stmts : none
  378. | stmt
  379. {
  380. $$ = newline_node(parse_state, $1);
  381. }
  382. | stmts terms stmt
  383. {
  384. $$ = block_append(parse_state, $1, newline_node(parse_state, $3));
  385. }
  386. | error stmt
  387. {
  388. $$ = $2;
  389. }
  390. ;
  391. stmt : kALIAS fitem {vps->lex_state = EXPR_FNAME;} fitem
  392. {
  393. $$ = NEW_ALIAS($2, $4);
  394. }
  395. | kALIAS tGVAR tGVAR
  396. {
  397. $$ = NEW_VALIAS($2, $3);
  398. }
  399. | kALIAS tGVAR tBACK_REF
  400. {
  401. char buf[3];
  402. snprintf(buf, sizeof(buf), "$%c", (char)$3->nd_nth);
  403. $$ = NEW_VALIAS($2, rb_intern(buf));
  404. }
  405. | kALIAS tGVAR tNTH_REF
  406. {
  407. yyerror("can't make alias for the number variables");
  408. $$ = 0;
  409. }
  410. | kUNDEF undef_list
  411. {
  412. $$ = $2;
  413. }
  414. | stmt kIF_MOD expr_value
  415. {
  416. $$ = NEW_IF(cond($3, parse_state), $1, 0);
  417. fixpos($$, $3);
  418. if (cond_negative(&$$->nd_cond)) {
  419. $$->nd_else = $$->nd_body;
  420. $$->nd_body = 0;
  421. }
  422. }
  423. | stmt kUNLESS_MOD expr_value
  424. {
  425. $$ = NEW_UNLESS(cond($3, parse_state), $1, 0);
  426. fixpos($$, $3);
  427. if (cond_negative(&$$->nd_cond)) {
  428. $$->nd_body = $$->nd_else;
  429. $$->nd_else = 0;
  430. }
  431. }
  432. | stmt kWHILE_MOD expr_value
  433. {
  434. if ($1 && nd_type($1) == NODE_BEGIN) {
  435. $$ = NEW_WHILE(cond($3, parse_state), $1->nd_body, 0);
  436. }
  437. else {
  438. $$ = NEW_WHILE(cond($3, parse_state), $1, 1);
  439. }
  440. if (cond_negative(&$$->nd_cond)) {
  441. nd_set_type($$, NODE_UNTIL);
  442. }
  443. }
  444. | stmt kUNTIL_MOD expr_value
  445. {
  446. if ($1 && nd_type($1) == NODE_BEGIN) {
  447. $$ = NEW_UNTIL(cond($3, parse_state), $1->nd_body, 0);
  448. }
  449. else {
  450. $$ = NEW_UNTIL(cond($3, parse_state), $1, 1);
  451. }
  452. if (cond_negative(&$$->nd_cond)) {
  453. nd_set_type($$, NODE_WHILE);
  454. }
  455. }
  456. | stmt kRESCUE_MOD stmt
  457. {
  458. $$ = NEW_RESCUE($1, NEW_RESBODY(0,$3,0), 0);
  459. }
  460. | klBEGIN
  461. {
  462. if (in_def || in_single) {
  463. yyerror("BEGIN in method");
  464. }
  465. local_push(0);
  466. }
  467. '{' compstmt '}'
  468. {
  469. /*
  470. ruby_eval_tree_begin = block_append(ruby_eval_tree_begin,
  471. NEW_PREEXE($4));
  472. */
  473. local_pop();
  474. $$ = 0;
  475. }
  476. | klEND '{' compstmt '}'
  477. {
  478. if (in_def || in_single) {
  479. rb_warn("END in method; use at_exit");
  480. }
  481. $$ = NEW_ITER(0, NEW_POSTEXE(), $3);
  482. }
  483. | lhs '=' command_call
  484. {
  485. $$ = node_assign($1, $3, parse_state);
  486. }
  487. | mlhs '=' command_call
  488. {
  489. value_expr($3);
  490. $1->nd_value = ($1->nd_head) ? NEW_TO_ARY($3) : NEW_ARRAY($3);
  491. $$ = $1;
  492. }
  493. | var_lhs tOP_ASGN command_call
  494. {
  495. value_expr($3);
  496. if ($1) {
  497. ID vid = $1->nd_vid;
  498. if ($2 == tOROP) {
  499. $1->nd_value = $3;
  500. $$ = NEW_OP_ASGN_OR(gettable(vid), $1);
  501. if (is_asgn_or_id(vid)) {
  502. $$->nd_aid = vid;
  503. }
  504. }
  505. else if ($2 == tANDOP) {
  506. $1->nd_value = $3;
  507. $$ = NEW_OP_ASGN_AND(gettable(vid), $1);
  508. }
  509. else {
  510. $$ = $1;
  511. $$->nd_value = call_op(gettable(vid),$2,1,$3, parse_state);
  512. }
  513. }
  514. else {
  515. $$ = 0;
  516. }
  517. }
  518. | primary_value '[' aref_args ']' tOP_ASGN command_call
  519. {
  520. NODE *args;
  521. value_expr($6);
  522. args = NEW_LIST($6);
  523. if ($3 && nd_type($3) != NODE_ARRAY)
  524. $3 = NEW_LIST($3);
  525. $3 = list_append(parse_state, $3, NEW_NIL());
  526. list_concat(args, $3);
  527. if ($5 == tOROP) {
  528. $5 = 0;
  529. }
  530. else if ($5 == tANDOP) {
  531. $5 = 1;
  532. }
  533. $$ = NEW_OP_ASGN1($1, $5, args);
  534. fixpos($$, $1);
  535. }
  536. | primary_value '.' tIDENTIFIER tOP_ASGN command_call
  537. {
  538. value_expr($5);
  539. if ($4 == tOROP) {
  540. $4 = 0;
  541. }
  542. else if ($4 == tANDOP) {
  543. $4 = 1;
  544. }
  545. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  546. fixpos($$, $1);
  547. }
  548. | primary_value '.' tCONSTANT tOP_ASGN command_call
  549. {
  550. value_expr($5);
  551. if ($4 == tOROP) {
  552. $4 = 0;
  553. }
  554. else if ($4 == tANDOP) {
  555. $4 = 1;
  556. }
  557. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  558. fixpos($$, $1);
  559. }
  560. | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
  561. {
  562. value_expr($5);
  563. if ($4 == tOROP) {
  564. $4 = 0;
  565. }
  566. else if ($4 == tANDOP) {
  567. $4 = 1;
  568. }
  569. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  570. fixpos($$, $1);
  571. }
  572. | backref tOP_ASGN command_call
  573. {
  574. rb_backref_error($1);
  575. $$ = 0;
  576. }
  577. | lhs '=' mrhs
  578. {
  579. $$ = node_assign($1, NEW_SVALUE($3), parse_state);
  580. }
  581. | mlhs '=' arg_value
  582. {
  583. $1->nd_value = ($1->nd_head) ? NEW_TO_ARY($3) : NEW_ARRAY($3);
  584. $$ = $1;
  585. }
  586. | mlhs '=' mrhs
  587. {
  588. $1->nd_value = $3;
  589. $$ = $1;
  590. }
  591. | expr
  592. ;
  593. expr : command_call
  594. | expr kAND expr
  595. {
  596. $$ = logop(NODE_AND, $1, $3, parse_state);
  597. }
  598. | expr kOR expr
  599. {
  600. $$ = logop(NODE_OR, $1, $3, parse_state);
  601. }
  602. | kNOT expr
  603. {
  604. $$ = NEW_NOT(cond($2, parse_state));
  605. }
  606. | '!' command_call
  607. {
  608. $$ = NEW_NOT(cond($2, parse_state));
  609. }
  610. | arg
  611. ;
  612. expr_value : expr
  613. {
  614. value_expr($$);
  615. $$ = $1;
  616. }
  617. ;
  618. command_call : command
  619. | block_command
  620. | kRETURN call_args
  621. {
  622. $$ = NEW_RETURN(ret_args(vps, $2));
  623. }
  624. | kBREAK call_args
  625. {
  626. $$ = NEW_BREAK(ret_args(vps, $2));
  627. }
  628. | kNEXT call_args
  629. {
  630. $$ = NEW_NEXT(ret_args(vps, $2));
  631. }
  632. ;
  633. block_command : block_call
  634. | block_call '.' operation2 command_args
  635. {
  636. $$ = new_call(parse_state, $1, $3, $4);
  637. }
  638. | block_call tCOLON2 operation2 command_args
  639. {
  640. $$ = new_call(parse_state, $1, $3, $4);
  641. }
  642. ;
  643. cmd_brace_block : tLBRACE_ARG
  644. {
  645. $<num>1 = ruby_sourceline;
  646. reset_block(vps);
  647. }
  648. opt_block_var { $<vars>$ = vps->block_vars; }
  649. compstmt
  650. '}'
  651. {
  652. $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
  653. nd_set_line($$, $<num>1);
  654. }
  655. ;
  656. command : operation command_args %prec tLOWEST
  657. {
  658. $$ = new_fcall(parse_state, $1, $2);
  659. fixpos($$, $2);
  660. }
  661. | operation command_args cmd_brace_block
  662. {
  663. $$ = new_fcall(parse_state, $1, $2);
  664. if ($3) {
  665. if (nd_type($$) == NODE_BLOCK_PASS) {
  666. rb_compile_error("both block arg and actual block given");
  667. }
  668. $3->nd_iter = $$;
  669. $$ = $3;
  670. }
  671. fixpos($$, $2);
  672. }
  673. | primary_value '.' operation2 command_args %prec tLOWEST
  674. {
  675. $$ = new_call(parse_state, $1, $3, $4);
  676. fixpos($$, $1);
  677. }
  678. | primary_value '.' operation2 command_args cmd_brace_block
  679. {
  680. $$ = new_call(parse_state, $1, $3, $4);
  681. if ($5) {
  682. if (nd_type($$) == NODE_BLOCK_PASS) {
  683. rb_compile_error("both block arg and actual block given");
  684. }
  685. $5->nd_iter = $$;
  686. $$ = $5;
  687. }
  688. fixpos($$, $1);
  689. }
  690. | primary_value tCOLON2 operation2 command_args %prec tLOWEST
  691. {
  692. $$ = new_call(parse_state, $1, $3, $4);
  693. fixpos($$, $1);
  694. }
  695. | primary_value tCOLON2 operation2 command_args cmd_brace_block
  696. {
  697. $$ = new_call(parse_state, $1, $3, $4);
  698. if ($5) {
  699. if (nd_type($$) == NODE_BLOCK_PASS) {
  700. rb_compile_error("both block arg and actual block given");
  701. }
  702. $5->nd_iter = $$;
  703. $$ = $5;
  704. }
  705. fixpos($$, $1);
  706. }
  707. | kSUPER command_args
  708. {
  709. $$ = new_super(parse_state, $2);
  710. fixpos($$, $2);
  711. }
  712. | kYIELD command_args
  713. {
  714. $$ = new_yield(parse_state, $2);
  715. fixpos($$, $2);
  716. }
  717. ;
  718. mlhs : mlhs_basic
  719. | tLPAREN mlhs_entry ')'
  720. {
  721. $$ = $2;
  722. }
  723. ;
  724. mlhs_entry : mlhs_basic
  725. | tLPAREN mlhs_entry ')'
  726. {
  727. $$ = NEW_MASGN(NEW_LIST($2), 0);
  728. }
  729. ;
  730. mlhs_basic : mlhs_head
  731. {
  732. $$ = NEW_MASGN($1, 0);
  733. }
  734. | mlhs_head mlhs_item
  735. {
  736. $$ = NEW_MASGN(list_append(parse_state, $1,$2), 0);
  737. }
  738. | mlhs_head tSTAR mlhs_node
  739. {
  740. $$ = NEW_MASGN($1, $3);
  741. }
  742. | mlhs_head tSTAR
  743. {
  744. $$ = NEW_MASGN($1, -1);
  745. }
  746. | tSTAR mlhs_node
  747. {
  748. $$ = NEW_MASGN(0, $2);
  749. }
  750. | tSTAR
  751. {
  752. $$ = NEW_MASGN(0, -1);
  753. }
  754. ;
  755. mlhs_item : mlhs_node
  756. | tLPAREN mlhs_entry ')'
  757. {
  758. $$ = $2;
  759. }
  760. ;
  761. mlhs_head : mlhs_item ','
  762. {
  763. $$ = NEW_LIST($1);
  764. }
  765. | mlhs_head mlhs_item ','
  766. {
  767. $$ = list_append(parse_state, $1, $2);
  768. }
  769. ;
  770. mlhs_node : variable
  771. {
  772. $$ = assignable($1, 0, parse_state);
  773. }
  774. | primary_value '[' aref_args ']'
  775. {
  776. $$ = aryset($1, $3, parse_state);
  777. }
  778. | primary_value '.' tIDENTIFIER
  779. {
  780. $$ = attrset($1, $3, parse_state);
  781. }
  782. | primary_value tCOLON2 tIDENTIFIER
  783. {
  784. $$ = attrset($1, $3, parse_state);
  785. }
  786. | primary_value '.' tCONSTANT
  787. {
  788. $$ = attrset($1, $3, parse_state);
  789. }
  790. | primary_value tCOLON2 tCONSTANT
  791. {
  792. if (in_def || in_single)
  793. yyerror("dynamic constant assignment");
  794. $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3));
  795. }
  796. | tCOLON3 tCONSTANT
  797. {
  798. if (in_def || in_single)
  799. yyerror("dynamic constant assignment");
  800. $$ = NEW_CDECL(0, 0, NEW_COLON3($2));
  801. }
  802. | backref
  803. {
  804. rb_backref_error($1);
  805. $$ = 0;
  806. }
  807. ;
  808. lhs : variable
  809. {
  810. $$ = assignable($1, 0, parse_state);
  811. }
  812. | primary_value '[' aref_args ']'
  813. {
  814. $$ = aryset($1, $3, parse_state);
  815. }
  816. | primary_value '.' tIDENTIFIER
  817. {
  818. $$ = attrset($1, $3, parse_state);
  819. }
  820. | primary_value tCOLON2 tIDENTIFIER
  821. {
  822. $$ = attrset($1, $3, parse_state);
  823. }
  824. | primary_value '.' tCONSTANT
  825. {
  826. $$ = attrset($1, $3, parse_state);
  827. }
  828. | primary_value tCOLON2 tCONSTANT
  829. {
  830. if (in_def || in_single)
  831. yyerror("dynamic constant assignment");
  832. $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3));
  833. }
  834. | tCOLON3 tCONSTANT
  835. {
  836. if (in_def || in_single)
  837. yyerror("dynamic constant assignment");
  838. $$ = NEW_CDECL(0, 0, NEW_COLON3($2));
  839. }
  840. | backref
  841. {
  842. rb_backref_error($1);
  843. $$ = 0;
  844. }
  845. ;
  846. cname : tIDENTIFIER
  847. {
  848. yyerror("class/module name must be CONSTANT");
  849. }
  850. | tCONSTANT
  851. ;
  852. cpath : tCOLON3 cname
  853. {
  854. $$ = NEW_COLON3($2);
  855. }
  856. | cname
  857. {
  858. $$ = NEW_COLON2(0, $$);
  859. }
  860. | primary_value tCOLON2 cname
  861. {
  862. $$ = NEW_COLON2($1, $3);
  863. }
  864. ;
  865. fname : tIDENTIFIER
  866. | tCONSTANT
  867. | tFID
  868. | op
  869. {
  870. vps->lex_state = EXPR_END;
  871. $$ = convert_op($1);
  872. }
  873. | reswords
  874. {
  875. vps->lex_state = EXPR_END;
  876. $$ = $<id>1;
  877. }
  878. ;
  879. fitem : fname
  880. | symbol
  881. ;
  882. undef_list : fitem
  883. {
  884. $$ = NEW_UNDEF($1);
  885. }
  886. | undef_list ',' {vps->lex_state = EXPR_FNAME;} fitem
  887. {
  888. $$ = block_append(parse_state, $1, NEW_UNDEF($4));
  889. }
  890. ;
  891. op : '|' { $$ = '|'; }
  892. | '^' { $$ = '^'; }
  893. | '&' { $$ = '&'; }
  894. | tCMP { $$ = tCMP; }
  895. | tEQ { $$ = tEQ; }
  896. | tEQQ { $$ = tEQQ; }
  897. | tMATCH { $$ = tMATCH; }
  898. | '>' { $$ = '>'; }
  899. | tGEQ { $$ = tGEQ; }
  900. | '<' { $$ = '<'; }
  901. | tLEQ { $$ = tLEQ; }
  902. | tLSHFT { $$ = tLSHFT; }
  903. | tRSHFT { $$ = tRSHFT; }
  904. | '+' { $$ = '+'; }
  905. | '-' { $$ = '-'; }
  906. | '*' { $$ = '*'; }
  907. | tSTAR { $$ = '*'; }
  908. | '/' { $$ = '/'; }
  909. | '%' { $$ = '%'; }
  910. | tPOW { $$ = tPOW; }
  911. | '~' { $$ = '~'; }
  912. | tUPLUS { $$ = tUPLUS; }
  913. | tUMINUS { $$ = tUMINUS; }
  914. | tAREF { $$ = tAREF; }
  915. | tASET { $$ = tASET; }
  916. | '`' { $$ = '`'; }
  917. ;
  918. reswords : k__LINE__ | k__FILE__ | klBEGIN | klEND
  919. | kALIAS | kAND | kBEGIN | kBREAK | kCASE | kCLASS | kDEF
  920. | kDEFINED | kDO | kELSE | kELSIF | kEND | kENSURE | kFALSE
  921. | kFOR | kIN | kMODULE | kNEXT | kNIL | kNOT
  922. | kOR | kREDO | kRESCUE | kRETRY | kRETURN | kSELF | kSUPER
  923. | kTHEN | kTRUE | kUNDEF | kWHEN | kYIELD
  924. | kIF_MOD | kUNLESS_MOD | kWHILE_MOD | kUNTIL_MOD | kRESCUE_MOD
  925. ;
  926. arg : lhs '=' arg
  927. {
  928. $$ = node_assign($1, $3, parse_state);
  929. }
  930. | lhs '=' arg kRESCUE_MOD arg
  931. {
  932. $$ = node_assign($1, NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0), parse_state);
  933. }
  934. | var_lhs tOP_ASGN arg
  935. {
  936. value_expr($3);
  937. if ($1) {
  938. ID vid = $1->nd_vid;
  939. if ($2 == tOROP) {
  940. $1->nd_value = $3;
  941. $$ = NEW_OP_ASGN_OR(gettable(vid), $1);
  942. if (is_asgn_or_id(vid)) {
  943. $$->nd_aid = vid;
  944. }
  945. }
  946. else if ($2 == tANDOP) {
  947. $1->nd_value = $3;
  948. $$ = NEW_OP_ASGN_AND(gettable(vid), $1);
  949. }
  950. else {
  951. $$ = $1;
  952. $$->nd_value = call_op(gettable(vid),$2,1,$3, parse_state);
  953. }
  954. }
  955. else {
  956. $$ = 0;
  957. }
  958. }
  959. | primary_value '[' aref_args ']' tOP_ASGN arg
  960. {
  961. NODE *args;
  962. value_expr($6);
  963. args = NEW_LIST($6);
  964. if ($3 && nd_type($3) != NODE_ARRAY)
  965. $3 = NEW_LIST($3);
  966. $3 = list_append(parse_state, $3, NEW_NIL());
  967. list_concat(args, $3);
  968. if ($5 == tOROP) {
  969. $5 = 0;
  970. }
  971. else if ($5 == tANDOP) {
  972. $5 = 1;
  973. }
  974. $$ = NEW_OP_ASGN1($1, $5, args);
  975. fixpos($$, $1);
  976. }
  977. | primary_value '.' tIDENTIFIER tOP_ASGN arg
  978. {
  979. value_expr($5);
  980. if ($4 == tOROP) {
  981. $4 = 0;
  982. }
  983. else if ($4 == tANDOP) {
  984. $4 = 1;
  985. }
  986. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  987. fixpos($$, $1);
  988. }
  989. | primary_value '.' tCONSTANT tOP_ASGN arg
  990. {
  991. value_expr($5);
  992. if ($4 == tOROP) {
  993. $4 = 0;
  994. }
  995. else if ($4 == tANDOP) {
  996. $4 = 1;
  997. }
  998. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  999. fixpos($$, $1);
  1000. }
  1001. | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg
  1002. {
  1003. value_expr($5);
  1004. if ($4 == tOROP) {
  1005. $4 = 0;
  1006. }
  1007. else if ($4 == tANDOP) {
  1008. $4 = 1;
  1009. }
  1010. $$ = NEW_OP_ASGN2($1, $3, $4, $5);
  1011. fixpos($$, $1);
  1012. }
  1013. | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
  1014. {
  1015. yyerror("constant re-assignment");
  1016. $$ = 0;
  1017. }
  1018. | tCOLON3 tCONSTANT tOP_ASGN arg
  1019. {
  1020. yyerror("constant re-assignment");
  1021. $$ = 0;
  1022. }
  1023. | backref tOP_ASGN arg
  1024. {
  1025. rb_backref_error($1);
  1026. $$ = 0;
  1027. }
  1028. | arg tDOT2 arg
  1029. {
  1030. value_expr($1);
  1031. value_expr($3);
  1032. $$ = NEW_DOT2($1, $3);
  1033. }
  1034. | arg tDOT3 arg
  1035. {
  1036. value_expr($1);
  1037. value_expr($3);
  1038. $$ = NEW_DOT3($1, $3);
  1039. }
  1040. | arg '+' arg
  1041. {
  1042. $$ = call_op($1, '+', 1, $3, parse_state);
  1043. }
  1044. | arg '-' arg
  1045. {
  1046. $$ = call_op($1, '-', 1, $3, parse_state);
  1047. }
  1048. | arg '*' arg
  1049. {
  1050. $$ = call_op($1, '*', 1, $3, parse_state);
  1051. }
  1052. | arg '/' arg
  1053. {
  1054. $$ = call_op($1, '/', 1, $3, parse_state);
  1055. }
  1056. | arg '%' arg
  1057. {
  1058. $$ = call_op($1, '%', 1, $3, parse_state);
  1059. }
  1060. | arg tPOW arg
  1061. {
  1062. $$ = call_op($1, tPOW, 1, $3, parse_state);
  1063. }
  1064. | tUMINUS_NUM tINTEGER tPOW arg
  1065. {
  1066. $$ = call_op(call_op($2, tPOW, 1, $4, parse_state), tUMINUS, 0, 0, parse_state);
  1067. }
  1068. | tUMINUS_NUM tFLOAT tPOW arg
  1069. {
  1070. $$ = call_op(call_op($2, tPOW, 1, $4, parse_state), tUMINUS, 0, 0, parse_state);
  1071. }
  1072. | tUPLUS arg
  1073. {
  1074. $$ = call_op($2, tUPLUS, 0, 0, parse_state);
  1075. }
  1076. | tUMINUS arg
  1077. {
  1078. $$ = call_op($2, tUMINUS, 0, 0, parse_state);
  1079. }
  1080. | arg '|' arg
  1081. {
  1082. $$ = call_op($1, '|', 1, $3, parse_state);
  1083. }
  1084. | arg '^' arg
  1085. {
  1086. $$ = call_op($1, '^', 1, $3, parse_state);
  1087. }
  1088. | arg '&' arg
  1089. {
  1090. $$ = call_op($1, '&', 1, $3, parse_state);
  1091. }
  1092. | arg tCMP arg
  1093. {
  1094. $$ = call_op($1, tCMP, 1, $3, parse_state);
  1095. }
  1096. | arg '>' arg
  1097. {
  1098. $$ = call_op($1, '>', 1, $3, parse_state);
  1099. }
  1100. | arg tGEQ arg
  1101. {
  1102. $$ = call_op($1, tGEQ, 1, $3, parse_state);
  1103. }
  1104. | arg '<' arg
  1105. {
  1106. $$ = call_op($1, '<', 1, $3, parse_state);
  1107. }
  1108. | arg tLEQ arg
  1109. {
  1110. $$ = call_op($1, tLEQ, 1, $3, parse_state);
  1111. }
  1112. | arg tEQ arg
  1113. {
  1114. $$ = call_op($1, tEQ, 1, $3, parse_state);
  1115. }
  1116. | arg tEQQ arg
  1117. {
  1118. $$ = call_op($1, tEQQ, 1, $3, parse_state);
  1119. }
  1120. | arg tNEQ arg
  1121. {
  1122. $$ = NEW_NOT(call_op($1, tEQ, 1, $3, parse_state));
  1123. }
  1124. | arg tMATCH arg
  1125. {
  1126. $$ = match_gen($1, $3, parse_state);
  1127. }
  1128. | arg tNMATCH arg
  1129. {
  1130. $$ = NEW_NOT(match_gen($1, $3, parse_state));
  1131. }
  1132. | '!' arg
  1133. {
  1134. $$ = NEW_NOT(cond($2, parse_state));
  1135. }
  1136. | '~' arg
  1137. {
  1138. $$ = call_op($2, '~', 0, 0, parse_state);
  1139. }
  1140. | arg tLSHFT arg
  1141. {
  1142. $$ = call_op($1, tLSHFT, 1, $3, parse_state);
  1143. }
  1144. | arg tRSHFT arg
  1145. {
  1146. $$ = call_op($1, tRSHFT, 1, $3, parse_state);
  1147. }
  1148. | arg tANDOP arg
  1149. {
  1150. $$ = logop(NODE_AND, $1, $3, parse_state);
  1151. }
  1152. | arg tOROP arg
  1153. {
  1154. $$ = logop(NODE_OR, $1, $3, parse_state);
  1155. }
  1156. | kDEFINED opt_nl {vps->in_defined = 1;} arg
  1157. {
  1158. vps->in_defined = 0;
  1159. $$ = NEW_DEFINED($4);
  1160. }
  1161. | arg '?' {vps->ternary_colon++;} arg ':' arg
  1162. {
  1163. $$ = NEW_IF(cond($1, parse_state), $4, $6);
  1164. fixpos($$, $1);
  1165. vps->ternary_colon--;
  1166. }
  1167. | primary
  1168. {
  1169. $$ = $1;
  1170. }
  1171. ;
  1172. arg_value : arg
  1173. {
  1174. value_expr($1);
  1175. $$ = $1;
  1176. }
  1177. ;
  1178. aref_args : none
  1179. | command opt_nl
  1180. {
  1181. rb_warn("parenthesize argument(s) for future version");
  1182. $$ = NEW_LIST($1);
  1183. }
  1184. | args trailer
  1185. {
  1186. $$ = $1;
  1187. }
  1188. | args ',' tSTAR arg opt_nl
  1189. {
  1190. value_expr($4);
  1191. $$ = arg_concat(parse_state, $1, $4);
  1192. }
  1193. | assocs trailer
  1194. {
  1195. $$ = NEW_LIST(NEW_HASH($1));
  1196. }
  1197. | tSTAR arg opt_nl
  1198. {
  1199. value_expr($2);
  1200. $$ = NEW_NEWLINE(NEW_SPLAT($2));
  1201. }
  1202. ;
  1203. paren_args : '(' none ')'
  1204. {
  1205. $$ = $2;
  1206. }
  1207. | '(' call_args opt_nl ')'
  1208. {
  1209. $$ = $2;
  1210. }
  1211. | '(' block_call opt_nl ')'
  1212. {
  1213. rb_warn("parenthesize argument for future version");
  1214. $$ = NEW_LIST($2);
  1215. }
  1216. | '(' args ',' block_call opt_nl ')'
  1217. {
  1218. rb_warn("parenthesize argument for future version");
  1219. $$ = list_append(parse_state, $2, $4);
  1220. }
  1221. ;
  1222. opt_paren_args : none
  1223. | paren_args
  1224. ;
  1225. call_args : command
  1226. {
  1227. rb_warn("parenthesize argument(s) for future version");
  1228. $$ = NEW_LIST($1);
  1229. }
  1230. | args opt_block_arg
  1231. {
  1232. $$ = arg_blk_pass($1, $2);
  1233. }
  1234. | args ',' tSTAR arg_value opt_block_arg
  1235. {
  1236. $$ = arg_concat(parse_state, $1, $4);
  1237. $$ = arg_blk_pass($$, $5);
  1238. }
  1239. | assocs opt_block_arg
  1240. {
  1241. $$ = NEW_LIST(NEW_POSITIONAL($1));
  1242. $$ = arg_blk_pass($$, $2);
  1243. }
  1244. | assocs ',' tSTAR arg_value opt_block_arg
  1245. {
  1246. $$ = arg_concat(parse_state, NEW_LIST(NEW_POSITIONAL($1)), $4);
  1247. $$ = arg_blk_pass($$, $5);
  1248. }
  1249. | args ',' assocs opt_block_arg
  1250. {
  1251. $$ = list_append(parse_state, $1, NEW_POSITIONAL($3));
  1252. $$ = arg_blk_pass($$, $4);
  1253. }
  1254. | args ',' assocs ',' tSTAR arg opt_block_arg
  1255. {
  1256. value_expr($6);
  1257. $$ = arg_concat(parse_state, list_append(parse_state, $1, NEW_POSITIONAL($3)), $6);
  1258. $$ = arg_blk_pass($$, $7);
  1259. }
  1260. | tSTAR arg_value opt_block_arg
  1261. {
  1262. $$ = arg_blk_pass(NEW_SPLAT($2), $3);
  1263. }
  1264. | block_arg
  1265. ;
  1266. call_args2 : arg_value ',' args opt_block_arg
  1267. {
  1268. $$ = arg_blk_pass(list_concat(NEW_LIST($1),$3), $4);
  1269. }
  1270. | arg_value ',' block_arg
  1271. {
  1272. $$ = arg_blk_pass($1, $3);
  1273. }
  1274. | arg_value ',' tSTAR arg_value opt_block_arg
  1275. {
  1276. $$ = arg_concat(parse_state, NEW_LIST($1), $4);
  1277. $$ = arg_blk_pass($$, $5);
  1278. }
  1279. | arg_value ',' args ',' tSTAR arg_value opt_block_arg
  1280. {
  1281. $$ = arg_concat(parse_state, list_concat(NEW_LIST($1),$3), $6);
  1282. $$ = arg_blk_pass($$, $7);
  1283. }
  1284. | assocs opt_block_arg
  1285. {
  1286. $$ = NEW_LIST(NEW_POSITIONAL($1));
  1287. $$ = arg_blk_pass($$, $2);
  1288. }
  1289. | assocs ',' tSTAR arg_value opt_block_arg
  1290. {
  1291. $$ = arg_concat(parse_state, NEW_LIST(NEW_POSITIONAL($1)), $4);
  1292. $$ = arg_blk_pass($$, $5);
  1293. }
  1294. | arg_value ',' assocs opt_block_arg
  1295. {
  1296. $$ = list_append(parse_state, NEW_LIST($1), NEW_POSITIONAL($3));
  1297. $$ = arg_blk_pass($$, $4);
  1298. }
  1299. | arg_value ',' args ',' assocs opt_block_arg
  1300. {
  1301. $$ = list_append(parse_state, list_concat(NEW_LIST($1),$3), NEW_POSITIONAL($5));
  1302. $$ = arg_blk_pass($$, $6);
  1303. }
  1304. | arg_value ',' assocs ',' tSTAR arg_value opt_block_arg
  1305. {
  1306. $$ = arg_concat(parse_state, list_append(parse_state, NEW_LIST($1), NEW_POSITIONAL($3)), $6);
  1307. $$ = arg_blk_pass($$, $7);
  1308. }
  1309. | arg_value ',' args ',' assocs ',' tSTAR arg_value opt_block_arg
  1310. {
  1311. $$ = arg_concat(parse_state, list_append(parse_state, list_concat(NEW_LIST($1), $3), NEW_POSITIONAL($5)), $8);
  1312. $$ = arg_blk_pass($$, $9);
  1313. }
  1314. | tSTAR arg_value opt_block_arg
  1315. {
  1316. $$ = arg_blk_pass(NEW_SPLAT($2), $3);
  1317. }
  1318. | block_arg
  1319. ;
  1320. command_args : {
  1321. $<num>$ = vps->cmdarg_stack;
  1322. CMDARG_PUSH(1);
  1323. }
  1324. open_args
  1325. {
  1326. /* CMDARG_POP() */
  1327. vps->cmdarg_stack = $<num>1;
  1328. $$ = $2;
  1329. }
  1330. ;
  1331. open_args : call_args
  1332. | tLPAREN_ARG {vps->lex_state = EXPR_ENDARG;} ')'
  1333. {
  1334. rb_warn("don't put space before argument parentheses");
  1335. $$ = 0;
  1336. }
  1337. | tLPAREN_ARG call_args2 {vps->lex_state = EXPR_ENDARG;} ')'
  1338. {
  1339. rb_warn("don't put space before argument parentheses");
  1340. $$ = $2;
  1341. }
  1342. ;
  1343. block_arg : tAMPER arg_value
  1344. {
  1345. $$ = NEW_BLOCK_PASS($2);
  1346. }
  1347. ;
  1348. opt_block_arg : ',' block_arg
  1349. {
  1350. $$ = $2;
  1351. }
  1352. | none
  1353. ;
  1354. args : arg_value
  1355. {
  1356. $$ = NEW_LIST($1);
  1357. }
  1358. | args ',' arg_value
  1359. {
  1360. $$ = list_append(parse_state, $1, $3);
  1361. }
  1362. ;
  1363. mrhs : args ',' arg_value
  1364. {
  1365. $$ = list_append(parse_state, $1, $3);
  1366. }
  1367. | args ',' tSTAR arg_value
  1368. {
  1369. $$ = arg_concat(parse_state, $1, $4);
  1370. }
  1371. | tSTAR arg_value
  1372. {
  1373. $$ = NEW_SPLAT($2);
  1374. }
  1375. ;
  1376. primary : literal
  1377. | strings
  1378. | xstring
  1379. | regexp
  1380. | words
  1381. | qwords
  1382. | var_ref
  1383. | backref
  1384. | tFID
  1385. {
  1386. $$ = NEW_FCALL($1, 0);
  1387. }
  1388. | kBEGIN
  1389. {
  1390. $<num>1 = ruby_sourceline;
  1391. }
  1392. bodystmt
  1393. kEND
  1394. {
  1395. if ($3 == NULL)
  1396. $$ = NEW_NIL();
  1397. else
  1398. $$ = NEW_BEGIN($3);
  1399. nd_set_line($$, $<num>1);
  1400. }
  1401. | tLPAREN_ARG expr {vps->lex_state = EXPR_ENDARG;} opt_nl ')'
  1402. {
  1403. rb_warning("(...) interpreted as grouped expression");
  1404. $$ = $2;
  1405. }
  1406. | tLPAREN compstmt ')'
  1407. {
  1408. $$ = $2;
  1409. }
  1410. | primary_value tCOLON2 tCONSTANT
  1411. {
  1412. $$ = NEW_COLON2($1, $3);
  1413. }
  1414. | tCOLON3 tCONSTANT
  1415. {
  1416. $$ = NEW_COLON3($2);
  1417. }
  1418. | primary_value '[' aref_args ']'
  1419. {
  1420. if ($1 && nd_type($1) == NODE_SELF) {
  1421. $$ = NEW_FCALL(convert_op(tAREF), $3);
  1422. } else {
  1423. $$ = NEW_CALL($1, convert_op(tAREF), $3);
  1424. }
  1425. fixpos($$, $1);
  1426. }
  1427. | tLBRACK aref_args ']'
  1428. {
  1429. if ($2 == 0) {
  1430. $$ = NEW_ZARRAY(); /* zero length array*/
  1431. }
  1432. else {
  1433. $$ = $2;
  1434. }
  1435. }
  1436. | tLBRACE assoc_list '}'
  1437. {
  1438. $$ = NEW_HASH($2);
  1439. }
  1440. | kRETURN
  1441. {
  1442. $$ = NEW_RETURN(0);
  1443. }
  1444. | kYIELD '(' call_args ')'
  1445. {
  1446. $$ = new_yield(parse_state, $3);
  1447. }
  1448. | kYIELD '(' ')'
  1449. {
  1450. $$ = NEW_YIELD(0, Qfalse);
  1451. }
  1452. | kYIELD
  1453. {
  1454. $$ = NEW_YIELD(0, Qfalse);
  1455. }
  1456. | kDEFINED opt_nl '(' {vps->in_defined = 1;} expr ')'
  1457. {
  1458. vps->in_defined = 0;
  1459. $$ = NEW_DEFINED($5);
  1460. }
  1461. | operation brace_block
  1462. {
  1463. $2->nd_iter = NEW_FCALL($1, 0);
  1464. $$ = $2;
  1465. fixpos($2->nd_iter, $2);
  1466. }
  1467. | method_call
  1468. | method_call brace_block
  1469. {
  1470. if ($1 && nd_type($1) == NODE_BLOCK_PASS) {
  1471. rb_compile_error("both block arg and actual block given");
  1472. }
  1473. $2->nd_iter = $1;
  1474. $$ = $2;
  1475. fixpos($$, $1);
  1476. }
  1477. | kIF expr_value then
  1478. compstmt
  1479. if_tail
  1480. kEND
  1481. {
  1482. $$ = NEW_IF(cond($2, parse_state), $4, $5);
  1483. fixpos($$, $2);
  1484. if (cond_negative(&$$->nd_cond)) {
  1485. NODE *tmp = $$->nd_body;
  1486. $$->nd_body = $$->nd_else;
  1487. $$->nd_else = tmp;
  1488. }
  1489. }
  1490. | kUNLESS expr_value then
  1491. compstmt
  1492. opt_else
  1493. kEND
  1494. {
  1495. $$ = NEW_UNLESS(cond($2, parse_state), $4, $5);
  1496. fixpos($$, $2);
  1497. if (cond_negative(&$$->nd_cond)) {
  1498. NODE *tmp = $$->nd_body;
  1499. $$->nd_body = $$->nd_else;
  1500. $$->nd_else = tmp;
  1501. }
  1502. }
  1503. | kWHILE {COND_PUSH(1);} expr_value do {COND_POP();}
  1504. compstmt
  1505. kEND
  1506. {
  1507. $$ = NEW_WHILE(cond($3, parse_state), $6, 1);
  1508. fixpos($$, $3);
  1509. if (cond_negative(&$$->nd_cond)) {
  1510. nd_set_type($$, NODE_UNTIL);
  1511. }
  1512. }
  1513. | kUNTIL {COND_PUSH(1);} expr_value do {COND_POP();}
  1514. compstmt
  1515. kEND
  1516. {
  1517. $$ = NEW_UNTIL(cond($3, parse_state), $6, 1);
  1518. fixpos($$, $3);
  1519. if (cond_negative(&$$->nd_cond)) {
  1520. nd_set_type($$, NODE_WHILE);
  1521. }
  1522. }
  1523. | kCASE expr_value opt_terms
  1524. case_body
  1525. kEND
  1526. {
  1527. $$ = NEW_CASE($2, $4);
  1528. fixpos($$, $2);
  1529. }
  1530. | kCASE opt_terms case_body kEND
  1531. {
  1532. $$ = $3;
  1533. }
  1534. | kCASE opt_terms kELSE compstmt kEND
  1535. {
  1536. $$ = $4;
  1537. }
  1538. | kFOR block_var kIN {COND_PUSH(1);} expr_value do {COND_POP();}
  1539. compstmt
  1540. kEND
  1541. {
  1542. $$ = NEW_FOR($2, $5, $8);
  1543. fixpos($$, $2);
  1544. }
  1545. | kCLASS cpath superclass
  1546. {
  1547. if (in_def || in_single)
  1548. yyerror("class definition in method body");
  1549. class_nest++;
  1550. local_push(0);
  1551. $<num>$ = ruby_sourceline;
  1552. }
  1553. bodystmt
  1554. kEND
  1555. {
  1556. $$ = NEW_CLASS($2, $5, $3);
  1557. nd_set_line($$, $<num>4);
  1558. local_pop();
  1559. class_nest--;
  1560. }
  1561. | kCLASS tLSHFT expr
  1562. {
  1563. $<num>$ = in_def;
  1564. in_def = 0;
  1565. }
  1566. term
  1567. {
  1568. $<num>$ = in_single;
  1569. in_single = 0;
  1570. class_nest++;
  1571. local_push(0);
  1572. }
  1573. bodystmt
  1574. kEND
  1575. {
  1576. $$ = NEW_SCLASS($3, $7);
  1577. fixpos($$, $3);
  1578. local_pop();
  1579. class_nest--;
  1580. in_def = $<num>4;
  1581. in_single = $<num>6;
  1582. }
  1583. | kMODULE cpath
  1584. {
  1585. if (in_def || in_single)
  1586. yyerror("module definition in method body");
  1587. class_nest++;
  1588. local_push(0);
  1589. $<num>$ = ruby_sourceline;
  1590. }
  1591. bodystmt
  1592. kEND
  1593. {
  1594. $$ = NEW_MODULE($2, $4);
  1595. nd_set_line($$, $<num>3);
  1596. local_pop();
  1597. class_nest--;
  1598. }
  1599. | kDEF fname
  1600. {
  1601. $<id>$ = cur_mid;
  1602. cur_mid = $2;
  1603. in_def++;
  1604. local_push(0);
  1605. }
  1606. f_arglist
  1607. bodystmt
  1608. kEND
  1609. {
  1610. if (!$5) $5 = NEW_NIL();
  1611. $$ = NEW_DEFN($2, $4, $5, NOEX_PRIVATE);
  1612. fixpos($$, $4);
  1613. local_pop();
  1614. in_def--;
  1615. cur_mid = $<id>3;
  1616. }
  1617. | kDEF singleton dot_or_colon {vps->lex_state = EXPR_FNAME;} fname
  1618. {
  1619. in_single++;
  1620. local_push(0);
  1621. vps->lex_state = EXPR_END; /* force for args */
  1622. }
  1623. f_arglist
  1624. bodystmt
  1625. kEND
  1626. {
  1627. $$ = NEW_DEFS($2, $5, $7, $8);
  1628. fixpos($$, $2);
  1629. local_pop();
  1630. in_single--;
  1631. }
  1632. | kBREAK
  1633. {
  1634. $$ = NEW_BREAK(0);
  1635. }
  1636. | kNEXT
  1637. {
  1638. $$ = NEW_NEXT(0);
  1639. }
  1640. | kREDO
  1641. {
  1642. $$ = NEW_REDO();
  1643. }
  1644. | kRETRY
  1645. {
  1646. $$ = NEW_RETRY();
  1647. }
  1648. ;
  1649. primary_value : primary
  1650. {
  1651. value_expr($1);
  1652. $$ = $1;
  1653. }
  1654. ;
  1655. then : term
  1656. | ':'
  1657. | kTHEN
  1658. | term kTHEN
  1659. ;
  1660. do : term
  1661. | ':'
  1662. | kDO_COND
  1663. ;
  1664. if_tail : opt_else
  1665. | kELSIF expr_value then
  1666. compstmt
  1667. if_tail
  1668. {
  1669. $$ = NEW_IF(cond($2, parse_state), $4, $5);
  1670. fixpos($$, $2);
  1671. }
  1672. ;
  1673. opt_else : none
  1674. | kELSE compstmt
  1675. {
  1676. $$ = $2;
  1677. }
  1678. ;
  1679. block_var : lhs
  1680. | mlhs
  1681. ;
  1682. opt_block_var : none
  1683. | '|' /* none */ '|'
  1684. {
  1685. $$ = (NODE*)1;
  1686. }
  1687. | tOROP
  1688. {
  1689. $$ = (NODE*)1;
  1690. }
  1691. | '|' block_var '|'
  1692. {
  1693. $$ = $2;
  1694. }
  1695. ;
  1696. do_block : kDO_BLOCK
  1697. {
  1698. $<num>1 = ruby_sourceline;
  1699. reset_block(vps);
  1700. }
  1701. opt_block_var
  1702. {
  1703. $<vars>$ = vps->block_vars;
  1704. }
  1705. compstmt
  1706. kEND
  1707. {
  1708. $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
  1709. nd_set_line($$, $<num>1);
  1710. }
  1711. ;
  1712. block_call : command do_block
  1713. {
  1714. if ($1 && nd_type($1) == NODE_BLOCK_PASS) {
  1715. rb_compile_error("both block arg and actual block given");
  1716. }
  1717. $2->nd_iter = $1;
  1718. $$ = $2;
  1719. fixpos($$, $1);
  1720. }
  1721. | block_call '.' operation2 opt_paren_args
  1722. {
  1723. $$ = new_call(parse_state, $1, $3, $4);
  1724. }
  1725. | block_call tCOLON2 operation2 opt_paren_args
  1726. {
  1727. $$ = new_call(parse_state, $1, $3, $4);
  1728. }
  1729. ;
  1730. method_call : operation paren_args
  1731. {
  1732. $$ = new_fcall(parse_state, $1, $2);
  1733. fixpos($$, $2);
  1734. }
  1735. | primary_value '.' operation2 opt_paren_args
  1736. {
  1737. $$ = new_call(parse_state, $1, $3, $4);
  1738. fixpos($$, $1);
  1739. }
  1740. | primary_value tCOLON2 operation2 paren_args
  1741. {
  1742. $$ = new_call(parse_state, $1, $3, $4);
  1743. fixpos($$, $1);
  1744. }
  1745. | primary_value tCOLON2 operation3
  1746. {
  1747. $$ = new_call(parse_state, $1, $3, 0);
  1748. }
  1749. | primary_value '\\' operation2
  1750. {
  1751. $$ = NEW_CALL($1, rb_intern("get_reference"), NEW_LIST(NEW_LIT(ID2SYM($3))));
  1752. }
  1753. | tUBS operation2
  1754. {
  1755. $$ = NEW_FCALL(rb_intern("get_reference"), NEW_LIST(NEW_LIT(ID2SYM($2))));
  1756. }
  1757. | kSUPER paren_args
  1758. {
  1759. $$ = new_super(parse_state, $2);
  1760. }
  1761. | kSUPER
  1762. {
  1763. $$ = NEW_ZSUPER();
  1764. }
  1765. ;
  1766. brace_block : '{'
  1767. {
  1768. $<num>1 = ruby_sourceline;
  1769. reset_block(vps);
  1770. }
  1771. opt_block_var { $<vars>$ = vps->block_vars; }
  1772. compstmt '}'
  1773. {
  1774. $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
  1775. nd_set_line($$, $<num>1);
  1776. }
  1777. | kDO
  1778. {
  1779. $<num>1 = ruby_sourceline;
  1780. reset_block(vps);
  1781. }
  1782. opt_block_var { $<vars>$ = vps->block_vars; }
  1783. compstmt kEND
  1784. {
  1785. $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
  1786. nd_set_line($$, $<num>1);
  1787. }
  1788. ;
  1789. case_body : kWHEN when_args then
  1790. compstmt
  1791. cases
  1792. {
  1793. $$ = NEW_WHEN($2, $4, $5);
  1794. }
  1795. ;
  1796. when_args : args
  1797. | args ',' tSTAR arg_value
  1798. {
  1799. $$ = list_append(parse_state, $1, NEW_WHEN($4, 0, 0));
  1800. }
  1801. | tSTAR arg_value
  1802. {
  1803. $$ = NEW_LIST(NEW_WHEN($2, 0, 0));
  1804. }
  1805. ;
  1806. cases : opt_else
  1807. | case_body
  1808. ;
  1809. opt_rescue : kRESCUE exc_list exc_var then
  1810. compstmt
  1811. opt_rescue
  1812. {
  1813. if ($3) {
  1814. $3 = node_assign($3, NEW_GVAR(rb_intern("$!")), parse_state);
  1815. $5 = block_append(parse_state, $3, $5);
  1816. }
  1817. $$ = NEW_RESBODY($2, $5, $6);
  1818. fixpos($$, $2?$2:$5);
  1819. }
  1820. | none
  1821. ;
  1822. exc_list : arg_value
  1823. {
  1824. $$ = NEW_LIST($1);
  1825. }
  1826. | mrhs
  1827. | none
  1828. ;
  1829. exc_var : tASSOC lhs
  1830. {
  1831. $$ = $2;
  1832. }
  1833. | none
  1834. ;
  1835. opt_ensure : kENSURE compstmt
  1836. {
  1837. if ($2)
  1838. $$ = $2;
  1839. else
  1840. /* place holder */
  1841. $$ = NEW_NIL();
  1842. }
  1843. | none
  1844. ;
  1845. literal : numeric
  1846. | symbol
  1847. {
  1848. $$ = NEW_LIT(ID2SYM($1));
  1849. }
  1850. | dsym
  1851. ;
  1852. strings : string
  1853. {
  1854. NODE *node = $1;
  1855. if (!node) {
  1856. node = NEW_STR(string_new(0, 0));
  1857. }
  1858. else {
  1859. node = evstr2dstr(parse_state, node);
  1860. }
  1861. $$ = node;
  1862. }
  1863. ;
  1864. string : string1
  1865. | string string1
  1866. {
  1867. $$ = literal_concat(parse_state, $1, $2);
  1868. }
  1869. ;
  1870. string1 : tSTRING_BEG string_contents tSTRING_END
  1871. {
  1872. $$ = $2;
  1873. }
  1874. ;
  1875. xstring : tXSTRING_BEG xstring_contents tSTRING_END
  1876. {
  1877. ID code = $1;
  1878. NODE *node = $2;
  1879. if (!node) {
  1880. node = NEW_XSTR(string_new(0, 0));
  1881. }
  1882. else {
  1883. switch (nd_type(node)) {
  1884. case NODE_STR:
  1885. nd_set_type(node, NODE_XSTR);
  1886. break;
  1887. case NODE_DSTR:
  1888. nd_set_type(node, NODE_DXSTR);
  1889. break;
  1890. default:
  1891. node = NEW_NODE(NODE_DXSTR, string_new(0, 0), 1, NEW_LIST(node));
  1892. break;
  1893. }
  1894. }
  1895. if(code) {
  1896. node->u2.id = code;
  1897. } else {
  1898. node->u2.id = 0;
  1899. }
  1900. $$ = node;
  1901. }
  1902. ;
  1903. regexp : tREGEXP_BEG xstring_contents tREGEXP_END
  1904. {
  1905. intptr_t options = $3;
  1906. NODE *node = $2;
  1907. if (!node) {
  1908. node = NEW_REGEX(string_new2(""), options & ~RE_OPTION_ONCE);
  1909. }
  1910. else switch (nd_type(node)) {
  1911. case NODE_STR:
  1912. {
  1913. nd_set_type(node, NODE_REGEX);
  1914. node->nd_cnt = options & ~RE_OPTION_ONCE;
  1915. /*
  1916. node->nd_lit = rb_reg_new(RSTRING(src)->ptr,
  1917. RSTRING(src)->len,
  1918. options & ~RE_OPTION_ONCE);
  1919. */
  1920. }
  1921. break;
  1922. default:
  1923. node = NEW_NODE(NODE_DSTR, string_new(0, 0), 1, NEW_LIST(node));
  1924. case NODE_DSTR:
  1925. if (options & RE_OPTION_ONCE) {
  1926. nd_set_type(node, NODE_DREGX_ONCE);
  1927. }
  1928. else {
  1929. nd_set_type(node, NODE_DREGX);
  1930. }
  1931. node->nd_cflag = options & ~RE_OPTION_ONCE;
  1932. break;
  1933. }
  1934. $$ = node;
  1935. }
  1936. ;
  1937. words : tWORDS_BEG ' ' tSTRING_END
  1938. {
  1939. $$ = NEW_ZARRAY();
  1940. }
  1941. | tWORDS_BEG word_list tSTRING_END
  1942. {
  1943. $$ = $2;
  1944. }
  1945. ;
  1946. word_list : /* none */
  1947. {
  1948. $$ = 0;
  1949. }
  1950. | word_list word ' '
  1951. {
  1952. $$ = list_append(parse_state, $1, evstr2dstr(parse_state, $2));
  1953. }
  1954. ;
  1955. word : string_content
  1956. | word string_content
  1957. {
  1958. $$ = literal_concat(parse_state, $1, $2);
  1959. }
  1960. ;
  1961. qwords : tQWORDS_BEG ' ' tSTRING_END
  1962. {
  1963. $$ = NEW_ZARRAY();
  1964. }
  1965. | tQWORDS_BEG qword_list tSTRING_END
  1966. {
  1967. $$ = $2;
  1968. }
  1969. ;
  1970. qword_list : /* none */
  1971. {
  1972. $$ = 0;
  1973. }
  1974. | qword_list tSTRING_CONTENT ' '
  1975. {
  1976. $$ = list_append(parse_state, $1, $2);
  1977. }
  1978. ;
  1979. string_contents : /* none */
  1980. {
  1981. $$ = 0;
  1982. }
  1983. | string_contents string_content
  1984. {
  1985. $$ = literal_concat(parse_state, $1, $2);
  1986. }
  1987. ;
  1988. xstring_contents: /* none */
  1989. {
  1990. $$ = 0;
  1991. }
  1992. | xstring_contents string_content
  1993. {
  1994. $$ = literal_concat(parse_state, $1, $2);
  1995. }
  1996. ;
  1997. string_content : tSTRING_CONTENT
  1998. | tSTRING_DVAR
  1999. {
  2000. $<node>$ = lex_strterm;
  2001. lex_strterm = 0;
  2002. vps->lex_state = EXPR_BEG;
  2003. }
  2004. string_dvar
  2005. {
  2006. lex_strterm = $<node>2;
  2007. $$ = NEW_EVSTR($3);
  2008. }
  2009. | tSTRING_DBEG
  2010. {
  2011. $<node>$ = lex_strterm;
  2012. lex_strterm = 0;
  2013. vps->lex_state = EXPR_BEG;
  2014. COND_PUSH(0);
  2015. CMDARG_PUSH(0);
  2016. }
  2017. compstmt '}'
  2018. {
  2019. lex_strterm = $<node>2;
  2020. COND_LEXPOP();
  2021. CMDARG_LEXPOP();
  2022. if (($$ = $3) && nd_type($$) == NODE_NEWLINE) {
  2023. $$ = $$->nd_next;
  2024. }
  2025. $$ = new_evstr(parse_state, $$);
  2026. }
  2027. ;
  2028. string_dvar : tGVAR {$$ = NEW_GVAR($1);}
  2029. | tIVAR {$$ = NEW_IVAR($1);}
  2030. | tCVAR {$$ = NEW_CVAR($1);}
  2031. | backref
  2032. ;
  2033. symbol : tSYMBEG sym
  2034. {
  2035. vps->lex_state = EXPR_END;
  2036. $$ = $2;
  2037. }
  2038. ;
  2039. sym : fname
  2040. | tIVAR
  2041. | tGVAR
  2042. | tCVAR
  2043. ;
  2044. dsym : tSYMBEG xstring_contents tSTRING_END
  2045. {
  2046. vps->lex_state = EXPR_END;
  2047. if (!($$ = $2)) {
  2048. yyerror("empty symbol literal");
  2049. }
  2050. else {
  2051. switch (nd_type($$)) {
  2052. case NODE_DSTR:
  2053. nd_set_type($$, NODE_DSYM);
  2054. break;
  2055. case NODE_STR:
  2056. /* TODO: this line should never fail unless nd_str is binary */
  2057. if (strlen(bdatae($$->nd_str,"")) == blength($$->nd_str)) {
  2058. ID tmp = rb_intern(bdata($$->nd_str));
  2059. bdestroy($$->nd_str);
  2060. $$->nd_lit = ID2SYM(tmp);
  2061. nd_set_type($$, NODE_LIT);
  2062. break;
  2063. } else {
  2064. bdestroy($$->nd_str);
  2065. }
  2066. /* fall through */
  2067. default:
  2068. $$ = NEW_NODE(NODE_DSYM, string_new(0, 0), 1, NEW_LIST($$));
  2069. break;
  2070. }
  2071. }
  2072. }
  2073. ;
  2074. numeric : tINTEGER
  2075. | tFLOAT
  2076. | tUMINUS_NUM tINTEGER %prec tLOWEST
  2077. {
  2078. $$ = NEW_NEGATE($2);
  2079. }
  2080. | tUMINUS_NUM tFLOAT %prec tLOWEST
  2081. {
  2082. $$ = NEW_NEGATE($2);
  2083. }
  2084. ;
  2085. variable : tIDENTIFIER
  2086. | tIVAR
  2087. | tGVAR
  2088. | tCONSTANT
  2089. | tCVAR
  2090. | kNIL {$$ = kNIL;}
  2091. | kSELF {$$ = kSELF;}
  2092. | kTRUE {$$ = kTRUE;}
  2093. | kFALSE {$$ = kFALSE;}
  2094. | k__FILE__ {$$ = k__FILE__;}
  2095. | k__LINE__ {$$ = k__LINE__;}
  2096. ;
  2097. var_ref : variable
  2098. {
  2099. $$ = gettable($1);
  2100. }
  2101. ;
  2102. var_lhs : variable
  2103. {
  2104. $$ = assignable($1, 0, parse_state);
  2105. }
  2106. ;
  2107. backref : tNTH_REF
  2108. | tBACK_REF
  2109. ;
  2110. superclass : term
  2111. {
  2112. $$ = 0;
  2113. }
  2114. | '<'
  2115. {
  2116. vps->lex_state = EXPR_BEG;
  2117. }
  2118. expr_value term
  2119. {
  2120. $$ = $3;
  2121. }
  2122. | error term {yyerrok; $$ = 0;}
  2123. ;
  2124. f_arglist : '(' f_args opt_nl ')'
  2125. {
  2126. $$ = $2;
  2127. vps->lex_state = EXPR_BEG;
  2128. }
  2129. | f_args term
  2130. {
  2131. $$ = $1;
  2132. }
  2133. ;
  2134. f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg
  2135. {
  2136. $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, $3, $5), $6);
  2137. }
  2138. | f_arg ',' f_optarg opt_f_block_arg
  2139. {
  2140. $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, $3, -1), $4);
  2141. }
  2142. | f_arg ',' f_rest_arg opt_f_block_arg
  2143. {
  2144. $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, 0, $3), $4);
  2145. }
  2146. | f_arg opt_f_block_arg
  2147. {
  2148. $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, 0, -1), $2);
  2149. }
  2150. | f_optarg ',' f_rest_arg opt_f_block_arg
  2151. {
  2152. $$ = block_append(parse_state, NEW_ARGS(0, $1, $3), $4);
  2153. }
  2154. | f_optarg opt_f_block_arg
  2155. {
  2156. $$ = block_append(parse_state, NEW_ARGS(0, $1, -1), $2);
  2157. }
  2158. | f_rest_arg opt_f_block_arg
  2159. {
  2160. $$ = block_append(parse_state, NEW_ARGS(0, 0, $1), $2);
  2161. }
  2162. | f_block_arg
  2163. {
  2164. $$ = block_append(parse_state, NEW_ARGS(0, 0, -1), $1);
  2165. }
  2166. | /* none */
  2167. {
  2168. $$ = NEW_ARGS(0, 0, -1);
  2169. }
  2170. ;
  2171. f_norm_arg : tCONSTANT
  2172. {
  2173. yyerror("formal argument cannot be a constant");
  2174. }
  2175. | tIVAR
  2176. {
  2177. yyerror("formal argument cannot be an instance variable");
  2178. }
  2179. | tGVAR
  2180. {
  2181. yyerror("formal argument cannot be a global variable");
  2182. }
  2183. | tCVAR
  2184. {
  2185. yyerror("formal argument cannot be a class variable");
  2186. }
  2187. | tIDENTIFIER
  2188. {
  2189. if (!is_local_id($1))
  2190. yyerror("formal argument must be local variable");
  2191. else if (local_id($1))
  2192. yyerror("duplicate argument name");
  2193. local_cnt($1);
  2194. $$ = 1;
  2195. }
  2196. ;
  2197. f_arg : f_norm_arg
  2198. | f_arg ',' f_norm_arg
  2199. {
  2200. $$ += 1;
  2201. }
  2202. ;
  2203. f_opt : tIDENTIFIER '=' arg_value
  2204. {
  2205. if (!is_local_id($1))
  2206. yyerror("formal argument must be local variable");
  2207. else if (local_id($1))
  2208. yyerror("duplicate optional argument name");
  2209. $$ = assignable($1, $3, parse_state);
  2210. }
  2211. ;
  2212. f_optarg : f_opt
  2213. {
  2214. $$ = NEW_BLOCK($1);
  2215. $$->nd_end = $$;
  2216. }
  2217. | f_optarg ',' f_opt
  2218. {
  2219. $$ = block_append(parse_state, $1, $3);
  2220. }
  2221. ;
  2222. restarg_mark : '*'
  2223. | tSTAR
  2224. ;
  2225. f_rest_arg : restarg_mark tIDENTIFIER
  2226. {
  2227. if (!is_local_id($2))
  2228. yyerror("rest argument must be local variable");
  2229. else if (local_id($2))
  2230. yyerror("duplicate rest argument name");
  2231. $$ = local_cnt($2) + 1;
  2232. }
  2233. | restarg_mark
  2234. {
  2235. $$ = 0;
  2236. }
  2237. ;
  2238. blkarg_mark : '&'
  2239. | tAMPER
  2240. ;
  2241. f_block_arg : blkarg_mark tIDENTIFIER
  2242. {
  2243. if (!is_local_id($2))
  2244. yyerror("block argument must be local variable");
  2245. else if (local_id($2))
  2246. yyerror("duplicate block argument name");
  2247. $$ = NEW_BLOCK_ARG($2);
  2248. }
  2249. ;
  2250. opt_f_block_arg : ',' f_block_arg
  2251. {
  2252. $$ = $2;
  2253. }
  2254. | none
  2255. ;
  2256. singleton : var_ref
  2257. {
  2258. if (nd_type($1) == NODE_SELF) {
  2259. $$ = NEW_SELF();
  2260. }
  2261. else {
  2262. $$ = $1;
  2263. value_expr($$);
  2264. }
  2265. }
  2266. | '(' {vps->lex_state = EXPR_BEG;} expr opt_nl ')'
  2267. {
  2268. if ($3 == 0) {
  2269. yyerror("can't define singleton method for ().");
  2270. }
  2271. else {
  2272. switch (nd_type($3)) {
  2273. case NODE_STR:
  2274. case NODE_DSTR:
  2275. case NODE_XSTR:
  2276. case NODE_DXSTR:
  2277. case NODE_DREGX:
  2278. case NODE_LIT:
  2279. case NODE_ARRAY:
  2280. case NODE_ZARRAY:
  2281. yyerror("can't define singleton method for literals");
  2282. default:
  2283. value_expr($3);
  2284. break;
  2285. }
  2286. }
  2287. $$ = $3;
  2288. }
  2289. ;
  2290. assoc_list : none
  2291. | assocs trailer
  2292. {
  2293. $$ = $1;
  2294. }
  2295. | args trailer
  2296. {
  2297. if ($1->nd_alen%2 != 0) {
  2298. yyerror("odd number list for Hash");
  2299. }
  2300. $$ = $1;
  2301. }
  2302. ;
  2303. assocs : assoc
  2304. | assocs ',' assoc
  2305. {
  2306. $$ = list_concat($1, $3);
  2307. }
  2308. ;
  2309. assoc : arg_value tASSOC arg_value
  2310. {
  2311. $$ = list_append(parse_state, NEW_LIST($1), $3);
  2312. }
  2313. ;
  2314. operation : tIDENTIFIER
  2315. | tCONSTANT
  2316. | tFID
  2317. ;
  2318. operation2 : tIDENTIFIER
  2319. | tCONSTANT
  2320. | tFID
  2321. | op
  2322. ;
  2323. operation3 : tIDENTIFIER
  2324. | tFID
  2325. | op
  2326. ;
  2327. dot_or_colon : '.'
  2328. | tCOLON2
  2329. ;
  2330. opt_terms : /* none */
  2331. | terms
  2332. ;
  2333. opt_nl : /* none */
  2334. | '\n'
  2335. ;
  2336. trailer : /* none */
  2337. | '\n'
  2338. | ','
  2339. ;
  2340. term : ';' {yyerrok;}
  2341. | '\n'
  2342. ;
  2343. terms : term
  2344. | terms ';' {yyerrok;}
  2345. ;
  2346. none : /* none */ {$$ = 0;}
  2347. ;
  2348. %%
  2349. /* We remove any previous definition of `SIGN_EXTEND_CHAR',
  2350. since ours (we hope) works properly with all combinations of
  2351. machines, compilers, `char' and `unsigned char' argument types.
  2352. (Per Bothner suggested the basic approach.) */
  2353. #undef SIGN_EXTEND_CHAR
  2354. #if __STDC__
  2355. # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
  2356. #else /* not __STDC__ */
  2357. /* As in Harbison and Steele. */
  2358. # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
  2359. #endif
  2360. #define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_' || ismbchar(c)))
  2361. #define LEAVE_BS 1
  2362. static int
  2363. syd_yyerror(msg, parse_state)
  2364. const char *msg;
  2365. rb_parse_state *parse_state;
  2366. {
  2367. create_error(parse_state, (char *)msg);
  2368. return 1;
  2369. }
  2370. static int
  2371. yycompile(parse_state, f, line)
  2372. rb_parse_state *parse_state;
  2373. char *f;
  2374. int line;
  2375. {
  2376. int n;
  2377. /* Setup an initial empty scope. */
  2378. heredoc_end = 0;
  2379. lex_strterm = 0;
  2380. ruby_sourcefile = f;
  2381. n = yyparse(parse_state);
  2382. ruby_debug_lines = 0;
  2383. compile_for_eval = 0;
  2384. parse_state->cond_stack = 0;
  2385. parse_state->cmdarg_stack = 0;
  2386. command_start = TRUE;
  2387. class_nest = 0;
  2388. in_single = 0;
  2389. in_def = 0;
  2390. cur_mid = 0;
  2391. lex_strterm = 0;
  2392. return n;
  2393. }
  2394. static bool
  2395. lex_get_str(rb_parse_state *parse_state)
  2396. {
  2397. const char *str;
  2398. const char *beg, *end, *pend;
  2399. int sz;
  2400. str = bdata(parse_state->lex_string);
  2401. beg = str;
  2402. if (parse_state->lex_str_used) {
  2403. if (blength(parse_state->lex_string) == parse_state->lex_str_used) {
  2404. return false;
  2405. }
  2406. beg += parse_state->lex_str_used;
  2407. }
  2408. pend = str + blength(parse_state->lex_string);
  2409. end = beg;
  2410. while(end < pend) {
  2411. if(*end++ == '\n') break;
  2412. }
  2413. sz = end - beg;
  2414. bcatblk(parse_state->line_buffer, beg, sz);
  2415. parse_state->lex_str_used += sz;
  2416. return TRUE;
  2417. }
  2418. void syd_add_to_parse_tree(STATE, OBJECT ary,
  2419. NODE * n, int newlines, ID * locals, int line_numbers);
  2420. static OBJECT convert_to_sexp(STATE, NODE *node, int newlines) {
  2421. OBJECT ary;
  2422. ary = array_new(state, 1);
  2423. syd_add_to_parse_tree(state, ary, node, newlines, NULL, FALSE);
  2424. return array_get(state, ary, 0);
  2425. }
  2426. static bool
  2427. lex_getline(rb_parse_state *parse_state)
  2428. {
  2429. if(!parse_state->line_buffer) {
  2430. parse_state->line_buffer = cstr2bstr("");
  2431. } else {
  2432. btrunc(parse_state->line_buffer, 0);
  2433. }
  2434. return parse_state->lex_gets(parse_state);
  2435. }
  2436. OBJECT
  2437. syd_compile_string(STATE, const char *f, bstring s, int line, int newlines)
  2438. {
  2439. int n;
  2440. rb_parse_state *parse_state;
  2441. OBJECT ret;
  2442. parse_state = alloc_parse_state();
  2443. parse_state->state = state;
  2444. parse_state->lex_string = s;
  2445. parse_state->lex_gets = lex_get_str;
  2446. parse_state->lex_pbeg = 0;
  2447. parse_state->lex_p = 0;
  2448. parse_state->lex_pend = 0;
  2449. parse_state->error = Qfalse;
  2450. ruby_sourceline = line - 1;
  2451. compile_for_eval = 1;
  2452. n = yycompile(parse_state, f, line);
  2453. if(parse_state->error == Qfalse) {
  2454. ret = convert_to_sexp(state, parse_state->top, newlines);
  2455. } else {
  2456. ret = parse_state->error;
  2457. }
  2458. pt_free(parse_state);
  2459. free(parse_state);
  2460. return ret;
  2461. }
  2462. static bool parse_io_gets(rb_parse_state *parse_state) {
  2463. if(feof(parse_state->lex_io)) {
  2464. return false;
  2465. }
  2466. while(TRUE) {
  2467. char *ptr, buf[1024];
  2468. int read;
  2469. ptr = fgets(buf, sizeof(buf), parse_state->lex_io);
  2470. if(!ptr) {
  2471. return false;
  2472. }
  2473. read = strlen(ptr);
  2474. bcatblk(parse_state->line_buffer, ptr, read);
  2475. /* check whether we read a full line */
  2476. if(!(read == (sizeof(buf) - 1) && ptr[read] != '\n')) {
  2477. break;
  2478. }
  2479. }
  2480. return TRUE;
  2481. }
  2482. OBJECT
  2483. syd_compile_file(STATE, const char *f, FILE *file, int start, int newlines)
  2484. {
  2485. int n;
  2486. OBJECT ret;
  2487. rb_parse_state *parse_state;
  2488. parse_state = alloc_parse_state();
  2489. parse_state->state = state;
  2490. parse_state->lex_io = file;
  2491. parse_state->lex_gets = parse_io_gets;
  2492. parse_state->lex_pbeg = 0;
  2493. parse_state->lex_p = 0;
  2494. parse_state->lex_pend = 0;
  2495. parse_state->error = Qfalse;
  2496. ruby_sourceline = start - 1;
  2497. n = yycompile(parse_state, f, start);
  2498. if(parse_state->error == Qfalse) {
  2499. ret = convert_to_sexp(state, parse_state->top, newlines);
  2500. } else {
  2501. ret = parse_state->error;
  2502. }
  2503. pt_free(parse_state);
  2504. free(parse_state);
  2505. return ret;
  2506. }
  2507. #define nextc() ps_nextc(parse_state)
  2508. static inline int
  2509. ps_nextc(rb_parse_state *parse_state)
  2510. {
  2511. int c;
  2512. if (parse_state->lex_p == parse_state->lex_pend) {
  2513. bstring v;
  2514. if (!lex_getline(parse_state)) return -1;
  2515. v = parse_state->line_buffer;
  2516. if (heredoc_end > 0) {
  2517. ruby_sourceline = heredoc_end;
  2518. heredoc_end = 0;
  2519. }
  2520. ruby_sourceline++;
  2521. /* This code is setup so that lex_pend can be compared to
  2522. the data in lex_lastline. Thats important, otherwise
  2523. the heredoc code breaks. */
  2524. if(parse_state->lex_lastline) {
  2525. bassign(parse_state->lex_lastline, v);
  2526. } else {
  2527. parse_state->lex_lastline = bstrcpy(v);
  2528. }
  2529. v = parse_state->lex_lastline;
  2530. parse_state->lex_pbeg = parse_state->lex_p = bdata(v);
  2531. parse_state->lex_pend = parse_state->lex_p + blength(v);
  2532. }
  2533. c = (unsigned char)*(parse_state->lex_p++);
  2534. if (c == '\r' && parse_state->lex_p < parse_state->lex_pend && *(parse_state->lex_p) == '\n') {
  2535. parse_state->lex_p++;
  2536. c = '\n';
  2537. parse_state->column = 0;
  2538. } else if(c == '\n') {
  2539. parse_state->column = 0;
  2540. } else {
  2541. parse_state->column++;
  2542. }
  2543. return c;
  2544. }
  2545. static void
  2546. pushback(c, parse_state)
  2547. int c;
  2548. rb_parse_state *parse_state;
  2549. {
  2550. if (c == -1) return;
  2551. parse_state->lex_p--;
  2552. }
/* True when exactly one character of the current line has been consumed,
   i.e. the character just read was the first one on its line. */
#define was_bol() (parse_state->lex_p == parse_state->lex_pbeg + 1)
/* True when unread input remains on the current line and the next unread
   character equals c. Nothing is consumed. */
#define peek(c) (parse_state->lex_p != parse_state->lex_pend && (c) == *(parse_state->lex_p))
/* The token buffer: a character buffer (tokenbuf) with a write index
   (tokidx), filled through newtok() and tokadd().
     tokfix()  - NUL-terminates the text collected so far
     tok()     - the buffer itself
     toklen()  - number of characters collected
     toklast() - last collected character, or 0 when the buffer is empty */
#define tokfix() (tokenbuf[tokidx]='\0')
#define tok() tokenbuf
#define toklen() tokidx
#define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
  2562. static char*
  2563. newtok(rb_parse_state *parse_state)
  2564. {
  2565. tokidx = 0;
  2566. if (!tokenbuf) {
  2567. toksiz = 60;
  2568. tokenbuf = ALLOC_N(char, 60);
  2569. }
  2570. if (toksiz > 4096) {
  2571. toksiz = 60;
  2572. REALLOC_N(tokenbuf, char, 60);
  2573. }
  2574. return tokenbuf;
  2575. }
  2576. static void tokadd(char c, rb_parse_state *parse_state)
  2577. {
  2578. assert(tokidx < toksiz && tokidx >= 0);
  2579. tokenbuf[tokidx++] = c;
  2580. if (tokidx >= toksiz) {
  2581. toksiz *= 2;
  2582. REALLOC_N(tokenbuf, char, toksiz);
  2583. }
  2584. }
  2585. static int
  2586. read_escape(rb_parse_state *parse_state)
  2587. {
  2588. int c;
  2589. switch (c = nextc()) {
  2590. case '\\': /* Backslash */
  2591. return c;
  2592. case 'n': /* newline */
  2593. return '\n';
  2594. case 't': /* horizontal tab */
  2595. return '\t';
  2596. case 'r': /* carriage-return */
  2597. return '\r';
  2598. case 'f': /* form-feed */
  2599. return '\f';
  2600. case 'v': /* vertical tab */
  2601. return '\13';
  2602. case 'a': /* alarm(bell) */
  2603. return '\007';
  2604. case 'e': /* escape */
  2605. return 033;
  2606. case '0': case '1': case '2': case '3': /* octal constant */
  2607. case '4': case '5': case '6': case '7':
  2608. {
  2609. int numlen;
  2610. pushback(c, parse_state);
  2611. c = scan_oct(parse_state->lex_p, 3, &numlen);
  2612. parse_state->lex_p += numlen;
  2613. }
  2614. return c;
  2615. case 'x': /* hex constant */
  2616. {
  2617. int numlen;
  2618. c = scan_hex(parse_state->lex_p, 2, &numlen);
  2619. if (numlen == 0) {
  2620. yyerror("Invalid escape character syntax");
  2621. return 0;
  2622. }
  2623. parse_state->lex_p += numlen;
  2624. }
  2625. return c;
  2626. case 'b': /* backspace */
  2627. return '\010';
  2628. case 's': /* space */
  2629. return ' ';
  2630. case 'M':
  2631. if ((c = nextc()) != '-') {
  2632. yyerror("Invalid escape character syntax");
  2633. pushback(c, parse_state);
  2634. return '\0';
  2635. }
  2636. if ((c = nextc()) == '\\') {
  2637. return read_escape(parse_state) | 0x80;
  2638. }
  2639. else if (c == -1) goto eof;
  2640. else {
  2641. return ((c & 0xff) | 0x80);
  2642. }
  2643. case 'C':
  2644. if ((c = nextc()) != '-') {
  2645. yyerror("Invalid escape character syntax");
  2646. pushback(c, parse_state);
  2647. return '\0';
  2648. }
  2649. case 'c':
  2650. if ((c = nextc())== '\\') {
  2651. c = read_escape(parse_state);
  2652. }
  2653. else if (c == '?')
  2654. return 0177;
  2655. else if (c == -1) goto eof;
  2656. return c & 0x9f;
  2657. eof:
  2658. case -1:
  2659. yyerror("Invalid escape character syntax");
  2660. return '\0';
  2661. default:
  2662. return c;
  2663. }
  2664. }
  2665. static int
  2666. tokadd_escape(term, parse_state)
  2667. int term;
  2668. rb_parse_state *parse_state;
  2669. {
  2670. int c;
  2671. switch (c = nextc()) {
  2672. case '\n':
  2673. return 0; /* just ignore */
  2674. case '0': case '1': case '2': case '3': /* octal constant */
  2675. case '4': case '5': case '6': case '7':
  2676. {
  2677. int i;
  2678. tokadd((char)'\\', parse_state);
  2679. tokadd((char)c, parse_state);
  2680. for (i=0; i<2; i++) {
  2681. c = nextc();
  2682. if (c == -1) goto eof;
  2683. if (c < '0' || '7' < c) {
  2684. pushback(c, parse_state);
  2685. break;
  2686. }
  2687. tokadd((char)c, parse_state);
  2688. }
  2689. }
  2690. return 0;
  2691. case 'x': /* hex constant */
  2692. {
  2693. int numlen;
  2694. tokadd('\\', parse_state);
  2695. tokadd((char)c, parse_state);
  2696. scan_hex(parse_state->lex_p, 2, &numlen);
  2697. if (numlen == 0) {
  2698. yyerror("Invalid escape character syntax");
  2699. return -1;
  2700. }
  2701. while (numlen--)
  2702. tokadd((char)nextc(), parse_state);
  2703. }
  2704. return 0;
  2705. case 'M':
  2706. if ((c = nextc()) != '-') {
  2707. yyerror("Invalid escape character syntax");
  2708. pushback(c, parse_state);
  2709. return 0;
  2710. }
  2711. tokadd('\\',parse_state);
  2712. tokadd('M', parse_state);
  2713. tokadd('-', parse_state);
  2714. goto escaped;
  2715. case 'C':
  2716. if ((c = nextc()) != '-') {
  2717. yyerror("Invalid escape character syntax");
  2718. pushback(c, parse_state);
  2719. return 0;
  2720. }
  2721. tokadd('\\', parse_state);
  2722. tokadd('C', parse_state);
  2723. tokadd('-', parse_state);
  2724. goto escaped;
  2725. case 'c':
  2726. tokadd('\\', parse_state);
  2727. tokadd('c', parse_state);
  2728. escaped:
  2729. if ((c = nextc()) == '\\') {
  2730. return tokadd_escape(term, parse_state);
  2731. }
  2732. else if (c == -1) goto eof;
  2733. tokadd((char)c, parse_state);
  2734. return 0;
  2735. eof:
  2736. case -1:
  2737. yyerror("Invalid escape character syntax");
  2738. return -1;
  2739. default:
  2740. if (c != '\\' || c != term)
  2741. tokadd('\\', parse_state);
  2742. tokadd((char)c, parse_state);
  2743. }
  2744. return 0;
  2745. }
  2746. static int
  2747. regx_options(rb_parse_state *parse_state)
  2748. {
  2749. char kcode = 0;
  2750. int options = 0;
  2751. int c;
  2752. newtok(parse_state);
  2753. while (c = nextc(), ISALPHA(c)) {
  2754. switch (c) {
  2755. case 'i':
  2756. options |= RE_OPTION_IGNORECASE;
  2757. break;
  2758. case 'x':
  2759. options |= RE_OPTION_EXTENDED;
  2760. break;
  2761. case 'm':
  2762. options |= RE_OPTION_MULTILINE;
  2763. break;
  2764. case 'o':
  2765. options |= RE_OPTION_ONCE;
  2766. break;
  2767. case 'n':
  2768. kcode = 16;
  2769. break;
  2770. case 'e':
  2771. kcode = 32;
  2772. break;
  2773. case 's':
  2774. kcode = 48;
  2775. break;
  2776. case 'u':
  2777. kcode = 64;
  2778. break;
  2779. default:
  2780. tokadd((char)c, parse_state);
  2781. break;
  2782. }
  2783. }
  2784. pushback(c, parse_state);
  2785. if (toklen()) {
  2786. tokfix();
  2787. rb_compile_error("unknown regexp option%s - %s",
  2788. toklen() > 1 ? "s" : "", tok());
  2789. }
  2790. return options | kcode;
  2791. }
/* Flag bits describing how the body of a string-like literal is scanned
   (see tokadd_string, parse_string and here_document). */
/* keep the backslash of an escape in the collected text */
#define STR_FUNC_ESCAPE 0x01
/* process backslash escapes and stop at "#$", "#@" and "#{" */
#define STR_FUNC_EXPAND 0x02
/* copy escapes through tokadd_escape; closing delimiter is followed by options */
#define STR_FUNC_REGEXP 0x04
/* word list: unescaped whitespace separates elements */
#define STR_FUNC_QWORDS 0x08
/* symbol: a NUL character in the body is an error */
#define STR_FUNC_SYMBOL 0x10
/* heredoc introduced with "<<-": terminator may be preceded by whitespace */
#define STR_FUNC_INDENT 0x20
/* Flag combinations for each kind of literal. Note that str_dquote and
   str_xquote have the same value. */
enum string_type {
  str_squote = (0),
  str_dquote = (STR_FUNC_EXPAND),
  str_xquote = (STR_FUNC_EXPAND),
  str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND),
  str_sword = (STR_FUNC_QWORDS),
  str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND),
  str_ssym = (STR_FUNC_SYMBOL),
  str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
};
/*
 * Scans literal text into the token buffer until something that ends the
 * current chunk is reached.
 *
 *   func   - STR_FUNC_* flags selecting the scanning rules
 *   term   - closing delimiter
 *   paren  - opening delimiter for nestable literals, or 0
 *   nest   - current nesting depth of paren/term pairs; may be NULL, in
 *            which case the first term ends the scan (it is dereferenced
 *            whenever paren is non-zero and matched)
 *
 * Returns -1 when input ends or an escape is invalid. Otherwise returns
 * the character that stopped the scan, which has been pushed back: the
 * terminator, a '#' that starts an interpolation, or (for word lists) a
 * whitespace character.
 */
static int tokadd_string(int func, int term, int paren, int *nest, rb_parse_state *parse_state)
{
  int c;
  while ((c = nextc()) != -1) {
    /* Track nesting of the opening delimiter. */
    if (paren && c == paren) {
      ++*nest;
    }
    else if (c == term) {
      /* An un-nested terminator ends the literal. */
      if (!nest || !*nest) {
        pushback(c, parse_state);
        break;
      }
      --*nest;
    }
    /* '#' followed by '$', '@' or '{' starts an interpolation; stop in
       front of the '#' and let the caller deal with it. */
    else if ((func & STR_FUNC_EXPAND) && c == '#' && parse_state->lex_p < parse_state->lex_pend) {
      int c2 = *(parse_state->lex_p);
      if (c2 == '$' || c2 == '@' || c2 == '{') {
        pushback(c, parse_state);
        break;
      }
    }
    else if (c == '\\') {
      c = nextc();
      switch (c) {
        case '\n':
          /* Backslash-newline: kept as a newline in word lists, dropped
             entirely in expanding literals, kept verbatim otherwise. */
          if (func & STR_FUNC_QWORDS) break;
          if (func & STR_FUNC_EXPAND) continue;
          tokadd('\\', parse_state);
          break;
        case '\\':
          /* Escaped backslash: keep both characters only when escapes
             are preserved. */
          if (func & STR_FUNC_ESCAPE) tokadd((char)c, parse_state);
          break;
        default:
          if (func & STR_FUNC_REGEXP) {
            /* Regexps keep escapes in source form. */
            pushback(c, parse_state);
            if (tokadd_escape(term, parse_state) < 0)
              return -1;
            continue;
          }
          else if (func & STR_FUNC_EXPAND) {
            /* Translate the escape into the character it denotes. */
            pushback(c, parse_state);
            if (func & STR_FUNC_ESCAPE) tokadd('\\', parse_state);
            c = read_escape(parse_state);
          }
          else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
            /* ignore backslashed spaces in %w */
          }
          else if (c != term && !(paren && c == paren)) {
            /* Non-expanding literal: the backslash is literal unless it
               escapes a delimiter. */
            tokadd('\\', parse_state);
          }
      }
    }
    /* ismbchar() is defined as (0) in this file, so this branch never runs. */
    else if (ismbchar(c)) {
      int i, len = mbclen(c)-1;
      for (i = 0; i < len; i++) {
        tokadd((char)c, parse_state);
        c = nextc();
      }
    }
    /* Unescaped whitespace separates the elements of a word list. */
    else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
      pushback(c, parse_state);
      break;
    }
    /* Report a NUL inside a symbol once, then stop checking for it. */
    if (!c && (func & STR_FUNC_SYMBOL)) {
      func &= ~STR_FUNC_SYMBOL;
      rb_compile_error("symbol cannot contain '\\0'");
      continue;
    }
    tokadd((char)c, parse_state);
  }
  return c;
}
/* Creates the NODE_STRTERM node that records an open string literal: the
   first value is the STR_FUNC_* flag set, the second packs the closing
   delimiter in the low bits with the opening delimiter shifted left by
   CHAR_BIT * 2 bits (presumably unpacked by nd_term/nd_paren -- confirm
   against their definitions). Requires `parse_state` in scope. */
#define NEW_STRTERM(func, term, paren) \
  syd_node_newnode(parse_state, NODE_STRTERM, (OBJECT)(func), (OBJECT)((term) | ((paren) << (CHAR_BIT * 2))), NULL)
/* The semantic value slot for the token currently being lexed. */
#define pslval ((YYSTYPE *)parse_state->lval)
  2883. static int
  2884. parse_string(quote, parse_state)
  2885. NODE *quote;
  2886. rb_parse_state *parse_state;
  2887. {
  2888. int func = quote->nd_func;
  2889. int term = nd_term(quote);
  2890. int paren = nd_paren(quote);
  2891. int c, space = 0;
  2892. if (func == -1) return tSTRING_END;
  2893. c = nextc();
  2894. if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
  2895. do {c = nextc();} while (ISSPACE(c));
  2896. space = 1;
  2897. }
  2898. if (c == term && !quote->nd_nest) {
  2899. if (func & STR_FUNC_QWORDS) {
  2900. quote->nd_func = -1;
  2901. return ' ';
  2902. }
  2903. if (!(func & STR_FUNC_REGEXP)) return tSTRING_END;
  2904. pslval->num = regx_options(parse_state);
  2905. return tREGEXP_END;
  2906. }
  2907. if (space) {
  2908. pushback(c, parse_state);
  2909. return ' ';
  2910. }
  2911. newtok(parse_state);
  2912. if ((func & STR_FUNC_EXPAND) && c == '#') {
  2913. switch (c = nextc()) {
  2914. case '$':
  2915. case '@':
  2916. pushback(c, parse_state);
  2917. return tSTRING_DVAR;
  2918. case '{':
  2919. return tSTRING_DBEG;
  2920. }
  2921. tokadd('#', parse_state);
  2922. }
  2923. pushback(c, parse_state);
  2924. if (tokadd_string(func, term, paren, (int *)&quote->nd_nest, parse_state) == -1) {
  2925. ruby_sourceline = nd_line(quote);
  2926. rb_compile_error("unterminated string meets end of file");
  2927. return tSTRING_END;
  2928. }
  2929. tokfix();
  2930. pslval->node = NEW_STR(string_new(tok(), toklen()));
  2931. return tSTRING_CONTENT;
  2932. }
  2933. /* Called when the lexer detects a heredoc is beginning. This pulls
  2934. in more characters and detects what kind of heredoc it is. */
  2935. static int
  2936. heredoc_identifier(rb_parse_state *parse_state)
  2937. {
  2938. int c = nextc(), term, func = 0;
  2939. size_t len;
  2940. if (c == '-') {
  2941. c = nextc();
  2942. func = STR_FUNC_INDENT;
  2943. }
  2944. switch (c) {
  2945. case '\'':
  2946. func |= str_squote; goto quoted;
  2947. case '"':
  2948. func |= str_dquote; goto quoted;
  2949. case '`':
  2950. func |= str_xquote;
  2951. quoted:
  2952. /* The heredoc indent is quoted, so its easy to find, we just
  2953. continue to consume characters into the token buffer until
  2954. we hit the terminating character. */
  2955. newtok(parse_state);
  2956. tokadd((char)func, parse_state);
  2957. term = c;
  2958. /* Where of where has the term gone.. */
  2959. while ((c = nextc()) != -1 && c != term) {
  2960. len = mbclen(c);
  2961. do {
  2962. tokadd((char)c, parse_state);
  2963. } while (--len > 0 && (c = nextc()) != -1);
  2964. }
  2965. /* Ack! end of file or end of string. */
  2966. if (c == -1) {
  2967. rb_compile_error("unterminated here document identifier");
  2968. return 0;
  2969. }
  2970. break;
  2971. default:
  2972. /* Ok, this is an unquoted heredoc ident. We just consume
  2973. until we hit a non-ident character. */
  2974. /* Do a quick check that first character is actually valid.
  2975. if it's not, then this isn't actually a heredoc at all!
  2976. It sucks that it's way down here in this function that in
  2977. finally bails with this not being a heredoc.*/
  2978. if (!is_identchar(c)) {
  2979. pushback(c, parse_state);
  2980. if (func & STR_FUNC_INDENT) {
  2981. pushback('-', parse_state);
  2982. }
  2983. return 0;
  2984. }
  2985. /* Finally, setup the token buffer and begin to fill it. */
  2986. newtok(parse_state);
  2987. term = '"';
  2988. tokadd((char)(func |= str_dquote), parse_state);
  2989. do {
  2990. len = mbclen(c);
  2991. do { tokadd((char)c, parse_state); } while (--len > 0 && (c = nextc()) != -1);
  2992. } while ((c = nextc()) != -1 && is_identchar(c));
  2993. pushback(c, parse_state);
  2994. break;
  2995. }
  2996. /* Fixup the token buffer, ie set the last character to null. */
  2997. tokfix();
  2998. len = parse_state->lex_p - parse_state->lex_pbeg;
  2999. parse_state->lex_p = parse_state->lex_pend;
  3000. pslval->id = 0;
  3001. /* Tell the lexer that we're inside a string now. nd_lit is
  3002. the heredoc identifier that we watch the stream for to
  3003. detect the end of the heredoc. */
  3004. bstring str = bstrcpy(parse_state->lex_lastline);
  3005. lex_strterm = syd_node_newnode(parse_state, NODE_HEREDOC,
  3006. (OBJECT)string_new(tok(), toklen()), /* nd_lit */
  3007. (OBJECT)len, /* nd_nth */
  3008. (OBJECT)str); /* nd_orig */
  3009. return term == '`' ? tXSTRING_BEG : tSTRING_BEG;
  3010. }
  3011. static void
  3012. heredoc_restore(here, parse_state)
  3013. NODE *here;
  3014. rb_parse_state *parse_state;
  3015. {
  3016. bstring line = here->nd_orig;
  3017. bdestroy(parse_state->lex_lastline);
  3018. parse_state->lex_lastline = line;
  3019. parse_state->lex_pbeg = bdata(line);
  3020. parse_state->lex_pend = parse_state->lex_pbeg + blength(line);
  3021. parse_state->lex_p = parse_state->lex_pbeg + here->nd_nth;
  3022. heredoc_end = ruby_sourceline;
  3023. ruby_sourceline = nd_line(here);
  3024. bdestroy((bstring)here->nd_lit);
  3025. }
  3026. static int
  3027. whole_match_p(eos, len, indent, parse_state)
  3028. char *eos;
  3029. int len, indent;
  3030. rb_parse_state *parse_state;
  3031. {
  3032. char *p = parse_state->lex_pbeg;
  3033. int n;
  3034. if (indent) {
  3035. while (*p && ISSPACE(*p)) p++;
  3036. }
  3037. n = parse_state->lex_pend - (p + len);
  3038. if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return FALSE;
  3039. if (strncmp(eos, p, len) == 0) return TRUE;
  3040. return FALSE;
  3041. }
/* Called when the lexer knows it is inside a heredoc. This function is
   responsible for detecting expansions (i.e. "#{", "#$", "#@") in the
   heredoc body and emitting the matching token, and for detecting the end
   of the heredoc.

   Returns tSTRING_END when the terminator is the very next line,
   tSTRING_DVAR / tSTRING_DBEG when an expansion starts, tSTRING_CONTENT
   with a string node in the semantic value for a run of body text, and 0
   after reporting an error when input ends before the terminator. */
static int
here_document(here, parse_state)
NODE *here;
rb_parse_state *parse_state;
{
  int c, func, indent = 0;
  char *eos, *p, *pend;
  long len;
  bstring str = NULL;
  /* The stored identifier is one flag byte followed by the terminator
     text (see heredoc_identifier); len excludes the flag byte.
     NOTE(review): read here via nd_str but stored via the nd_lit slot --
     presumably aliases of the same field; confirm. */
  eos = bdata(here->nd_str);
  len = blength(here->nd_str) - 1;
  /* Take the flag byte and step eos past it. `indent` is non-zero for the
     "<<-" form, where the terminator may be preceded by whitespace. */
  indent = (func = *eos++) & STR_FUNC_INDENT;
  /* Input ended before the terminator was found. */
  if ((c = nextc()) == -1) {
    error:
    rb_compile_error("can't find string \"%s\" anywhere before EOF", eos);
    heredoc_restore(lex_strterm, parse_state);
    lex_strterm = 0;
    return 0;
  }
  /* was_bol() is true when the character just read was the first one on
     its line; if that line is the terminator the heredoc is finished. */
  if (was_bol() && whole_match_p(eos, len, indent, parse_state)) {
    heredoc_restore(lex_strterm, parse_state);
    return tSTRING_END;
  }
  /* If we aren't doing expansions, we can just collect whole lines until
     we find the terminator. */
  if ((func & STR_FUNC_EXPAND) == 0) {
    do {
      p = bdata(parse_state->lex_lastline);
      pend = parse_state->lex_pend;
      /* Trim a trailing "\r\n" or lone "\r" from the line; a lone "\n" is
         left in place. */
      if (pend > p) {
        switch (pend[-1]) {
          case '\n':
          if (--pend == p || pend[-1] != '\r') {
            pend++;
            break;
          }
          /* "\r\n": fall through to drop the '\r' as well */
          case '\r':
          --pend;
        }
      }
      if (str) {
        bcatblk(str, p, pend - p);
      } else {
        str = blk2bstr(p, pend - p);
      }
      /* If anything was trimmed, end the line with a plain "\n". */
      if (pend < parse_state->lex_pend) bcatblk(str, "\n", 1);
      /* Mark the line as consumed so nextc() fetches the following one. */
      parse_state->lex_p = parse_state->lex_pend;
      if (nextc() == -1) {
        if (str) bdestroy(str);
        goto error;
      }
    } while (!whole_match_p(eos, len, indent, parse_state));
  }
  else {
    newtok(parse_state);
    if (c == '#') {
      switch (c = nextc()) {
        case '$':
        case '@':
        pushback(c, parse_state);
        return tSTRING_DVAR;
        case '{':
        return tSTRING_DBEG;
      }
      /* A plain '#': it is ordinary text. */
      tokadd('#', parse_state);
    }
    /* Loop until the current line is the heredoc terminator. */
    do {
      pushback(c, parse_state);
      /* Scan up to the next '\n', filling the token buffer. */
      if ((c = tokadd_string(func, '\n', 0, NULL, parse_state)) == -1) goto error;
      /* The scan stopped before a '\n' (at an expansion), so hand back
         the text collected so far and let the lexer call in again. */
      if (c != '\n') {
        pslval->node = NEW_STR(string_new(tok(), toklen()));
        return tSTRING_CONTENT;
      }
      /* tokadd_string pushed the '\n' back; read it again and keep it. */
      tokadd((char)nextc(), parse_state);
      if ((c = nextc()) == -1) goto error;
    } while (!whole_match_p(eos, len, indent, parse_state));
    str = string_new(tok(), toklen());
  }
  /* Terminator found: resume on the introducing line. The replacement
     terminator node has func == -1, which makes the next parse_string
     call return tSTRING_END. */
  heredoc_restore(lex_strterm, parse_state);
  lex_strterm = NEW_STRTERM(-1, 0, 0);
  pslval->node = NEW_STR(str);
  return tSTRING_CONTENT;
}
  3138. #include "shotgun/lib/grammar_lex.c.tab"
  3139. static void
  3140. arg_ambiguous()
  3141. {
  3142. rb_warning("ambiguous first argument; put parentheses or even spaces");
  3143. }
/* True when the lexer state is EXPR_ARG or EXPR_CMDARG. Requires
   `parse_state` in scope. */
#define IS_ARG() (parse_state->lex_state == EXPR_ARG || parse_state->lex_state == EXPR_CMDARG)
  3145. static int
  3146. yylex(YYSTYPE *yylval, void *vstate)
  3147. {
  3148. register int c;
  3149. int space_seen = 0;
  3150. int cmd_state, comment_column;
  3151. struct rb_parse_state *parse_state;
  3152. bstring cur_line;
  3153. parse_state = (struct rb_parse_state*)vstate;
  3154. parse_state->lval = (void *)yylval;
  3155. /*
  3156. c = nextc();
  3157. printf("lex char: %c\n", c);
  3158. pushback(c, parse_state);
  3159. */
  3160. if (lex_strterm) {
  3161. int token;
  3162. if (nd_type(lex_strterm) == NODE_HEREDOC) {
  3163. token = here_document(lex_strterm, parse_state);
  3164. if (token == tSTRING_END) {
  3165. lex_strterm = 0;
  3166. parse_state->lex_state = EXPR_END;
  3167. }
  3168. }
  3169. else {
  3170. token = parse_string(lex_strterm, parse_state);
  3171. if (token == tSTRING_END || token == tREGEXP_END) {
  3172. lex_strterm = 0;
  3173. parse_state->lex_state = EXPR_END;
  3174. }
  3175. }
  3176. return token;
  3177. }
  3178. cmd_state = command_start;
  3179. command_start = FALSE;
  3180. retry:
  3181. switch (c = nextc()) {
  3182. case '\0': /* NUL */
  3183. case '\004': /* ^D */
  3184. case '\032': /* ^Z */
  3185. case -1: /* end of script. */
  3186. return 0;
  3187. /* white spaces */
  3188. case ' ': case '\t': case '\f': case '\r':
  3189. case '\13': /* '\v' */
  3190. space_seen++;
  3191. goto retry;
  3192. case '#': /* it's a comment */
  3193. if(parse_state->comments) {
  3194. comment_column = parse_state->column;
  3195. cur_line = bfromcstralloc(50, "");
  3196. while((c = nextc()) != '\n' && c != -1) {
  3197. bconchar(cur_line, c);
  3198. }
  3199. // FIXME: used to have the file and column too, but took it out.
  3200. ptr_array_append(parse_state->comments, cur_line);
  3201. if(c == -1) {
  3202. return 0;
  3203. }
  3204. } else {
  3205. while ((c = nextc()) != '\n') {
  3206. if (c == -1)
  3207. return 0;
  3208. }
  3209. }
  3210. /* fall through */
  3211. case '\n':
  3212. switch (parse_state->lex_state) {
  3213. case EXPR_BEG:
  3214. case EXPR_FNAME:
  3215. case EXPR_DOT:
  3216. case EXPR_CLASS:
  3217. goto retry;
  3218. default:
  3219. break;
  3220. }
  3221. command_start = TRUE;
  3222. parse_state->lex_state = EXPR_BEG;
  3223. return '\n';
  3224. case '*':
  3225. if ((c = nextc()) == '*') {
  3226. if ((c = nextc()) == '=') {
  3227. pslval->id = tPOW;
  3228. parse_state->lex_state = EXPR_BEG;
  3229. return tOP_ASGN;
  3230. }
  3231. pushback(c, parse_state);
  3232. c = tPOW;
  3233. }
  3234. else {
  3235. if (c == '=') {
  3236. pslval->id = '*';
  3237. parse_state->lex_state = EXPR_BEG;
  3238. return tOP_ASGN;
  3239. }
  3240. pushback(c, parse_state);
  3241. if (IS_ARG() && space_seen && !ISSPACE(c)){
  3242. rb_warning("`*' interpreted as argument prefix");
  3243. c = tSTAR;
  3244. }
  3245. else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3246. c = tSTAR;
  3247. }
  3248. else {
  3249. c = '*';
  3250. }
  3251. }
  3252. switch (parse_state->lex_state) {
  3253. case EXPR_FNAME: case EXPR_DOT:
  3254. parse_state->lex_state = EXPR_ARG; break;
  3255. default:
  3256. parse_state->lex_state = EXPR_BEG; break;
  3257. }
  3258. return c;
  3259. case '!':
  3260. parse_state->lex_state = EXPR_BEG;
  3261. if ((c = nextc()) == '=') {
  3262. return tNEQ;
  3263. }
  3264. if (c == '~') {
  3265. return tNMATCH;
  3266. }
  3267. pushback(c, parse_state);
  3268. return '!';
  3269. case '=':
  3270. if (was_bol()) {
  3271. /* skip embedded rd document */
  3272. if (strncmp(parse_state->lex_p, "begin", 5) == 0 && ISSPACE(parse_state->lex_p[5])) {
  3273. for (;;) {
  3274. parse_state->lex_p = parse_state->lex_pend;
  3275. c = nextc();
  3276. if (c == -1) {
  3277. rb_compile_error("embedded document meets end of file");
  3278. return 0;
  3279. }
  3280. if (c != '=') continue;
  3281. if (strncmp(parse_state->lex_p, "end", 3) == 0 &&
  3282. (parse_state->lex_p + 3 == parse_state->lex_pend || ISSPACE(parse_state->lex_p[3]))) {
  3283. break;
  3284. }
  3285. }
  3286. parse_state->lex_p = parse_state->lex_pend;
  3287. goto retry;
  3288. }
  3289. }
  3290. switch (parse_state->lex_state) {
  3291. case EXPR_FNAME: case EXPR_DOT:
  3292. parse_state->lex_state = EXPR_ARG; break;
  3293. default:
  3294. parse_state->lex_state = EXPR_BEG; break;
  3295. }
  3296. if ((c = nextc()) == '=') {
  3297. if ((c = nextc()) == '=') {
  3298. return tEQQ;
  3299. }
  3300. pushback(c, parse_state);
  3301. return tEQ;
  3302. }
  3303. if (c == '~') {
  3304. return tMATCH;
  3305. }
  3306. else if (c == '>') {
  3307. return tASSOC;
  3308. }
  3309. pushback(c, parse_state);
  3310. return '=';
  3311. case '<':
  3312. c = nextc();
  3313. if (c == '<' &&
  3314. parse_state->lex_state != EXPR_END &&
  3315. parse_state->lex_state != EXPR_DOT &&
  3316. parse_state->lex_state != EXPR_ENDARG &&
  3317. parse_state->lex_state != EXPR_CLASS &&
  3318. (!IS_ARG() || space_seen)) {
  3319. int token = heredoc_identifier(parse_state);
  3320. if (token) return token;
  3321. }
  3322. switch (parse_state->lex_state) {
  3323. case EXPR_FNAME: case EXPR_DOT:
  3324. parse_state->lex_state = EXPR_ARG; break;
  3325. default:
  3326. parse_state->lex_state = EXPR_BEG; break;
  3327. }
  3328. if (c == '=') {
  3329. if ((c = nextc()) == '>') {
  3330. return tCMP;
  3331. }
  3332. pushback(c, parse_state);
  3333. return tLEQ;
  3334. }
  3335. if (c == '<') {
  3336. if ((c = nextc()) == '=') {
  3337. pslval->id = tLSHFT;
  3338. parse_state->lex_state = EXPR_BEG;
  3339. return tOP_ASGN;
  3340. }
  3341. pushback(c, parse_state);
  3342. return tLSHFT;
  3343. }
  3344. pushback(c, parse_state);
  3345. return '<';
  3346. case '>':
  3347. switch (parse_state->lex_state) {
  3348. case EXPR_FNAME: case EXPR_DOT:
  3349. parse_state->lex_state = EXPR_ARG; break;
  3350. default:
  3351. parse_state->lex_state = EXPR_BEG; break;
  3352. }
  3353. if ((c = nextc()) == '=') {
  3354. return tGEQ;
  3355. }
  3356. if (c == '>') {
  3357. if ((c = nextc()) == '=') {
  3358. pslval->id = tRSHFT;
  3359. parse_state->lex_state = EXPR_BEG;
  3360. return tOP_ASGN;
  3361. }
  3362. pushback(c, parse_state);
  3363. return tRSHFT;
  3364. }
  3365. pushback(c, parse_state);
  3366. return '>';
  3367. case '"':
  3368. lex_strterm = NEW_STRTERM(str_dquote, '"', 0);
  3369. return tSTRING_BEG;
  3370. case '`':
  3371. if (parse_state->lex_state == EXPR_FNAME) {
  3372. parse_state->lex_state = EXPR_END;
  3373. return c;
  3374. }
  3375. if (parse_state->lex_state == EXPR_DOT) {
  3376. if (cmd_state)
  3377. parse_state->lex_state = EXPR_CMDARG;
  3378. else
  3379. parse_state->lex_state = EXPR_ARG;
  3380. return c;
  3381. }
  3382. lex_strterm = NEW_STRTERM(str_xquote, '`', 0);
  3383. pslval->id = 0; /* so that xstring gets used normally */
  3384. return tXSTRING_BEG;
  3385. case '\'':
  3386. lex_strterm = NEW_STRTERM(str_squote, '\'', 0);
  3387. pslval->id = 0; /* so that xstring gets used normally */
  3388. return tSTRING_BEG;
  3389. case '?':
  3390. if (parse_state->lex_state == EXPR_END || parse_state->lex_state == EXPR_ENDARG) {
  3391. parse_state->lex_state = EXPR_BEG;
  3392. return '?';
  3393. }
  3394. c = nextc();
  3395. if (c == -1) {
  3396. rb_compile_error("incomplete character syntax");
  3397. return 0;
  3398. }
  3399. if (ISSPACE(c)){
  3400. if (!IS_ARG()){
  3401. int c2 = 0;
  3402. switch (c) {
  3403. case ' ':
  3404. c2 = 's';
  3405. break;
  3406. case '\n':
  3407. c2 = 'n';
  3408. break;
  3409. case '\t':
  3410. c2 = 't';
  3411. break;
  3412. case '\v':
  3413. c2 = 'v';
  3414. break;
  3415. case '\r':
  3416. c2 = 'r';
  3417. break;
  3418. case '\f':
  3419. c2 = 'f';
  3420. break;
  3421. }
  3422. if (c2) {
  3423. rb_warn("invalid character syntax; use ?\\%c", c2);
  3424. }
  3425. }
  3426. ternary:
  3427. pushback(c, parse_state);
  3428. parse_state->lex_state = EXPR_BEG;
  3429. parse_state->ternary_colon = 1;
  3430. return '?';
  3431. }
  3432. else if (ismbchar(c)) {
  3433. rb_warn("multibyte character literal not supported yet; use ?\\%.3o", c);
  3434. goto ternary;
  3435. }
  3436. else if ((ISALNUM(c) || c == '_') && parse_state->lex_p < parse_state->lex_pend && is_identchar(*(parse_state->lex_p))) {
  3437. goto ternary;
  3438. }
  3439. else if (c == '\\') {
  3440. c = read_escape(parse_state);
  3441. }
  3442. c &= 0xff;
  3443. parse_state->lex_state = EXPR_END;
  3444. pslval->node = NEW_FIXNUM((intptr_t)c);
  3445. return tINTEGER;
  3446. case '&':
  3447. if ((c = nextc()) == '&') {
  3448. parse_state->lex_state = EXPR_BEG;
  3449. if ((c = nextc()) == '=') {
  3450. pslval->id = tANDOP;
  3451. parse_state->lex_state = EXPR_BEG;
  3452. return tOP_ASGN;
  3453. }
  3454. pushback(c, parse_state);
  3455. return tANDOP;
  3456. }
  3457. else if (c == '=') {
  3458. pslval->id = '&';
  3459. parse_state->lex_state = EXPR_BEG;
  3460. return tOP_ASGN;
  3461. }
  3462. pushback(c, parse_state);
  3463. if (IS_ARG() && space_seen && !ISSPACE(c)){
  3464. rb_warning("`&' interpreted as argument prefix");
  3465. c = tAMPER;
  3466. }
  3467. else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3468. c = tAMPER;
  3469. }
  3470. else {
  3471. c = '&';
  3472. }
  3473. switch (parse_state->lex_state) {
  3474. case EXPR_FNAME: case EXPR_DOT:
  3475. parse_state->lex_state = EXPR_ARG; break;
  3476. default:
  3477. parse_state->lex_state = EXPR_BEG;
  3478. }
  3479. return c;
  3480. case '|':
  3481. if ((c = nextc()) == '|') {
  3482. parse_state->lex_state = EXPR_BEG;
  3483. if ((c = nextc()) == '=') {
  3484. pslval->id = tOROP;
  3485. parse_state->lex_state = EXPR_BEG;
  3486. return tOP_ASGN;
  3487. }
  3488. pushback(c, parse_state);
  3489. return tOROP;
  3490. }
  3491. if (c == '=') {
  3492. pslval->id = '|';
  3493. parse_state->lex_state = EXPR_BEG;
  3494. return tOP_ASGN;
  3495. }
  3496. if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
  3497. parse_state->lex_state = EXPR_ARG;
  3498. }
  3499. else {
  3500. parse_state->lex_state = EXPR_BEG;
  3501. }
  3502. pushback(c, parse_state);
  3503. return '|';
  3504. case '+':
  3505. c = nextc();
  3506. if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
  3507. parse_state->lex_state = EXPR_ARG;
  3508. if (c == '@') {
  3509. return tUPLUS;
  3510. }
  3511. pushback(c, parse_state);
  3512. return '+';
  3513. }
  3514. if (c == '=') {
  3515. pslval->id = '+';
  3516. parse_state->lex_state = EXPR_BEG;
  3517. return tOP_ASGN;
  3518. }
  3519. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
  3520. (IS_ARG() && space_seen && !ISSPACE(c))) {
  3521. if (IS_ARG()) arg_ambiguous();
  3522. parse_state->lex_state = EXPR_BEG;
  3523. pushback(c, parse_state);
  3524. if (ISDIGIT(c)) {
  3525. c = '+';
  3526. goto start_num;
  3527. }
  3528. return tUPLUS;
  3529. }
  3530. parse_state->lex_state = EXPR_BEG;
  3531. pushback(c, parse_state);
  3532. return '+';
  3533. case '-':
  3534. c = nextc();
  3535. if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
  3536. parse_state->lex_state = EXPR_ARG;
  3537. if (c == '@') {
  3538. return tUMINUS;
  3539. }
  3540. pushback(c, parse_state);
  3541. return '-';
  3542. }
  3543. if (c == '=') {
  3544. pslval->id = '-';
  3545. parse_state->lex_state = EXPR_BEG;
  3546. return tOP_ASGN;
  3547. }
  3548. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
  3549. (IS_ARG() && space_seen && !ISSPACE(c))) {
  3550. if (IS_ARG()) arg_ambiguous();
  3551. parse_state->lex_state = EXPR_BEG;
  3552. pushback(c, parse_state);
  3553. if (ISDIGIT(c)) {
  3554. return tUMINUS_NUM;
  3555. }
  3556. return tUMINUS;
  3557. }
  3558. parse_state->lex_state = EXPR_BEG;
  3559. pushback(c, parse_state);
  3560. return '-';
  3561. case '.':
  3562. parse_state->lex_state = EXPR_BEG;
  3563. if ((c = nextc()) == '.') {
  3564. if ((c = nextc()) == '.') {
  3565. return tDOT3;
  3566. }
  3567. pushback(c, parse_state);
  3568. return tDOT2;
  3569. }
  3570. pushback(c, parse_state);
  3571. if (ISDIGIT(c)) {
  3572. yyerror("no .<digit> floating literal anymore; put 0 before dot");
  3573. }
  3574. parse_state->lex_state = EXPR_DOT;
  3575. return '.';
  3576. start_num:
  3577. case '0': case '1': case '2': case '3': case '4':
  3578. case '5': case '6': case '7': case '8': case '9':
  3579. {
  3580. int is_float, seen_point, seen_e, nondigit;
  3581. is_float = seen_point = seen_e = nondigit = 0;
  3582. parse_state->lex_state = EXPR_END;
  3583. newtok(parse_state);
  3584. if (c == '-' || c == '+') {
  3585. tokadd((char)c,parse_state);
  3586. c = nextc();
  3587. }
  3588. if (c == '0') {
  3589. int start = toklen();
  3590. c = nextc();
  3591. if (c == 'x' || c == 'X') {
  3592. /* hexadecimal */
  3593. c = nextc();
  3594. if (ISXDIGIT(c)) {
  3595. do {
  3596. if (c == '_') {
  3597. if (nondigit) break;
  3598. nondigit = c;
  3599. continue;
  3600. }
  3601. if (!ISXDIGIT(c)) break;
  3602. nondigit = 0;
  3603. tokadd((char)c,parse_state);
  3604. } while ((c = nextc()) != -1);
  3605. }
  3606. pushback(c, parse_state);
  3607. tokfix();
  3608. if (toklen() == start) {
  3609. yyerror("numeric literal without digits");
  3610. }
  3611. else if (nondigit) goto trailing_uc;
  3612. pslval->node = NEW_HEXNUM(string_new2(tok()));
  3613. return tINTEGER;
  3614. }
  3615. if (c == 'b' || c == 'B') {
  3616. /* binary */
  3617. c = nextc();
  3618. if (c == '0' || c == '1') {
  3619. do {
  3620. if (c == '_') {
  3621. if (nondigit) break;
  3622. nondigit = c;
  3623. continue;
  3624. }
  3625. if (c != '0' && c != '1') break;
  3626. nondigit = 0;
  3627. tokadd((char)c, parse_state);
  3628. } while ((c = nextc()) != -1);
  3629. }
  3630. pushback(c, parse_state);
  3631. tokfix();
  3632. if (toklen() == start) {
  3633. yyerror("numeric literal without digits");
  3634. }
  3635. else if (nondigit) goto trailing_uc;
  3636. pslval->node = NEW_BINNUM(string_new2(tok()));
  3637. return tINTEGER;
  3638. }
  3639. if (c == 'd' || c == 'D') {
  3640. /* decimal */
  3641. c = nextc();
  3642. if (ISDIGIT(c)) {
  3643. do {
  3644. if (c == '_') {
  3645. if (nondigit) break;
  3646. nondigit = c;
  3647. continue;
  3648. }
  3649. if (!ISDIGIT(c)) break;
  3650. nondigit = 0;
  3651. tokadd((char)c, parse_state);
  3652. } while ((c = nextc()) != -1);
  3653. }
  3654. pushback(c, parse_state);
  3655. tokfix();
  3656. if (toklen() == start) {
  3657. yyerror("numeric literal without digits");
  3658. }
  3659. else if (nondigit) goto trailing_uc;
  3660. pslval->node = NEW_NUMBER(string_new2(tok()));
  3661. return tINTEGER;
  3662. }
  3663. if (c == '_') {
  3664. /* 0_0 */
  3665. goto octal_number;
  3666. }
  3667. if (c == 'o' || c == 'O') {
  3668. /* prefixed octal */
  3669. c = nextc();
  3670. if (c == '_') {
  3671. yyerror("numeric literal without digits");
  3672. }
  3673. }
  3674. if (c >= '0' && c <= '7') {
  3675. /* octal */
  3676. octal_number:
  3677. do {
  3678. if (c == '_') {
  3679. if (nondigit) break;
  3680. nondigit = c;
  3681. continue;
  3682. }
  3683. if (c < '0' || c > '7') break;
  3684. nondigit = 0;
  3685. tokadd((char)c, parse_state);
  3686. } while ((c = nextc()) != -1);
  3687. if (toklen() > start) {
  3688. pushback(c, parse_state);
  3689. tokfix();
  3690. if (nondigit) goto trailing_uc;
  3691. pslval->node = NEW_OCTNUM(string_new2(tok()));
  3692. return tINTEGER;
  3693. }
  3694. if (nondigit) {
  3695. pushback(c, parse_state);
  3696. goto trailing_uc;
  3697. }
  3698. }
  3699. if (c > '7' && c <= '9') {
  3700. yyerror("Illegal octal digit");
  3701. }
  3702. else if (c == '.' || c == 'e' || c == 'E') {
  3703. tokadd('0', parse_state);
  3704. }
  3705. else {
  3706. pushback(c, parse_state);
  3707. pslval->node = NEW_FIXNUM(0);
  3708. return tINTEGER;
  3709. }
  3710. }
  3711. for (;;) {
  3712. switch (c) {
  3713. case '0': case '1': case '2': case '3': case '4':
  3714. case '5': case '6': case '7': case '8': case '9':
  3715. nondigit = 0;
  3716. tokadd((char)c, parse_state);
  3717. break;
  3718. case '.':
  3719. if (nondigit) goto trailing_uc;
  3720. if (seen_point || seen_e) {
  3721. goto decode_num;
  3722. }
  3723. else {
  3724. int c0 = nextc();
  3725. if (!ISDIGIT(c0)) {
  3726. pushback(c0, parse_state);
  3727. goto decode_num;
  3728. }
  3729. c = c0;
  3730. }
  3731. tokadd('.', parse_state);
  3732. tokadd((char)c, parse_state);
  3733. is_float++;
  3734. seen_point++;
  3735. nondigit = 0;
  3736. break;
  3737. case 'e':
  3738. case 'E':
  3739. if (nondigit) {
  3740. pushback(c, parse_state);
  3741. c = nondigit;
  3742. goto decode_num;
  3743. }
  3744. if (seen_e) {
  3745. goto decode_num;
  3746. }
  3747. tokadd((char)c, parse_state);
  3748. seen_e++;
  3749. is_float++;
  3750. nondigit = c;
  3751. c = nextc();
  3752. if (c != '-' && c != '+') continue;
  3753. tokadd((char)c, parse_state);
  3754. nondigit = c;
  3755. break;
  3756. case '_': /* `_' in number just ignored */
  3757. if (nondigit) goto decode_num;
  3758. nondigit = c;
  3759. break;
  3760. default:
  3761. goto decode_num;
  3762. }
  3763. c = nextc();
  3764. }
  3765. decode_num:
  3766. pushback(c, parse_state);
  3767. tokfix();
  3768. if (nondigit) {
  3769. char tmp[30];
  3770. trailing_uc:
  3771. snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit);
  3772. yyerror(tmp);
  3773. }
  3774. if (is_float) {
  3775. /* Some implementations of strtod() don't guarantee to
  3776. * set errno, so we need to reset it ourselves.
  3777. */
  3778. errno = 0;
  3779. strtod(tok(), 0);
  3780. if (errno == ERANGE) {
  3781. rb_warn("Float %s out of range", tok());
  3782. errno = 0;
  3783. }
  3784. pslval->node = NEW_FLOAT(string_new2(tok()));
  3785. return tFLOAT;
  3786. }
  3787. pslval->node = NEW_NUMBER(string_new2(tok()));
  3788. return tINTEGER;
  3789. }
  3790. case ']':
  3791. case '}':
  3792. case ')':
  3793. COND_LEXPOP();
  3794. CMDARG_LEXPOP();
  3795. parse_state->lex_state = EXPR_END;
  3796. return c;
  3797. case ':':
  3798. c = nextc();
  3799. if (c == ':') {
  3800. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
  3801. parse_state->lex_state == EXPR_CLASS || (IS_ARG() && space_seen)) {
  3802. parse_state->lex_state = EXPR_BEG;
  3803. return tCOLON3;
  3804. }
  3805. parse_state->lex_state = EXPR_DOT;
  3806. return tCOLON2;
  3807. }
  3808. if (parse_state->lex_state == EXPR_END || parse_state->lex_state == EXPR_ENDARG || ISSPACE(c)) {
  3809. pushback(c, parse_state);
  3810. parse_state->lex_state = EXPR_BEG;
  3811. return ':';
  3812. }
  3813. switch (c) {
  3814. case '\'':
  3815. lex_strterm = NEW_STRTERM(str_ssym, (intptr_t)c, 0);
  3816. break;
  3817. case '"':
  3818. lex_strterm = NEW_STRTERM(str_dsym, (intptr_t)c, 0);
  3819. break;
  3820. default:
  3821. pushback(c, parse_state);
  3822. break;
  3823. }
  3824. parse_state->lex_state = EXPR_FNAME;
  3825. return tSYMBEG;
  3826. case '/':
  3827. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3828. lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
  3829. return tREGEXP_BEG;
  3830. }
  3831. if ((c = nextc()) == '=') {
  3832. pslval->id = '/';
  3833. parse_state->lex_state = EXPR_BEG;
  3834. return tOP_ASGN;
  3835. }
  3836. pushback(c, parse_state);
  3837. if (IS_ARG() && space_seen) {
  3838. if (!ISSPACE(c)) {
  3839. arg_ambiguous();
  3840. lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
  3841. return tREGEXP_BEG;
  3842. }
  3843. }
  3844. switch (parse_state->lex_state) {
  3845. case EXPR_FNAME: case EXPR_DOT:
  3846. parse_state->lex_state = EXPR_ARG; break;
  3847. default:
  3848. parse_state->lex_state = EXPR_BEG; break;
  3849. }
  3850. return '/';
  3851. case '^':
  3852. if ((c = nextc()) == '=') {
  3853. pslval->id = '^';
  3854. parse_state->lex_state = EXPR_BEG;
  3855. return tOP_ASGN;
  3856. }
  3857. switch (parse_state->lex_state) {
  3858. case EXPR_FNAME: case EXPR_DOT:
  3859. parse_state->lex_state = EXPR_ARG; break;
  3860. default:
  3861. parse_state->lex_state = EXPR_BEG; break;
  3862. }
  3863. pushback(c, parse_state);
  3864. return '^';
  3865. case ';':
  3866. command_start = TRUE;
  3867. case ',':
  3868. parse_state->lex_state = EXPR_BEG;
  3869. return c;
  3870. case '~':
  3871. if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
  3872. if ((c = nextc()) != '@') {
  3873. pushback(c, parse_state);
  3874. }
  3875. }
  3876. switch (parse_state->lex_state) {
  3877. case EXPR_FNAME: case EXPR_DOT:
  3878. parse_state->lex_state = EXPR_ARG; break;
  3879. default:
  3880. parse_state->lex_state = EXPR_BEG; break;
  3881. }
  3882. return '~';
  3883. case '(':
  3884. command_start = TRUE;
  3885. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3886. c = tLPAREN;
  3887. }
  3888. else if (space_seen) {
  3889. if (parse_state->lex_state == EXPR_CMDARG) {
  3890. c = tLPAREN_ARG;
  3891. }
  3892. else if (parse_state->lex_state == EXPR_ARG) {
  3893. rb_warn("don't put space before argument parentheses");
  3894. c = '(';
  3895. }
  3896. }
  3897. COND_PUSH(0);
  3898. CMDARG_PUSH(0);
  3899. parse_state->lex_state = EXPR_BEG;
  3900. return c;
  3901. case '[':
  3902. if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
  3903. parse_state->lex_state = EXPR_ARG;
  3904. if ((c = nextc()) == ']') {
  3905. if ((c = nextc()) == '=') {
  3906. return tASET;
  3907. }
  3908. pushback(c, parse_state);
  3909. return tAREF;
  3910. }
  3911. pushback(c, parse_state);
  3912. return '[';
  3913. }
  3914. else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3915. c = tLBRACK;
  3916. }
  3917. else if (IS_ARG() && space_seen) {
  3918. c = tLBRACK;
  3919. }
  3920. parse_state->lex_state = EXPR_BEG;
  3921. COND_PUSH(0);
  3922. CMDARG_PUSH(0);
  3923. return c;
  3924. case '{':
  3925. if (IS_ARG() || parse_state->lex_state == EXPR_END)
  3926. c = '{'; /* block (primary) */
  3927. else if (parse_state->lex_state == EXPR_ENDARG)
  3928. c = tLBRACE_ARG; /* block (expr) */
  3929. else
  3930. c = tLBRACE; /* hash */
  3931. COND_PUSH(0);
  3932. CMDARG_PUSH(0);
  3933. parse_state->lex_state = EXPR_BEG;
  3934. return c;
  3935. case '\\':
  3936. c = nextc();
  3937. if (c == '\n') {
  3938. space_seen = 1;
  3939. goto retry; /* skip \\n */
  3940. }
  3941. pushback(c, parse_state);
  3942. if(parse_state->lex_state == EXPR_BEG
  3943. || parse_state->lex_state == EXPR_MID || space_seen) {
  3944. parse_state->lex_state = EXPR_DOT;
  3945. return tUBS;
  3946. }
  3947. parse_state->lex_state = EXPR_DOT;
  3948. return '\\';
  3949. case '%':
  3950. if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
  3951. intptr_t term;
  3952. intptr_t paren;
  3953. char tmpstr[256];
  3954. char *cur;
  3955. c = nextc();
  3956. quotation:
  3957. if (!ISALNUM(c)) {
  3958. term = c;
  3959. c = 'Q';
  3960. }
  3961. else {
  3962. term = nextc();
  3963. if (ISALNUM(term) || ismbchar(term)) {
  3964. cur = tmpstr;
  3965. *cur++ = c;
  3966. while(ISALNUM(term) || ismbchar(term)) {
  3967. *cur++ = term;
  3968. term = nextc();
  3969. }
  3970. *cur = 0;
  3971. c = 1;
  3972. }
  3973. }
  3974. if (c == -1 || term == -1) {
  3975. rb_compile_error("unterminated quoted string meets end of file");
  3976. return 0;
  3977. }
  3978. paren = term;
  3979. if (term == '(') term = ')';
  3980. else if (term == '[') term = ']';
  3981. else if (term == '{') term = '}';
  3982. else if (term == '<') term = '>';
  3983. else paren = 0;
  3984. switch (c) {
  3985. case 'Q':
  3986. lex_strterm = NEW_STRTERM(str_dquote, term, paren);
  3987. return tSTRING_BEG;
  3988. case 'q':
  3989. lex_strterm = NEW_STRTERM(str_squote, term, paren);
  3990. return tSTRING_BEG;
  3991. case 'W':
  3992. lex_strterm = NEW_STRTERM(str_dquote | STR_FUNC_QWORDS, term, paren);
  3993. do {c = nextc();} while (ISSPACE(c));
  3994. pushback(c, parse_state);
  3995. return tWORDS_BEG;
  3996. case 'w':
  3997. lex_strterm = NEW_STRTERM(str_squote | STR_FUNC_QWORDS, term, paren);
  3998. do {c = nextc();} while (ISSPACE(c));
  3999. pushback(c, parse_state);
  4000. return tQWORDS_BEG;
  4001. case 'x':
  4002. lex_strterm = NEW_STRTERM(str_xquote, term, paren);
  4003. pslval->id = 0;
  4004. return tXSTRING_BEG;
  4005. case 'r':
  4006. lex_strterm = NEW_STRTERM(str_regexp, term, paren);
  4007. return tREGEXP_BEG;
  4008. case 's':
  4009. lex_strterm = NEW_STRTERM(str_ssym, term, paren);
  4010. parse_state->lex_state = EXPR_FNAME;
  4011. return tSYMBEG;
  4012. case 1:
  4013. lex_strterm = NEW_STRTERM(str_xquote, term, paren);
  4014. pslval->id = rb_intern(tmpstr);
  4015. return tXSTRING_BEG;
  4016. default:
  4017. lex_strterm = NEW_STRTERM(str_xquote, term, paren);
  4018. tmpstr[0] = c;
  4019. tmpstr[1] = 0;
  4020. pslval->id = rb_intern(tmpstr);
  4021. return tXSTRING_BEG;
  4022. }
  4023. }
  4024. if ((c = nextc()) == '=') {
  4025. pslval->id = '%';
  4026. parse_state->lex_state = EXPR_BEG;
  4027. return tOP_ASGN;
  4028. }
  4029. if (IS_ARG() && space_seen && !ISSPACE(c)) {
  4030. goto quotation;
  4031. }
  4032. switch (parse_state->lex_state) {
  4033. case EXPR_FNAME: case EXPR_DOT:
  4034. parse_state->lex_state = EXPR_ARG; break;
  4035. default:
  4036. parse_state->lex_state = EXPR_BEG; break;
  4037. }
  4038. pushback(c, parse_state);
  4039. return '%';
  4040. case '$':
  4041. parse_state->lex_state = EXPR_END;
  4042. newtok(parse_state);
  4043. c = nextc();
  4044. switch (c) {
  4045. case '_': /* $_: last read line string */
  4046. c = nextc();
  4047. if (is_identchar(c)) {
  4048. tokadd('$', parse_state);
  4049. tokadd('_', parse_state);
  4050. break;
  4051. }
  4052. pushback(c, parse_state);
  4053. c = '_';
  4054. /* fall through */
  4055. case '~': /* $~: match-data */
  4056. local_cnt(c);
  4057. /* fall through */
  4058. case '*': /* $*: argv */
  4059. case '$': /* $$: pid */
  4060. case '?': /* $?: last status */
  4061. case '!': /* $!: error string */
  4062. case '@': /* $@: error position */
  4063. case '/': /* $/: input record separator */
  4064. case '\\': /* $\: output record separator */
  4065. case ';': /* $;: field separator */
  4066. case ',': /* $,: output field separator */
  4067. case '.': /* $.: last read line number */
  4068. case '=': /* $=: ignorecase */
  4069. case ':': /* $:: load path */
  4070. case '<': /* $<: reading filename */
  4071. case '>': /* $>: default output handle */
  4072. case '\"': /* $": already loaded files */
  4073. tokadd('$', parse_state);
  4074. tokadd((char)c, parse_state);
  4075. tokfix();
  4076. pslval->id = rb_intern(tok());
  4077. return tGVAR;
  4078. case '-':
  4079. tokadd('$', parse_state);
  4080. tokadd((char)c, parse_state);
  4081. c = nextc();
  4082. tokadd((char)c, parse_state);
  4083. tokfix();
  4084. pslval->id = rb_intern(tok());
  4085. /* xxx shouldn't check if valid option variable */
  4086. return tGVAR;
  4087. case '&': /* $&: last match */
  4088. case '`': /* $`: string before last match */
  4089. case '\'': /* $': string after last match */
  4090. case '+': /* $+: string matches last paren. */
  4091. pslval->node = NEW_BACK_REF((intptr_t)c);
  4092. return tBACK_REF;
  4093. case '1': case '2': case '3':
  4094. case '4': case '5': case '6':
  4095. case '7': case '8': case '9':
  4096. tokadd('$', parse_state);
  4097. do {
  4098. tokadd((char)c, parse_state);
  4099. c = nextc();
  4100. } while (ISDIGIT(c));
  4101. pushback(c, parse_state);
  4102. tokfix();
  4103. pslval->node = NEW_NTH_REF((intptr_t)atoi(tok()+1));
  4104. return tNTH_REF;
  4105. default:
  4106. if (!is_identchar(c)) {
  4107. pushback(c, parse_state);
  4108. return '$';
  4109. }
  4110. case '0':
  4111. tokadd('$', parse_state);
  4112. }
  4113. break;
  4114. case '@':
  4115. c = nextc();
  4116. newtok(parse_state);
  4117. tokadd('@', parse_state);
  4118. if (c == '@') {
  4119. tokadd('@', parse_state);
  4120. c = nextc();
  4121. }
  4122. if (ISDIGIT(c)) {
  4123. if (tokidx == 1) {
  4124. rb_compile_error("`@%c' is not allowed as an instance variable name", c);
  4125. }
  4126. else {
  4127. rb_compile_error("`@@%c' is not allowed as a class variable name", c);
  4128. }
  4129. }
  4130. if (!is_identchar(c)) {
  4131. pushback(c, parse_state);
  4132. return '@';
  4133. }
  4134. break;
  4135. case '_':
  4136. if (was_bol() && whole_match_p("__END__", 7, 0, parse_state)) {
  4137. parse_state->lex_lastline = 0;
  4138. return -1;
  4139. }
  4140. newtok(parse_state);
  4141. break;
  4142. default:
  4143. if (!is_identchar(c)) {
  4144. rb_compile_error("Invalid char `\\%03o' in expression", c);
  4145. goto retry;
  4146. }
  4147. newtok(parse_state);
  4148. break;
  4149. }
  4150. do {
  4151. tokadd((char)c, parse_state);
  4152. if (ismbchar(c)) {
  4153. int i, len = mbclen(c)-1;
  4154. for (i = 0; i < len; i++) {
  4155. c = nextc();
  4156. tokadd((char)c, parse_state);
  4157. }
  4158. }
  4159. c = nextc();
  4160. } while (is_identchar(c));
  4161. if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
  4162. tokadd((char)c, parse_state);
  4163. }
  4164. else {
  4165. pushback(c, parse_state);
  4166. }
  4167. tokfix();
  4168. {
  4169. int result = 0;
  4170. switch (tok()[0]) {
  4171. case '$':
  4172. parse_state->lex_state = EXPR_END;
  4173. result = tGVAR;
  4174. break;
  4175. case '@':
  4176. parse_state->lex_state = EXPR_END;
  4177. if (tok()[1] == '@')
  4178. result = tCVAR;
  4179. else
  4180. result = tIVAR;
  4181. break;
  4182. default:
  4183. if (toklast() == '!' || toklast() == '?') {
  4184. result = tFID;
  4185. }
  4186. else {
  4187. if (parse_state->lex_state == EXPR_FNAME) {
  4188. if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
  4189. (!peek('=') || (parse_state->lex_p + 1 < parse_state->lex_pend && (parse_state->lex_p)[1] == '>'))) {
  4190. result = tIDENTIFIER;
  4191. tokadd((char)c, parse_state);
  4192. tokfix();
  4193. }
  4194. else {
  4195. pushback(c, parse_state);
  4196. }
  4197. }
  4198. if (result == 0 && ISUPPER(tok()[0])) {
  4199. result = tCONSTANT;
  4200. }
  4201. else {
  4202. result = tIDENTIFIER;
  4203. }
  4204. }
  4205. if (parse_state->lex_state != EXPR_DOT) {
  4206. const struct kwtable *kw;
  4207. /* See if it is a reserved word. */
  4208. kw = syd_reserved_word(tok(), toklen());
  4209. if (kw) {
  4210. enum lex_state state = parse_state->lex_state;
  4211. parse_state->lex_state = kw->state;
  4212. if (state == EXPR_FNAME) {
  4213. pslval->id = rb_intern(kw->name);
  4214. }
  4215. if (kw->id[0] == kDO) {
  4216. if (COND_P()) return kDO_COND;
  4217. if (CMDARG_P() && state != EXPR_CMDARG)
  4218. return kDO_BLOCK;
  4219. if (state == EXPR_ENDARG)
  4220. return kDO_BLOCK;
  4221. return kDO;
  4222. }
  4223. if (state == EXPR_BEG)
  4224. return kw->id[0];
  4225. else {
  4226. if (kw->id[0] != kw->id[1])
  4227. parse_state->lex_state = EXPR_BEG;
  4228. return kw->id[1];
  4229. }
  4230. }
  4231. }
  4232. if (parse_state->lex_state == EXPR_BEG ||
  4233. parse_state->lex_state == EXPR_MID ||
  4234. parse_state->lex_state == EXPR_DOT ||
  4235. parse_state->lex_state == EXPR_ARG ||
  4236. parse_state->lex_state == EXPR_CMDARG) {
  4237. if (cmd_state) {
  4238. parse_state->lex_state = EXPR_CMDARG;
  4239. }
  4240. else {
  4241. parse_state->lex_state = EXPR_ARG;
  4242. }
  4243. }
  4244. else {
  4245. parse_state->lex_state = EXPR_END;
  4246. }
  4247. }
  4248. pslval->id = rb_intern(tok());
  4249. if (is_local_id(pslval->id) && local_id(pslval->id)) {
  4250. parse_state->lex_state = EXPR_END;
  4251. }
  4252. return result;
  4253. }
  4254. }
  4255. static NODE*
  4256. syd_node_newnode(rb_parse_state *st, enum node_type type,
  4257. OBJECT a0, OBJECT a1, OBJECT a2)
  4258. {
  4259. NODE *n = (NODE*)pt_allocate(st, sizeof(NODE));
  4260. n->flags = 0;
  4261. nd_set_type(n, type);
  4262. nd_set_line(n, ruby_sourceline);
  4263. n->nd_file = ruby_sourcefile;
  4264. n->u1.value = a0;
  4265. n->u2.value = a1;
  4266. n->u3.value = a2;
  4267. return n;
  4268. }
  4269. static NODE*
  4270. newline_node(parse_state, node)
  4271. rb_parse_state *parse_state;
  4272. NODE *node;
  4273. {
  4274. NODE *nl = 0;
  4275. if (node) {
  4276. if (nd_type(node) == NODE_NEWLINE) return node;
  4277. nl = NEW_NEWLINE(node);
  4278. fixpos(nl, node);
  4279. nl->nd_nth = nd_line(node);
  4280. }
  4281. return nl;
  4282. }
  4283. static void
  4284. fixpos(node, orig)
  4285. NODE *node, *orig;
  4286. {
  4287. if (!node) return;
  4288. if (!orig) return;
  4289. if (orig == (NODE*)1) return;
  4290. node->nd_file = orig->nd_file;
  4291. nd_set_line(node, nd_line(orig));
  4292. }
  4293. static void
  4294. parser_warning(rb_parse_state *parse_state, NODE *node, const char *mesg)
  4295. {
  4296. int line = ruby_sourceline;
  4297. if(parse_state->emit_warnings) {
  4298. ruby_sourceline = nd_line(node);
  4299. printf("%s:%zi: warning: %s\n", ruby_sourcefile, ruby_sourceline, mesg);
  4300. ruby_sourceline = line;
  4301. }
  4302. }
  4303. static NODE*
  4304. block_append(parse_state, head, tail)
  4305. rb_parse_state *parse_state;
  4306. NODE *head, *tail;
  4307. {
  4308. NODE *end, *h = head;
  4309. if (tail == 0) return head;
  4310. again:
  4311. if (h == 0) return tail;
  4312. switch (nd_type(h)) {
  4313. case NODE_NEWLINE:
  4314. h = h->nd_next;
  4315. goto again;
  4316. case NODE_STR:
  4317. case NODE_LIT:
  4318. parser_warning(parse_state, h, "unused literal ignored");
  4319. default:
  4320. h = end = NEW_BLOCK(head);
  4321. end->nd_end = end;
  4322. fixpos(end, head);
  4323. head = end;
  4324. break;
  4325. case NODE_BLOCK:
  4326. end = h->nd_end;
  4327. break;
  4328. }
  4329. if (RTEST(ruby_verbose)) {
  4330. NODE *nd = end->nd_head;
  4331. newline:
  4332. switch (nd_type(nd)) {
  4333. case NODE_RETURN:
  4334. case NODE_BREAK:
  4335. case NODE_NEXT:
  4336. case NODE_REDO:
  4337. case NODE_RETRY:
  4338. parser_warning(parse_state, nd, "statement not reached");
  4339. break;
  4340. case NODE_NEWLINE:
  4341. nd = nd->nd_next;
  4342. goto newline;
  4343. default:
  4344. break;
  4345. }
  4346. }
  4347. if (nd_type(tail) != NODE_BLOCK) {
  4348. tail = NEW_BLOCK(tail);
  4349. tail->nd_end = tail;
  4350. }
  4351. end->nd_next = tail;
  4352. h->nd_end = tail->nd_end;
  4353. return head;
  4354. }
  4355. /* append item to the list */
  4356. static NODE*
  4357. list_append(parse_state, list, item)
  4358. rb_parse_state *parse_state;
  4359. NODE *list, *item;
  4360. {
  4361. NODE *last;
  4362. if (list == 0) return NEW_LIST(item);
  4363. if (list->nd_next) {
  4364. last = list->nd_next->nd_end;
  4365. }
  4366. else {
  4367. last = list;
  4368. }
  4369. list->nd_alen += 1;
  4370. last->nd_next = NEW_LIST(item);
  4371. list->nd_next->nd_end = last->nd_next;
  4372. return list;
  4373. }
  4374. /* concat two lists */
  4375. static NODE*
  4376. list_concat(head, tail)
  4377. NODE *head, *tail;
  4378. {
  4379. NODE *last;
  4380. if (head->nd_next) {
  4381. last = head->nd_next->nd_end;
  4382. }
  4383. else {
  4384. last = head;
  4385. }
  4386. head->nd_alen += tail->nd_alen;
  4387. last->nd_next = tail;
  4388. if (tail->nd_next) {
  4389. head->nd_next->nd_end = tail->nd_next->nd_end;
  4390. }
  4391. else {
  4392. head->nd_next->nd_end = tail;
  4393. }
  4394. return head;
  4395. }
  4396. /* concat two string literals */
  4397. static NODE *
  4398. literal_concat(parse_state, head, tail)
  4399. rb_parse_state *parse_state;
  4400. NODE *head, *tail;
  4401. {
  4402. enum node_type htype;
  4403. if (!head) return tail;
  4404. if (!tail) return head;
  4405. htype = nd_type(head);
  4406. if (htype == NODE_EVSTR) {
  4407. NODE *node = NEW_DSTR(string_new(0, 0));
  4408. head = list_append(parse_state, node, head);
  4409. }
  4410. switch (nd_type(tail)) {
  4411. case NODE_STR:
  4412. if (htype == NODE_STR) {
  4413. bconcat(head->nd_str, tail->nd_str);
  4414. bdestroy(tail->nd_str);
  4415. }
  4416. else {
  4417. list_append(parse_state, head, tail);
  4418. }
  4419. break;
  4420. case NODE_DSTR:
  4421. if (htype == NODE_STR) {
  4422. bconcat(head->nd_str, tail->nd_str);
  4423. bdestroy(tail->nd_str);
  4424. tail->nd_lit = head->nd_lit;
  4425. head = tail;
  4426. }
  4427. else {
  4428. nd_set_type(tail, NODE_ARRAY);
  4429. tail->nd_head = NEW_STR(tail->nd_lit);
  4430. list_concat(head, tail);
  4431. }
  4432. break;
  4433. case NODE_EVSTR:
  4434. if (htype == NODE_STR) {
  4435. nd_set_type(head, NODE_DSTR);
  4436. head->nd_alen = 1;
  4437. }
  4438. list_append(parse_state, head, tail);
  4439. break;
  4440. }
  4441. return head;
  4442. }
  4443. static NODE *
  4444. evstr2dstr(parse_state, node)
  4445. rb_parse_state *parse_state;
  4446. NODE *node;
  4447. {
  4448. if (nd_type(node) == NODE_EVSTR) {
  4449. node = list_append(parse_state, NEW_DSTR(string_new(0, 0)), node);
  4450. }
  4451. return node;
  4452. }
  4453. static NODE *
  4454. new_evstr(parse_state, node)
  4455. rb_parse_state *parse_state;
  4456. NODE *node;
  4457. {
  4458. NODE *head = node;
  4459. again:
  4460. if (node) {
  4461. switch (nd_type(node)) {
  4462. case NODE_STR: case NODE_DSTR: case NODE_EVSTR:
  4463. return node;
  4464. case NODE_NEWLINE:
  4465. node = node->nd_next;
  4466. goto again;
  4467. }
  4468. }
  4469. return NEW_EVSTR(head);
  4470. }
/*
 * Table pairing operator token values with their textual names.
 *
 * convert_op() scans this table from the top and stops at the first
 * entry whose token matches, so for tokens listed more than once
 * (e.g. '+', '-', '!', '~', tUPLUS, tUMINUS) only the first spelling is
 * returned by that lookup. Keep the order of entries as it is.
 *
 * The table is terminated by an entry whose token is 0.
 */
static const struct {
    ID token;
    const char name[12];    /* every name below fits, terminator included */
} op_tbl[] = {
    {tDOT2, ".."},
    {tDOT3, "..."},
    {'+', "+"},
    {'-', "-"},
    {'+', "+(binary)"},
    {'-', "-(binary)"},
    {'*', "*"},
    {'/', "/"},
    {'%', "%"},
    {tPOW, "**"},
    {tUPLUS, "+@"},
    {tUMINUS, "-@"},
    {tUPLUS, "+(unary)"},
    {tUMINUS, "-(unary)"},
    {'|', "|"},
    {'^', "^"},
    {'&', "&"},
    {tCMP, "<=>"},
    {'>', ">"},
    {tGEQ, ">="},
    {'<', "<"},
    {tLEQ, "<="},
    {tEQ, "=="},
    {tEQQ, "==="},
    {tNEQ, "!="},
    {tMATCH, "=~"},
    {tNMATCH, "!~"},
    {'!', "!"},
    {'~', "~"},
    {'!', "!(unary)"},
    {'~', "~(unary)"},
    {'!', "!@"},
    {'~', "~@"},
    {tAREF, "[]"},
    {tASET, "[]="},
    {tLSHFT, "<<"},
    {tRSHFT, ">>"},
    {tCOLON2, "::"},
    {'`', "`"},
    /* sentinel: token 0 ends the scan in convert_op() */
    {0, ""}
};
  4516. static ID convert_op(ID id) {
  4517. int i;
  4518. for(i = 0; op_tbl[i].token; i++) {
  4519. if(op_tbl[i].token == id) {
  4520. return rb_intern(op_tbl[i].name);
  4521. }
  4522. }
  4523. return id;
  4524. }
  4525. static NODE *
  4526. call_op(recv, id, narg, arg1, parse_state)
  4527. NODE *recv;
  4528. ID id;
  4529. int narg;
  4530. NODE *arg1;
  4531. rb_parse_state *parse_state;
  4532. {
  4533. value_expr(recv);
  4534. if (narg == 1) {
  4535. value_expr(arg1);
  4536. arg1 = NEW_LIST(arg1);
  4537. }
  4538. else {
  4539. arg1 = 0;
  4540. }
  4541. id = convert_op(id);
  4542. return NEW_CALL(recv, id, arg1);
  4543. }
  4544. static NODE*
  4545. match_gen(node1, node2, parse_state)
  4546. NODE *node1;
  4547. NODE *node2;
  4548. rb_parse_state *parse_state;
  4549. {
  4550. local_cnt('~');
  4551. value_expr(node1);
  4552. value_expr(node2);
  4553. if (node1) {
  4554. switch (nd_type(node1)) {
  4555. case NODE_DREGX:
  4556. case NODE_DREGX_ONCE:
  4557. return NEW_MATCH2(node1, node2);
  4558. case NODE_REGEX:
  4559. return NEW_MATCH2(node1, node2);
  4560. }
  4561. }
  4562. if (node2) {
  4563. switch (nd_type(node2)) {
  4564. case NODE_DREGX:
  4565. case NODE_DREGX_ONCE:
  4566. return NEW_MATCH3(node2, node1);
  4567. case NODE_REGEX:
  4568. return NEW_MATCH3(node2, node1);
  4569. }
  4570. }
  4571. return NEW_CALL(node1, convert_op(tMATCH), NEW_LIST(node2));
  4572. }
  4573. static NODE*
  4574. syd_gettable(parse_state, id)
  4575. rb_parse_state *parse_state;
  4576. ID id;
  4577. {
  4578. if (id == kSELF) {
  4579. return NEW_SELF();
  4580. }
  4581. else if (id == kNIL) {
  4582. return NEW_NIL();
  4583. }
  4584. else if (id == kTRUE) {
  4585. return NEW_TRUE();
  4586. }
  4587. else if (id == kFALSE) {
  4588. return NEW_FALSE();
  4589. }
  4590. else if (id == k__FILE__) {
  4591. return NEW_FILE();
  4592. }
  4593. else if (id == k__LINE__) {
  4594. return NEW_FIXNUM(ruby_sourceline);
  4595. }
  4596. else if (is_local_id(id)) {
  4597. if (local_id(id)) return NEW_LVAR(id);
  4598. /* method call without arguments */
  4599. return NEW_VCALL(id);
  4600. }
  4601. else if (is_global_id(id)) {
  4602. return NEW_GVAR(id);
  4603. }
  4604. else if (is_instance_id(id)) {
  4605. return NEW_IVAR(id);
  4606. }
  4607. else if (is_const_id(id)) {
  4608. return NEW_CONST(id);
  4609. }
  4610. else if (is_class_id(id)) {
  4611. return NEW_CVAR(id);
  4612. }
  4613. /* FIXME: indicate which identifier. */
  4614. rb_compile_error("identifier is not valid 1\n");
  4615. return 0;
  4616. }
  4617. static void
  4618. reset_block(rb_parse_state *parse_state) {
  4619. if(!parse_state->block_vars) {
  4620. parse_state->block_vars = var_table_create();
  4621. } else {
  4622. parse_state->block_vars = var_table_push(parse_state->block_vars);
  4623. }
  4624. }
  4625. static NODE *
  4626. extract_block_vars(rb_parse_state *parse_state, NODE* node, var_table vars)
  4627. {
  4628. int i;
  4629. NODE *var, *out = node;
  4630. if (!node) goto out;
  4631. if(var_table_size(vars) == 0) goto out;
  4632. var = NULL;
  4633. for(i = 0; i < var_table_size(vars); i++) {
  4634. var = NEW_DASGN_CURR(var_table_get(vars, i), var);
  4635. }
  4636. out = block_append(parse_state, var, node);
  4637. out:
  4638. assert(vars == parse_state->block_vars);
  4639. parse_state->block_vars = var_table_pop(parse_state->block_vars);
  4640. return out;
  4641. }
  4642. static NODE*
  4643. assignable(id, val, parse_state)
  4644. ID id;
  4645. NODE *val;
  4646. rb_parse_state *parse_state;
  4647. {
  4648. value_expr(val);
  4649. if (id == kSELF) {
  4650. yyerror("Can't change the value of self");
  4651. }
  4652. else if (id == kNIL) {
  4653. yyerror("Can't assign to nil");
  4654. }
  4655. else if (id == kTRUE) {
  4656. yyerror("Can't assign to true");
  4657. }
  4658. else if (id == kFALSE) {
  4659. yyerror("Can't assign to false");
  4660. }
  4661. else if (id == k__FILE__) {
  4662. yyerror("Can't assign to __FILE__");
  4663. }
  4664. else if (id == k__LINE__) {
  4665. yyerror("Can't assign to __LINE__");
  4666. }
  4667. else if (is_local_id(id)) {
  4668. if(parse_state->block_vars) {
  4669. var_table_add(parse_state->block_vars, id);
  4670. }
  4671. return NEW_LASGN(id, val);
  4672. }
  4673. else if (is_global_id(id)) {
  4674. return NEW_GASGN(id, val);
  4675. }
  4676. else if (is_instance_id(id)) {
  4677. return NEW_IASGN(id, val);
  4678. }
  4679. else if (is_const_id(id)) {
  4680. if (in_def || in_single)
  4681. yyerror("dynamic constant assignment");
  4682. return NEW_CDECL(id, val, 0);
  4683. }
  4684. else if (is_class_id(id)) {
  4685. if (in_def || in_single) return NEW_CVASGN(id, val);
  4686. return NEW_CVDECL(id, val);
  4687. }
  4688. else {
  4689. /* FIXME: indicate which identifier. */
  4690. rb_compile_error("identifier is not valid 2 (%d)\n", id);
  4691. }
  4692. return 0;
  4693. }
  4694. static NODE *
  4695. aryset(recv, idx, parse_state)
  4696. NODE *recv, *idx;
  4697. rb_parse_state *parse_state;
  4698. {
  4699. if (recv && nd_type(recv) == NODE_SELF)
  4700. recv = (NODE *)1;
  4701. else
  4702. value_expr(recv);
  4703. return NEW_ATTRASGN(recv, convert_op(tASET), idx);
  4704. }
  4705. static ID
  4706. rb_id_attrset(id)
  4707. ID id;
  4708. {
  4709. id &= ~ID_SCOPE_MASK;
  4710. id |= ID_ATTRSET;
  4711. return id;
  4712. }
  4713. static NODE *
  4714. attrset(recv, id, parse_state)
  4715. NODE *recv;
  4716. ID id;
  4717. rb_parse_state *parse_state;
  4718. {
  4719. if (recv && nd_type(recv) == NODE_SELF)
  4720. recv = (NODE *)1;
  4721. else
  4722. value_expr(recv);
  4723. return NEW_ATTRASGN(recv, rb_id_attrset(id), 0);
  4724. }
  4725. static void
  4726. rb_backref_error(node)
  4727. NODE *node;
  4728. {
  4729. switch (nd_type(node)) {
  4730. case NODE_NTH_REF:
  4731. rb_compile_error("Can't set variable $%u", node->nd_nth);
  4732. break;
  4733. case NODE_BACK_REF:
  4734. rb_compile_error("Can't set variable $%c", (int)node->nd_nth);
  4735. break;
  4736. }
  4737. }
  4738. static NODE *
  4739. arg_concat(parse_state, node1, node2)
  4740. rb_parse_state *parse_state;
  4741. NODE *node1;
  4742. NODE *node2;
  4743. {
  4744. if (!node2) return node1;
  4745. return NEW_ARGSCAT(node1, node2);
  4746. }
  4747. static NODE *
  4748. arg_add(parse_state, node1, node2)
  4749. rb_parse_state *parse_state;
  4750. NODE *node1;
  4751. NODE *node2;
  4752. {
  4753. if (!node1) return NEW_LIST(node2);
  4754. if (nd_type(node1) == NODE_ARRAY) {
  4755. return list_append(parse_state, node1, node2);
  4756. }
  4757. else {
  4758. return NEW_ARGSPUSH(node1, node2);
  4759. }
  4760. }
  4761. static NODE*
  4762. node_assign(lhs, rhs, parse_state)
  4763. NODE *lhs, *rhs;
  4764. rb_parse_state *parse_state;
  4765. {
  4766. if (!lhs) return 0;
  4767. value_expr(rhs);
  4768. switch (nd_type(lhs)) {
  4769. case NODE_GASGN:
  4770. case NODE_IASGN:
  4771. case NODE_LASGN:
  4772. case NODE_DASGN:
  4773. case NODE_DASGN_CURR:
  4774. case NODE_MASGN:
  4775. case NODE_CDECL:
  4776. case NODE_CVDECL:
  4777. case NODE_CVASGN:
  4778. lhs->nd_value = rhs;
  4779. break;
  4780. case NODE_ATTRASGN:
  4781. case NODE_CALL:
  4782. lhs->nd_args = arg_add(parse_state, lhs->nd_args, rhs);
  4783. break;
  4784. default:
  4785. /* should not happen */
  4786. break;
  4787. }
  4788. return lhs;
  4789. }
  4790. static int
  4791. value_expr0(node, parse_state)
  4792. NODE *node;
  4793. rb_parse_state *parse_state;
  4794. {
  4795. int cond = 0;
  4796. while (node) {
  4797. switch (nd_type(node)) {
  4798. case NODE_DEFN:
  4799. case NODE_DEFS:
  4800. parser_warning(parse_state, node, "void value expression");
  4801. return FALSE;
  4802. case NODE_RETURN:
  4803. case NODE_BREAK:
  4804. case NODE_NEXT:
  4805. case NODE_REDO:
  4806. case NODE_RETRY:
  4807. if (!cond) yyerror("void value expression");
  4808. /* or "control never reach"? */
  4809. return FALSE;
  4810. case NODE_BLOCK:
  4811. while (node->nd_next) {
  4812. node = node->nd_next;
  4813. }
  4814. node = node->nd_head;
  4815. break;
  4816. case NODE_BEGIN:
  4817. node = node->nd_body;
  4818. break;
  4819. case NODE_IF:
  4820. if (!value_expr(node->nd_body)) return FALSE;
  4821. node = node->nd_else;
  4822. break;
  4823. case NODE_AND:
  4824. case NODE_OR:
  4825. cond = 1;
  4826. node = node->nd_2nd;
  4827. break;
  4828. case NODE_NEWLINE:
  4829. node = node->nd_next;
  4830. break;
  4831. default:
  4832. return TRUE;
  4833. }
  4834. }
  4835. return TRUE;
  4836. }
  4837. static void
  4838. void_expr0(node)
  4839. NODE *node;
  4840. {
  4841. const char *useless = NULL;
  4842. if (!RTEST(ruby_verbose)) return;
  4843. again:
  4844. if (!node) return;
  4845. switch (nd_type(node)) {
  4846. case NODE_NEWLINE:
  4847. node = node->nd_next;
  4848. goto again;
  4849. case NODE_CALL:
  4850. switch (node->nd_mid) {
  4851. case '+':
  4852. case '-':
  4853. case '*':
  4854. case '/':
  4855. case '%':
  4856. case tPOW:
  4857. case tUPLUS:
  4858. case tUMINUS:
  4859. case '|':
  4860. case '^':
  4861. case '&':
  4862. case tCMP:
  4863. case '>':
  4864. case tGEQ:
  4865. case '<':
  4866. case tLEQ:
  4867. case tEQ:
  4868. case tNEQ:
  4869. useless = "";
  4870. break;
  4871. }
  4872. break;
  4873. case NODE_LVAR:
  4874. case NODE_DVAR:
  4875. case NODE_GVAR:
  4876. case NODE_IVAR:
  4877. case NODE_CVAR:
  4878. case NODE_NTH_REF:
  4879. case NODE_BACK_REF:
  4880. useless = "a variable";
  4881. break;
  4882. case NODE_CONST:
  4883. case NODE_CREF:
  4884. useless = "a constant";
  4885. break;
  4886. case NODE_LIT:
  4887. case NODE_STR:
  4888. case NODE_DSTR:
  4889. case NODE_DREGX:
  4890. case NODE_DREGX_ONCE:
  4891. useless = "a literal";
  4892. break;
  4893. case NODE_COLON2:
  4894. case NODE_COLON3:
  4895. useless = "::";
  4896. break;
  4897. case NODE_DOT2:
  4898. useless = "..";
  4899. break;
  4900. case NODE_DOT3:
  4901. useless = "...";
  4902. break;
  4903. case NODE_SELF:
  4904. useless = "self";
  4905. break;
  4906. case NODE_NIL:
  4907. useless = "nil";
  4908. break;
  4909. case NODE_TRUE:
  4910. useless = "true";
  4911. break;
  4912. case NODE_FALSE:
  4913. useless = "false";
  4914. break;
  4915. case NODE_DEFINED:
  4916. useless = "defined?";
  4917. break;
  4918. }
  4919. if (useless) {
  4920. int line = ruby_sourceline;
  4921. ruby_sourceline = nd_line(node);
  4922. rb_warn("useless use of %s in void context", useless);
  4923. ruby_sourceline = line;
  4924. }
  4925. }
  4926. static void
  4927. void_stmts(node, parse_state)
  4928. NODE *node;
  4929. rb_parse_state *parse_state;
  4930. {
  4931. if (!RTEST(ruby_verbose)) return;
  4932. if (!node) return;
  4933. if (nd_type(node) != NODE_BLOCK) return;
  4934. for (;;) {
  4935. if (!node->nd_next) return;
  4936. void_expr(node->nd_head);
  4937. node = node->nd_next;
  4938. }
  4939. }
  4940. static NODE *
  4941. remove_begin(node)
  4942. NODE *node;
  4943. {
  4944. NODE **n = &node;
  4945. while (*n) {
  4946. switch (nd_type(*n)) {
  4947. case NODE_NEWLINE:
  4948. n = &(*n)->nd_next;
  4949. continue;
  4950. case NODE_BEGIN:
  4951. *n = (*n)->nd_body;
  4952. default:
  4953. return node;
  4954. }
  4955. }
  4956. return node;
  4957. }
  4958. static int
  4959. assign_in_cond(node, parse_state)
  4960. NODE *node;
  4961. rb_parse_state *parse_state;
  4962. {
  4963. switch (nd_type(node)) {
  4964. case NODE_MASGN:
  4965. yyerror("multiple assignment in conditional");
  4966. return 1;
  4967. case NODE_LASGN:
  4968. case NODE_DASGN:
  4969. case NODE_GASGN:
  4970. case NODE_IASGN:
  4971. break;
  4972. case NODE_NEWLINE:
  4973. default:
  4974. return 0;
  4975. }
  4976. switch (nd_type(node->nd_value)) {
  4977. case NODE_LIT:
  4978. case NODE_STR:
  4979. case NODE_NIL:
  4980. case NODE_TRUE:
  4981. case NODE_FALSE:
  4982. return 1;
  4983. case NODE_DSTR:
  4984. case NODE_XSTR:
  4985. case NODE_DXSTR:
  4986. case NODE_EVSTR:
  4987. case NODE_DREGX:
  4988. default:
  4989. break;
  4990. }
  4991. return 1;
  4992. }
  4993. static int
  4994. e_option_supplied()
  4995. {
  4996. if (strcmp(ruby_sourcefile, "-e") == 0)
  4997. return TRUE;
  4998. return FALSE;
  4999. }
  5000. static void
  5001. warn_unless_e_option(ps, node, str)
  5002. rb_parse_state *ps;
  5003. NODE *node;
  5004. const char *str;
  5005. {
  5006. if (!e_option_supplied()) parser_warning(ps, node, str);
  5007. }
  5008. static NODE *cond0();
  5009. static NODE*
  5010. range_op(node, parse_state)
  5011. NODE *node;
  5012. rb_parse_state *parse_state;
  5013. {
  5014. enum node_type type;
  5015. if (!e_option_supplied()) return node;
  5016. if (node == 0) return 0;
  5017. value_expr(node);
  5018. node = cond0(node, parse_state);
  5019. type = nd_type(node);
  5020. if (type == NODE_NEWLINE) {
  5021. node = node->nd_next;
  5022. type = nd_type(node);
  5023. }
  5024. if (type == NODE_LIT && FIXNUM_P(node->nd_lit)) {
  5025. warn_unless_e_option(parse_state, node, "integer literal in conditional range");
  5026. return call_op(node,tEQ,1,NEW_GVAR(rb_intern("$.")), parse_state);
  5027. }
  5028. return node;
  5029. }
  5030. static int
  5031. literal_node(node)
  5032. NODE *node;
  5033. {
  5034. if (!node) return 1; /* same as NODE_NIL */
  5035. switch (nd_type(node)) {
  5036. case NODE_LIT:
  5037. case NODE_STR:
  5038. case NODE_DSTR:
  5039. case NODE_EVSTR:
  5040. case NODE_DREGX:
  5041. case NODE_DREGX_ONCE:
  5042. case NODE_DSYM:
  5043. return 2;
  5044. case NODE_TRUE:
  5045. case NODE_FALSE:
  5046. case NODE_NIL:
  5047. return 1;
  5048. }
  5049. return 0;
  5050. }
  5051. static NODE*
  5052. cond0(node, parse_state)
  5053. NODE *node;
  5054. rb_parse_state *parse_state;
  5055. {
  5056. if (node == 0) return 0;
  5057. assign_in_cond(node, parse_state);
  5058. switch (nd_type(node)) {
  5059. case NODE_DSTR:
  5060. case NODE_EVSTR:
  5061. case NODE_STR:
  5062. break;
  5063. case NODE_DREGX:
  5064. case NODE_DREGX_ONCE:
  5065. local_cnt('_');
  5066. local_cnt('~');
  5067. return NEW_MATCH2(node, NEW_GVAR(rb_intern("$_")));
  5068. case NODE_AND:
  5069. case NODE_OR:
  5070. node->nd_1st = cond0(node->nd_1st, parse_state);
  5071. node->nd_2nd = cond0(node->nd_2nd, parse_state);
  5072. break;
  5073. case NODE_DOT2:
  5074. case NODE_DOT3:
  5075. node->nd_beg = range_op(node->nd_beg, parse_state);
  5076. node->nd_end = range_op(node->nd_end, parse_state);
  5077. if (nd_type(node) == NODE_DOT2) nd_set_type(node,NODE_FLIP2);
  5078. else if (nd_type(node) == NODE_DOT3) nd_set_type(node, NODE_FLIP3);
  5079. if (!e_option_supplied()) {
  5080. int b = literal_node(node->nd_beg);
  5081. int e = literal_node(node->nd_end);
  5082. if ((b == 1 && e == 1) || (b + e >= 2 && RTEST(ruby_verbose))) {
  5083. }
  5084. }
  5085. break;
  5086. case NODE_DSYM:
  5087. break;
  5088. case NODE_REGEX:
  5089. nd_set_type(node, NODE_MATCH);
  5090. local_cnt('_');
  5091. local_cnt('~');
  5092. default:
  5093. break;
  5094. }
  5095. return node;
  5096. }
  5097. static NODE*
  5098. cond(node, parse_state)
  5099. NODE *node;
  5100. rb_parse_state *parse_state;
  5101. {
  5102. if (node == 0) return 0;
  5103. value_expr(node);
  5104. if (nd_type(node) == NODE_NEWLINE){
  5105. node->nd_next = cond0(node->nd_next, parse_state);
  5106. return node;
  5107. }
  5108. return cond0(node, parse_state);
  5109. }
  5110. static NODE*
  5111. logop(type, left, right, parse_state)
  5112. enum node_type type;
  5113. NODE *left, *right;
  5114. rb_parse_state *parse_state;
  5115. {
  5116. value_expr(left);
  5117. if (left && nd_type(left) == type) {
  5118. NODE *node = left, *second;
  5119. while ((second = node->nd_2nd) != 0 && nd_type(second) == type) {
  5120. node = second;
  5121. }
  5122. node->nd_2nd = NEW_NODE(type, second, right, 0);
  5123. return left;
  5124. }
  5125. return NEW_NODE(type, left, right, 0);
  5126. }
  5127. static int
  5128. cond_negative(nodep)
  5129. NODE **nodep;
  5130. {
  5131. NODE *c = *nodep;
  5132. if (!c) return 0;
  5133. switch (nd_type(c)) {
  5134. case NODE_NOT:
  5135. *nodep = c->nd_body;
  5136. return 1;
  5137. case NODE_NEWLINE:
  5138. if (c->nd_next && nd_type(c->nd_next) == NODE_NOT) {
  5139. c->nd_next = c->nd_next->nd_body;
  5140. return 1;
  5141. }
  5142. }
  5143. return 0;
  5144. }
  5145. static void
  5146. no_blockarg(node)
  5147. NODE *node;
  5148. {
  5149. if (node && nd_type(node) == NODE_BLOCK_PASS) {
  5150. rb_compile_error("block argument should not be given");
  5151. }
  5152. }
  5153. static NODE *
  5154. ret_args(parse_state, node)
  5155. rb_parse_state *parse_state;
  5156. NODE *node;
  5157. {
  5158. if (node) {
  5159. no_blockarg(node);
  5160. if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) {
  5161. node = node->nd_head;
  5162. }
  5163. if (node && nd_type(node) == NODE_SPLAT) {
  5164. node = NEW_SVALUE(node);
  5165. }
  5166. }
  5167. return node;
  5168. }
  5169. static NODE *
  5170. new_yield(parse_state, node)
  5171. rb_parse_state *parse_state;
  5172. NODE *node;
  5173. {
  5174. OBJECT state = Qtrue;
  5175. if (node) {
  5176. no_blockarg(node);
  5177. if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) {
  5178. node = node->nd_head;
  5179. state = Qfalse;
  5180. }
  5181. if (node && nd_type(node) == NODE_SPLAT) {
  5182. state = Qtrue;
  5183. }
  5184. }
  5185. else {
  5186. state = Qfalse;
  5187. }
  5188. return NEW_YIELD(node, state);
  5189. }
  5190. static NODE *
  5191. arg_blk_pass(node1, node2)
  5192. NODE *node1;
  5193. NODE *node2;
  5194. {
  5195. if (node2) {
  5196. node2->nd_head = node1;
  5197. return node2;
  5198. }
  5199. return node1;
  5200. }
  5201. static NODE*
  5202. arg_prepend(parse_state, node1, node2)
  5203. rb_parse_state *parse_state;
  5204. NODE *node1, *node2;
  5205. {
  5206. switch (nd_type(node2)) {
  5207. case NODE_ARRAY:
  5208. return list_concat(NEW_LIST(node1), node2);
  5209. case NODE_SPLAT:
  5210. return arg_concat(parse_state, node1, node2->nd_head);
  5211. case NODE_BLOCK_PASS:
  5212. node2->nd_body = arg_prepend(parse_state, node1, node2->nd_body);
  5213. return node2;
  5214. default:
  5215. printf("unknown nodetype(%d) for arg_prepend", nd_type(node2));
  5216. abort();
  5217. }
  5218. return 0; /* not reached */
  5219. }
  5220. static NODE*
  5221. new_call(parse_state, r,m,a)
  5222. rb_parse_state *parse_state;
  5223. NODE *r;
  5224. ID m;
  5225. NODE *a;
  5226. {
  5227. if (a && nd_type(a) == NODE_BLOCK_PASS) {
  5228. a->nd_iter = NEW_CALL(r,convert_op(m),a->nd_head);
  5229. return a;
  5230. }
  5231. return NEW_CALL(r,convert_op(m),a);
  5232. }
  5233. static NODE*
  5234. new_fcall(parse_state, m,a)
  5235. rb_parse_state *parse_state;
  5236. ID m;
  5237. NODE *a;
  5238. {
  5239. if (a && nd_type(a) == NODE_BLOCK_PASS) {
  5240. a->nd_iter = NEW_FCALL(m,a->nd_head);
  5241. return a;
  5242. }
  5243. return NEW_FCALL(m,a);
  5244. }
  5245. static NODE*
  5246. new_super(parse_state,a)
  5247. rb_parse_state *parse_state;
  5248. NODE *a;
  5249. {
  5250. if (a && nd_type(a) == NODE_BLOCK_PASS) {
  5251. a->nd_iter = NEW_SUPER(a->nd_head);
  5252. return a;
  5253. }
  5254. return NEW_SUPER(a);
  5255. }
  5256. static void
  5257. syd_local_push(rb_parse_state *st, int top)
  5258. {
  5259. st->variables = var_table_push(st->variables);
  5260. }
  5261. static void
  5262. syd_local_pop(rb_parse_state *st)
  5263. {
  5264. st->variables = var_table_pop(st->variables);
  5265. }
  5266. static ID*
  5267. syd_local_tbl(rb_parse_state *st)
  5268. {
  5269. ID *lcl_tbl;
  5270. var_table tbl;
  5271. int i, len;
  5272. tbl = st->variables;
  5273. len = var_table_size(tbl);
  5274. lcl_tbl = pt_allocate(st, sizeof(ID) * (len + 3));
  5275. lcl_tbl[0] = (ID)len;
  5276. lcl_tbl[1] = '_';
  5277. lcl_tbl[2] = '~';
  5278. for(i = 0; i < len; i++) {
  5279. lcl_tbl[i + 3] = var_table_get(tbl, i);
  5280. }
  5281. return lcl_tbl;
  5282. }
  5283. static intptr_t
  5284. syd_local_cnt(rb_parse_state *st, ID id)
  5285. {
  5286. int idx;
  5287. /* Leave these hardcoded here because they arne't REALLY ids at all. */
  5288. if(id == '_') {
  5289. return 0;
  5290. } else if(id == '~') {
  5291. return 1;
  5292. }
  5293. idx = var_table_find(st->variables, id);
  5294. if(idx >= 0) return idx + 2;
  5295. return var_table_add(st->variables, id);
  5296. }
  5297. static int
  5298. syd_local_id(rb_parse_state *st, ID id)
  5299. {
  5300. if(var_table_find(st->variables, id) >= 0) return 1;
  5301. return 0;
  5302. }
  5303. static ID
  5304. rb_intern(const char *name)
  5305. {
  5306. const char *m = name;
  5307. ID id, pre, qrk, bef;
  5308. int last;
  5309. id = 0;
  5310. last = strlen(name)-1;
  5311. switch (*name) {
  5312. case '$':
  5313. id |= ID_GLOBAL;
  5314. m++;
  5315. if (!is_identchar(*m)) m++;
  5316. break;
  5317. case '@':
  5318. if (name[1] == '@') {
  5319. m++;
  5320. id |= ID_CLASS;
  5321. }
  5322. else {
  5323. id |= ID_INSTANCE;
  5324. }
  5325. m++;
  5326. break;
  5327. default:
  5328. if (name[0] != '_' && !ISALPHA(name[0]) && !ismbchar(name[0])) {
  5329. int i;
  5330. for (i=0; op_tbl[i].token; i++) {
  5331. if (*op_tbl[i].name == *name &&
  5332. strcmp(op_tbl[i].name, name) == 0) {
  5333. id = op_tbl[i].token;
  5334. return id;
  5335. }
  5336. }
  5337. }
  5338. if (name[last] == '=') {
  5339. id = ID_ATTRSET;
  5340. }
  5341. else if (ISUPPER(name[0])) {
  5342. id = ID_CONST;
  5343. }
  5344. else {
  5345. id = ID_LOCAL;
  5346. }
  5347. break;
  5348. }
  5349. while (m <= name + last && is_identchar(*m)) {
  5350. m += mbclen(*m);
  5351. }
  5352. if (*m) id = ID_JUNK;
  5353. qrk = (ID)quark_from_string(name);
  5354. pre = qrk + tLAST_TOKEN;
  5355. bef = id;
  5356. id |= ( pre << ID_SCOPE_SHIFT );
  5357. return id;
  5358. }
  5359. quark id_to_quark(ID id) {
  5360. quark qrk;
  5361. qrk = (quark)((id >> ID_SCOPE_SHIFT) - tLAST_TOKEN);
  5362. return qrk;
  5363. }
  5364. static unsigned long
  5365. scan_oct(const char *start, int len, int *retlen)
  5366. {
  5367. register const char *s = start;
  5368. register unsigned long retval = 0;
  5369. while (len-- && *s >= '0' && *s <= '7') {
  5370. retval <<= 3;
  5371. retval |= *s++ - '0';
  5372. }
  5373. *retlen = s - start;
  5374. return retval;
  5375. }
  5376. static unsigned long
  5377. scan_hex(const char *start, int len, int *retlen)
  5378. {
  5379. static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF";
  5380. register const char *s = start;
  5381. register unsigned long retval = 0;
  5382. char *tmp;
  5383. while (len-- && *s && (tmp = strchr(hexdigit, *s))) {
  5384. retval <<= 4;
  5385. retval |= (tmp - hexdigit) & 15;
  5386. s++;
  5387. }
  5388. *retlen = s - start;
  5389. return retval;
  5390. }
  5391. const char *op_to_name(ID id) {
  5392. if(id < tLAST_TOKEN) {
  5393. int i = 0;
  5394. for (i=0; op_tbl[i].token; i++) {
  5395. if (op_tbl[i].token == id)
  5396. return op_tbl[i].name;
  5397. }
  5398. }
  5399. return NULL;
  5400. }