/contrib/one-true-awk/awkgram.y

https://bitbucket.org/freebsd/freebsd-head/ · Happy · 486 lines · 428 code · 58 blank · 0 comment · 0 complexity · 7ed81f63e69cf4099898bc879adc84c4 MD5 · raw file

  1. /****************************************************************
  2. Copyright (C) Lucent Technologies 1997
  3. All Rights Reserved
  4. Permission to use, copy, modify, and distribute this software and
  5. its documentation for any purpose and without fee is hereby
  6. granted, provided that the above copyright notice appear in all
  7. copies and that both that the copyright notice and this
  8. permission notice and warranty disclaimer appear in supporting
  9. documentation, and that the name Lucent Technologies or any of
  10. its entities not be used in advertising or publicity pertaining
  11. to distribution of the software without specific, written prior
  12. permission.
  13. LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  14. INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  15. IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  16. SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  17. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  18. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  19. ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  20. THIS SOFTWARE.
  21. ****************************************************************/
  22. %{
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include "awk.h" /* project header; Node, Cell and the helpers used in the actions are not declared in this file */
  26. void checkdup(Node *list, Cell *item); /* defined after the rules; called from the varlist rule */
  27. int yywrap(void) { return(1); } /* unconditionally returns 1 */
  28. Node *beginloc = 0; /* XBEGIN statement lists, accumulated with linkum() in pa_stat */
  29. Node *endloc = 0; /* XEND statement lists, accumulated with linkum() in pa_stat */
  30. int infunc = 0; /* = 1 if in arglist or body of func */
  31. int inloop = 0; /* = 1 if in while, for, do */
  32. char *curfname = 0; /* current function name */
  33. Node *arglist = 0; /* list of args for current function */
  34. %}
  35. %union { /* semantic value carried by tokens and nonterminals */
  36. Node *p; /* <p>: parse-tree nodes (most nonterminals) */
  37. Cell *cp; /* <cp>: the VAR IVAR VARNF CALL NUMBER STRING tokens */
  38. int i; /* <i>: integer values, e.g. the operator code that ASGNOP/MATCHOP hand to op2()/op3() */
  39. char *s; /* <s>: the REGEXPR token and the reg_expr nonterminal */
  40. }
  41. %token <i> FIRSTTOKEN /* must be first */
  42. %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND /* PROGRAM and PASTAT are also used as node tags in stat3()/stat2() calls below */
  43. %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
  44. %token <i> ARRAY
  45. %token <i> MATCH NOTMATCH MATCHOP
  46. %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE /* none of these is referenced by a rule in this grammar */
  47. %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
  48. %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
  49. %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
  50. %token <i> ADD MINUS MULT DIVIDE MOD
  51. %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
  52. %token <i> PRINT PRINTF SPRINTF
  53. %token <p> ELSE INTEST CONDEXPR
  54. %token <i> POSTINCR PREINCR POSTDECR PREDECR
  55. %token <cp> VAR IVAR VARNF CALL NUMBER STRING
  56. %token <s> REGEXPR
  57. %type <p> pas pattern ppattern plist pplist patlist prarg term re /* semantic value types of the nonterminals start here */
  58. %type <p> pa_pat pa_stat pa_stats
  59. %type <s> reg_expr
  60. %type <p> simple_stmt opt_simple_stmt stmt stmtlist
  61. %type <p> var varname funcname varlist
  62. %type <p> for if else while
  63. %type <i> do st
  64. %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
  65. %type <i> subop print
  66. %right ASGNOP /* precedence table, lowest first: in yacc each later line binds tighter */
  67. %right '?'
  68. %right ':'
  69. %left BOR
  70. %left AND
  71. %left GETLINE /* GETLINE has no %token line above, so it carries no value-type tag */
  72. %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
  73. %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
  74. %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
  75. %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR /* RETURN, SPLIT and SUBSTR are declared only here */
  76. %left REGEXPR VAR VARNF IVAR WHILE '(' /* WHILE is declared only here */
  77. %left CAT /* never appears in a rule body; applied with %prec CAT to the juxtaposition alternatives */
  78. %left '+' '-'
  79. %left '*' '/' '%'
  80. %left NOT UMINUS /* UMINUS is applied with %prec to unary '-', unary '+' and NOT in term */
  81. %right POWER
  82. %right DECR INCR
  83. %left INDIRECT
  84. %token LASTTOKEN /* must be last */
  85. %%
  86. program: /* first rule, hence the start symbol (no %start is declared); stores the PROGRAM node in winner unless errorflag is set */
  87. pas { if (errorflag==0)
  88. winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
  89. | error { yyclearin; bracecheck(); SYNTAX("bailing out"); } /* top-level error recovery: discard lookahead, run bracecheck(), report */
  90. ;
  91. and: /* an AND token followed by any number of NL tokens */
  92. AND | and NL
  93. ;
  94. bor: /* a BOR token followed by any number of NL tokens */
  95. BOR | bor NL
  96. ;
  97. comma: /* a ',' followed by any number of NL tokens */
  98. ',' | comma NL
  99. ;
  100. do: /* a DO token followed by any number of NL tokens */
  101. DO | do NL
  102. ;
  103. else: /* an ELSE token followed by any number of NL tokens */
  104. ELSE | else NL
  105. ;
  106. for: /* the three for forms; each mid-rule {inloop++;} action takes a $n slot, so the body stmt is $12, $10 and $8 respectively */
  107. FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
  108. { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); } /* init; condition; step form */
  109. | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
  110. { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); } /* condition omitted: NIL is passed in its place */
  111. | FOR '(' varname IN varname rparen {inloop++;} stmt
  112. { --inloop; $$ = stat3(IN, $3, makearr($5), $8); } /* for (var in array): the second varname goes through makearr() */
  113. ;
  114. funcname: /* name in a FUNC definition; no explicit $$, so the rule relies on yacc's default $$ = $1 and pa_stat casts the value back to Cell * */
  115. VAR { setfname($1); }
  116. | CALL { setfname($1); }
  117. ;
  118. if: /* the "IF ( pattern )" head; the value is the condition after notnull() */
  119. IF '(' pattern rparen { $$ = notnull($3); }
  120. ;
  121. lbrace: /* a '{' followed by any number of NL tokens */
  122. '{' | lbrace NL
  123. ;
  124. nl: /* one or more NL tokens */
  125. NL | nl NL
  126. ;
  127. opt_nl: /* zero or more NL tokens; the empty case yields 0 */
  128. /* empty */ { $$ = 0; }
  129. | nl
  130. ;
  131. opt_pst: /* optional run of separators (see pst); the empty case yields 0 */
  132. /* empty */ { $$ = 0; }
  133. | pst
  134. ;
  135. opt_simple_stmt: /* optional simple_stmt, used in the for(;;) header; the empty case yields 0 */
  136. /* empty */ { $$ = 0; }
  137. | simple_stmt
  138. ;
  139. pas: /* whole program body: separators only (value 0), or a pa_stats list with optional separators on both sides */
  140. opt_pst { $$ = 0; }
  141. | opt_pst pa_stats opt_pst { $$ = $2; }
  142. ;
  143. pa_pat: /* a pattern in rule-selector position, passed through notnull() */
  144. pattern { $$ = notnull($1); }
  145. ;
  146. pa_stat: /* one top-level item: pattern/action rule, XBEGIN/XEND block, or function definition */
  147. pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } /* pattern with no action: the action is PRINT of rectonode() */
  148. | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
  149. | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); } /* two-pattern form with no action: same PRINT default */
  150. | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
  151. | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } /* action with no pattern: NIL is passed as the pattern */
  152. | XBEGIN lbrace stmtlist '}'
  153. { beginloc = linkum(beginloc, $3); $$ = 0; } /* appended to beginloc; contributes nothing to the rule list */
  154. | XEND lbrace stmtlist '}'
  155. { endloc = linkum(endloc, $3); $$ = 0; } /* appended to endloc; contributes nothing to the rule list */
  156. | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
  157. { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } /* the mid-rule {infunc++;} is $6, so the body stmtlist is $8 */
  158. ;
  159. pa_stats: /* one or more pa_stat items, optionally separated by pst, chained with linkum() */
  160. pa_stat
  161. | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
  162. ;
  163. patlist: /* one or more comma-separated patterns, chained with linkum() */
  164. pattern
  165. | patlist comma pattern { $$ = linkum($1, $3); }
  166. ;
  167. ppattern: /* like pattern but without the EQ/GE/GT/LE/LT/NE and '|' GETLINE alternatives; reached from prarg via pplist */
  168. var ASGNOP ppattern { $$ = op2($2, $1, $3); } /* the ASGNOP token's value is used as the operator */
  169. | ppattern '?' ppattern ':' ppattern %prec '?'
  170. { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
  171. | ppattern bor ppattern %prec BOR
  172. { $$ = op2(BOR, notnull($1), notnull($3)); }
  173. | ppattern and ppattern %prec AND
  174. { $$ = op2(AND, notnull($1), notnull($3)); }
  175. | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } /* literal /re/: passed to makedfa() in the action */
  176. | ppattern MATCHOP ppattern
  177. { if (constnode($3))
  178. $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
  179. else
  180. $$ = op3($2, (Node *)1, $1, $3); } /* constant right side goes through makedfa(); otherwise the first op3 argument is (Node *)1 -- presumably a "dynamic regexp" flag, confirm in the interpreter */
  181. | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
  182. | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } /* parenthesised list of two or more patterns on the left of IN */
  183. | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } /* juxtaposition builds a CAT node */
  184. | re
  185. | term
  186. ;
  187. pattern: /* general expression; notnull() wraps operands used as truth values */
  188. var ASGNOP pattern { $$ = op2($2, $1, $3); } /* the ASGNOP token's value is used as the operator */
  189. | pattern '?' pattern ':' pattern %prec '?'
  190. { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
  191. | pattern bor pattern %prec BOR
  192. { $$ = op2(BOR, notnull($1), notnull($3)); }
  193. | pattern and pattern %prec AND
  194. { $$ = op2(AND, notnull($1), notnull($3)); }
  195. | pattern EQ pattern { $$ = op2($2, $1, $3); } /* comparisons: the token's own value ($2) is the operator */
  196. | pattern GE pattern { $$ = op2($2, $1, $3); }
  197. | pattern GT pattern { $$ = op2($2, $1, $3); }
  198. | pattern LE pattern { $$ = op2($2, $1, $3); }
  199. | pattern LT pattern { $$ = op2($2, $1, $3); }
  200. | pattern NE pattern { $$ = op2($2, $1, $3); }
  201. | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } /* literal /re/: passed to makedfa() in the action */
  202. | pattern MATCHOP pattern
  203. { if (constnode($3))
  204. $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
  205. else
  206. $$ = op3($2, (Node *)1, $1, $3); } /* constant right side goes through makedfa(); otherwise the first op3 argument is (Node *)1 -- presumably a "dynamic regexp" flag, confirm in the interpreter */
  207. | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
  208. | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } /* parenthesised list of two or more patterns on the left of IN */
  209. | pattern '|' GETLINE var {
  210. if (safe) SYNTAX("cmd | getline is unsafe");
  211. else $$ = op3(GETLINE, $4, itonp($2), $1); } /* when safe is set only the error is reported and the action assigns no value */
  212. | pattern '|' GETLINE {
  213. if (safe) SYNTAX("cmd | getline is unsafe");
  214. else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); } /* same, with a null pointer in place of the target variable */
  215. | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } /* juxtaposition builds a CAT node */
  216. | re
  217. | term
  218. ;
  219. plist: /* two or more comma-separated patterns (there is no single-pattern alternative), chained with linkum() */
  220. pattern comma pattern { $$ = linkum($1, $3); }
  221. | plist comma pattern { $$ = linkum($1, $3); }
  222. ;
  223. pplist: /* one or more comma-separated ppatterns, chained with linkum() */
  224. ppattern
  225. | pplist comma ppattern { $$ = linkum($1, $3); }
  226. ;
  227. prarg: /* argument list of print/printf */
  228. /* empty */ { $$ = rectonode(); } /* no arguments: rectonode() is used instead */
  229. | pplist
  230. | '(' plist ')' { $$ = $2; } /* parenthesised form with at least two arguments (see plist) */
  231. ;
  232. print: /* either output keyword; the token value is passed to stat3() in simple_stmt */
  233. PRINT | PRINTF
  234. ;
  235. pst: /* one or more NL or ';' separators, in any mix */
  236. NL | ';' | pst NL | pst ';'
  237. ;
  238. rbrace: /* a '}' followed by any number of NL tokens */
  239. '}' | rbrace NL
  240. ;
  241. re: /* a regular expression used as an expression by itself, optionally negated */
  242. reg_expr
  243. { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } /* bare /re/ becomes a MATCH node whose subject is rectonode() */
  244. | NOT re { $$ = op1(NOT, notnull($2)); }
  245. ;
  246. reg_expr: /* /regexp/ literal; the mid-rule startreg() call after the opening '/' takes the $2 slot, so REGEXPR is $3 */
  247. '/' {startreg();} REGEXPR '/' { $$ = $3; }
  248. ;
  249. rparen: /* a ')' followed by any number of NL tokens */
  250. ')' | rparen NL
  251. ;
  252. simple_stmt: /* a statement without its terminator: print forms, DELETE, or an expression */
  253. print prarg '|' term {
  254. if (safe) SYNTAX("print | is unsafe");
  255. else $$ = stat3($1, $2, itonp($3), $4); } /* the redirection token's value ($3) is passed via itonp(); when safe is set only the error is reported */
  256. | print prarg APPEND term {
  257. if (safe) SYNTAX("print >> is unsafe");
  258. else $$ = stat3($1, $2, itonp($3), $4); }
  259. | print prarg GT term {
  260. if (safe) SYNTAX("print > is unsafe");
  261. else $$ = stat3($1, $2, itonp($3), $4); }
  262. | print prarg { $$ = stat3($1, $2, NIL, NIL); } /* no redirection: NIL for both the redirection kind and the target */
  263. | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
  264. | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); } /* no subscript list: 0 is passed in its place */
  265. | pattern { $$ = exptostat($1); } /* expression used as a statement */
  266. | error { yyclearin; SYNTAX("illegal statement"); } /* statement-level error recovery */
  267. ;
  268. st: /* statement terminator: one or more newlines, or ';' followed by optional newlines */
  269. nl
  270. | ';' opt_nl
  271. ;
  272. stmt: /* one complete statement, including its terminator where the alternative has one */
  273. BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
  274. $$ = stat1(BREAK, NIL); } /* the node is still built after the error is reported */
  275. | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
  276. $$ = stat1(CONTINUE, NIL); } /* the node is still built after the error is reported */
  277. | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
  278. { $$ = stat2(DO, $3, notnull($7)); } /* the two mid-rule actions are $2 and $4: the body is $3 and the condition is $7 */
  279. | EXIT pattern st { $$ = stat1(EXIT, $2); }
  280. | EXIT st { $$ = stat1(EXIT, NIL); }
  281. | for
  282. | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
  283. | if stmt { $$ = stat3(IF, $1, $2, NIL); } /* no else branch: NIL */
  284. | lbrace stmtlist rbrace { $$ = $2; }
  285. | NEXT st { if (infunc)
  286. SYNTAX("next is illegal inside a function");
  287. $$ = stat1(NEXT, NIL); } /* the node is still built after the error is reported */
  288. | NEXTFILE st { if (infunc)
  289. SYNTAX("nextfile is illegal inside a function");
  290. $$ = stat1(NEXTFILE, NIL); } /* the node is still built after the error is reported */
  291. | RETURN pattern st { $$ = stat1(RETURN, $2); }
  292. | RETURN st { $$ = stat1(RETURN, NIL); }
  293. | simple_stmt st
  294. | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); } /* the mid-rule action is $2, so the body is $3 */
  295. | ';' opt_nl { $$ = 0; } /* empty statement */
  296. ;
  297. stmtlist: /* one or more statements, chained with linkum() */
  298. stmt
  299. | stmtlist stmt { $$ = linkum($1, $2); }
  300. ;
  301. subop: /* either substitution keyword; the token value is used as the op4() operator in term */
  302. SUB | GSUB
  303. ;
  304. term: /* arithmetic, built-in calls, getline forms, constants and variables */
  305. term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); } /* '/' followed by ASGNOP builds a DIVEQ node -- presumably covers a "/=" that was tokenised as two tokens; confirm against the lexer */
  306. | term '+' term { $$ = op2(ADD, $1, $3); }
  307. | term '-' term { $$ = op2(MINUS, $1, $3); }
  308. | term '*' term { $$ = op2(MULT, $1, $3); }
  309. | term '/' term { $$ = op2(DIVIDE, $1, $3); }
  310. | term '%' term { $$ = op2(MOD, $1, $3); }
  311. | term POWER term { $$ = op2(POWER, $1, $3); }
  312. | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
  313. | '+' term %prec UMINUS { $$ = $2; } /* unary plus builds no node */
  314. | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
  315. | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); } /* empty argument list: rectonode() is the argument */
  316. | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
  317. | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); } /* no parentheses: same node as the empty argument list */
  318. | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
  319. | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
  320. | CLOSE term { $$ = op1(CLOSE, $2); }
  321. | DECR var { $$ = op1(PREDECR, $2); }
  322. | INCR var { $$ = op1(PREINCR, $2); }
  323. | var DECR { $$ = op1(POSTDECR, $1); }
  324. | var INCR { $$ = op1(POSTINCR, $1); }
  325. | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } /* getline var < file */
  326. | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } /* getline < file */
  327. | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
  328. | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
  329. | INDEX '(' pattern comma pattern ')'
  330. { $$ = op2(INDEX, $3, $5); }
  331. | INDEX '(' pattern comma reg_expr ')'
  332. { SYNTAX("index() doesn't permit regular expressions");
  333. $$ = op2(INDEX, $3, (Node*)$5); } /* the error is reported first; the char * regexp text is then cast to Node * to fill the node */
  334. | '(' pattern ')' { $$ = $2; }
  335. | MATCHFCN '(' pattern comma reg_expr ')'
  336. { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
  337. | MATCHFCN '(' pattern comma pattern ')'
  338. { if (constnode($5))
  339. $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
  340. else
  341. $$ = op3(MATCHFCN, (Node *)1, $3, $5); } /* constant second argument goes through makedfa(); otherwise the first op3 argument is (Node *)1 */
  342. | NUMBER { $$ = celltonode($1, CCON); }
  343. | SPLIT '(' pattern comma varname comma pattern ')' /* string */
  344. { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
  345. | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
  346. { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
  347. | SPLIT '(' pattern comma varname ')'
  348. { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
  349. | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
  350. | STRING { $$ = celltonode($1, CCON); }
  351. | subop '(' reg_expr comma pattern ')'
  352. { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } /* two-argument form: the target is rectonode() */
  353. | subop '(' pattern comma pattern ')'
  354. { if (constnode($3))
  355. $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
  356. else
  357. $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
  358. | subop '(' reg_expr comma pattern comma var ')'
  359. { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } /* three-argument form: the target is the given var */
  360. | subop '(' pattern comma pattern comma var ')'
  361. { if (constnode($3))
  362. $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
  363. else
  364. $$ = op4($1, (Node *)1, $3, $5, $7); }
  365. | SUBSTR '(' pattern comma pattern comma pattern ')'
  366. { $$ = op3(SUBSTR, $3, $5, $7); }
  367. | SUBSTR '(' pattern comma pattern ')'
  368. { $$ = op3(SUBSTR, $3, $5, NIL); } /* length argument omitted: NIL */
  369. | var
  370. ;
  371. var: /* anything usable as a variable reference */
  372. varname
  373. | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } /* subscripted: the name goes through makearr() */
  374. | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
  375. | INDIRECT term { $$ = op1(INDIRECT, $2); }
  376. ;
  377. varlist: /* parameter list of a function definition; every alternative also stores the list in the global arglist */
  378. /* nothing */ { arglist = $$ = 0; }
  379. | VAR { arglist = $$ = celltonode($1,CVAR); }
  380. | varlist comma VAR {
  381. checkdup($1, $3);
  382. arglist = $$ = linkum($1,celltonode($3,CVAR)); } /* checkdup() reports a repeated name; the name is appended either way */
  383. ;
  384. varname: /* a plain name: a VAR cell, an ARG token, or VARNF */
  385. VAR { $$ = celltonode($1, CVAR); }
  386. | ARG { $$ = op1(ARG, itonp($1)); } /* the ARG token's integer value is passed via itonp() */
  387. | VARNF { $$ = op1(VARNF, (Node *) $1); } /* the Cell * is cast directly to Node * */
  388. ;
  389. while: /* the "WHILE ( pattern )" head; the value is the condition after notnull() */
  390. WHILE '(' pattern rparen { $$ = notnull($3); }
  391. ;
  392. %%
  393. void setfname(Cell *p)
  394. {
  395. if (isarr(p))
  396. SYNTAX("%s is an array, not a function", p->nval);
  397. else if (isfcn(p))
  398. SYNTAX("you can't define function %s more than once", p->nval);
  399. curfname = p->nval;
  400. }
  401. int constnode(Node *p)
  402. {
  403. return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
  404. }
  405. char *strnode(Node *p)
  406. {
  407. return ((Cell *)(p->narg[0]))->sval;
  408. }
  409. Node *notnull(Node *n)
  410. {
  411. switch (n->nobj) {
  412. case LE: case LT: case EQ: case NE: case GT: case GE:
  413. case BOR: case AND: case NOT:
  414. return n;
  415. default:
  416. return op2(NE, n, nullnode);
  417. }
  418. }
  419. void checkdup(Node *vl, Cell *cp) /* check if name already in list */
  420. {
  421. char *s = cp->nval;
  422. for ( ; vl; vl = vl->nnext) {
  423. if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
  424. SYNTAX("duplicate argument %s", s);
  425. break;
  426. }
  427. }
  428. }