PageRenderTime 24ms CodeModel.GetById 13ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/contrib/one-true-awk/awkgram.y

https://bitbucket.org/freebsd/freebsd-head/
Happy | 486 lines | 428 code | 58 blank | 0 comment | 0 complexity | 7ed81f63e69cf4099898bc879adc84c4 MD5 | raw file
  1/****************************************************************
  2Copyright (C) Lucent Technologies 1997
  3All Rights Reserved
  4
  5Permission to use, copy, modify, and distribute this software and
  6its documentation for any purpose and without fee is hereby
  7granted, provided that the above copyright notice appear in all
  8copies and that both that the copyright notice and this
  9permission notice and warranty disclaimer appear in supporting
 10documentation, and that the name Lucent Technologies or any of
 11its entities not be used in advertising or publicity pertaining
 12to distribution of the software without specific, written prior
 13permission.
 14
 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 22THIS SOFTWARE.
 23****************************************************************/
 24
 25%{
 26#include <stdio.h>
 27#include <string.h>
 28#include "awk.h"
 29
 30void checkdup(Node *list, Cell *item);
 31int yywrap(void) { return(1); }
 32
 33Node	*beginloc = 0;
 34Node	*endloc = 0;
 35int	infunc	= 0;	/* = 1 if in arglist or body of func */
 36int	inloop	= 0;	/* = 1 if in while, for, do */
 37char	*curfname = 0;	/* current function name */
 38Node	*arglist = 0;	/* list of args for current function */
 39%}
 40
 41%union {
 42	Node	*p;
 43	Cell	*cp;
 44	int	i;
 45	char	*s;
 46}
 47
 48%token	<i>	FIRSTTOKEN	/* must be first */
 49%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
 50%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
 51%token	<i>	ARRAY
 52%token	<i>	MATCH NOTMATCH MATCHOP
 53%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
 54%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
 55%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
 56%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
 57%token	<i>	ADD MINUS MULT DIVIDE MOD
 58%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
 59%token	<i>	PRINT PRINTF SPRINTF
 60%token	<p>	ELSE INTEST CONDEXPR
 61%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
 62%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
 63%token	<s>	REGEXPR
 64
 65%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
 66%type	<p>	pa_pat pa_stat pa_stats
 67%type	<s>	reg_expr
 68%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
 69%type	<p>	var varname funcname varlist
 70%type	<p>	for if else while
 71%type	<i>	do st
 72%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
 73%type	<i>	subop print
 74
 75%right	ASGNOP
 76%right	'?'
 77%right	':'
 78%left	BOR
 79%left	AND
 80%left	GETLINE
 81%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
 82%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
 83%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
 84%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
 85%left	REGEXPR VAR VARNF IVAR WHILE '('
 86%left	CAT
 87%left	'+' '-'
 88%left	'*' '/' '%'
 89%left	NOT UMINUS
 90%right	POWER
 91%right	DECR INCR
 92%left	INDIRECT
 93%token	LASTTOKEN	/* must be last */
 94
 95%%
 96
 97program:
 98	  pas	{ if (errorflag==0)
 99			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
100	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
101	;
102
/*
 * and, bor, comma, do, else - each is its token followed by any number
 * of NL tokens, so newlines directly after these tokens are absorbed.
 * No actions: the value is that of the first symbol.
 */
and:
	  AND
	| and NL
	;

bor:
	  BOR
	| bor NL
	;

comma:
	  ','
	| comma NL
	;

do:
	  DO
	| do NL
	;

else:
	  ELSE
	| else NL
	;
122
/*
 * for - the three forms of the for statement:
 *   for (init; cond; step) stmt
 *   for (init; ; step) stmt		(no condition)
 *   for (name in array) stmt
 * inloop is bumped by a mid-rule action before the body is parsed and
 * dropped again afterwards.  The mid-rule action occupies a $N slot,
 * which is why the body is $12, $10 and $8 respectively.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, notnull($6), $9, $12);
		}
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, NIL, $7, $10);
		}
	| FOR '(' varname IN varname rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat3(IN, $3, makearr($5), $8);
		}
	;
131
/*
 * funcname - the name in a function definition.  Either token kind is
 * handed to setfname(), which validates it and records the name.
 */
funcname:
	  VAR
		{ setfname($1); }
	| CALL
		{ setfname($1); }
	;
136
/* if - head of an if statement; the value is the condition run through notnull(). */
if:
	  IF '(' pattern rparen
		{ $$ = notnull($3); }
	;
140
/* lbrace - an opening brace followed by any number of newlines. */
lbrace:
	  '{'
	| lbrace NL
	;

/* nl - one or more newlines. */
nl:
	  NL
	| nl NL
	;

/* opt_nl - zero or more newlines. */
opt_nl:
	  /* empty */
		{ $$ = 0; }
	| nl
	;

/* opt_pst - an optional run of statement terminators (see pst). */
opt_pst:
	  /* empty */
		{ $$ = 0; }
	| pst
	;


/* opt_simple_stmt - an optional simple statement; 0 when absent. */
opt_simple_stmt:
	  /* empty */
		{ $$ = 0; }
	| simple_stmt
	;
164
/*
 * pas - the pattern-action list of a program, with optional terminators
 * on either side.  The value is 0 when there are no pattern-action
 * statements at all.
 */
pas:
	  opt_pst
		{ $$ = 0; }
	| opt_pst pa_stats opt_pst
		{ $$ = $2; }
	;
169
/* pa_pat - the pattern part of a pattern-action statement, run through notnull(). */
pa_pat:
	  pattern
		{ $$ = notnull($1); }
	;
173
/*
 * pa_stat - one top-level item of a program:
 *   pattern			(action defaults to printing the record)
 *   pattern { ... }
 *   pat1, pat2			(range pattern, default print action)
 *   pat1, pat2 { ... }
 *   { ... }			(no pattern)
 *   BEGIN { ... } / END { ... }	(linked onto beginloc / endloc; value 0)
 *   function definition	(registered through defn(); value 0)
 */
pa_stat:
	  pa_pat
		{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat
		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'
		{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{
			beginloc = linkum(beginloc, $3);
			$$ = 0;
		}
	| XEND lbrace stmtlist '}'
		{
			endloc = linkum(endloc, $3);
			$$ = 0;
		}
	| FUNC funcname '(' varlist rparen
		{ infunc++; }
	  lbrace stmtlist '}'
		{
			/* the mid-rule action is $6, so the body is $8 */
			infunc--;
			curfname = 0;
			defn((Cell *)$2, $4, $8);
			$$ = 0;
		}
	;
187
/* pa_stats - one or more pa_stat items, linked in order, with optional terminators between them. */
pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat
		{ $$ = linkum($1, $3); }
	;
192
/* patlist - one or more patterns separated by commas, linked in order. */
patlist:
	  pattern
	| patlist comma pattern
		{ $$ = linkum($1, $3); }
	;
197
/*
 * ppattern - the expression form used for unparenthesized print
 * arguments (see pplist/prarg).  It mirrors pattern but has no
 * relational-operator or "| getline" alternatives.
 */
ppattern:
	  var ASGNOP ppattern
		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{
			/* a constant right-hand side is passed to makedfa() here */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| ppattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
218
/*
 * pattern - a general expression: assignment, ?:, ||, &&, comparisons,
 * regular-expression match, array membership, "cmd | getline", and
 * concatenation (juxtaposition, given the precedence of CAT).
 */
pattern:
	  var ASGNOP pattern
		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }

	/* comparisons: the token value itself is the node type */
	| pattern EQ pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern
		{ $$ = op2($2, $1, $3); }

	/* matching against a regular expression or a computed pattern */
	| pattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{
			/* a constant right-hand side is passed to makedfa() here */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}

	/* array membership */
	| pattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }

	/* cmd | getline [var]: rejected when the safe flag is set */
	| pattern '|' GETLINE var
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, $4, itonp($2), $1);
			}
		}
	| pattern '|' GETLINE
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, (Node*)0, itonp($2), $1);
			}
		}

	| pattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
251
/* plist - two or more patterns separated by commas, linked in order. */
plist:
	  pattern comma pattern
		{ $$ = linkum($1, $3); }
	| plist comma pattern
		{ $$ = linkum($1, $3); }
	;

/* pplist - one or more ppatterns separated by commas, linked in order. */
pplist:
	  ppattern
	| pplist comma ppattern
		{ $$ = linkum($1, $3); }
	;
261
/*
 * prarg - the argument list of print/printf: nothing (the value is then
 * rectonode()), a bare list, or a parenthesized list.
 */
prarg:
	  /* empty */
		{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'
		{ $$ = $2; }
	;
267
/* print - either output keyword; the value is the token's own value. */
print:
	  PRINT
	| PRINTF
	;

/* pst - one or more statement terminators (newline or semicolon). */
pst:
	  NL
	| ';'
	| pst NL
	| pst ';'
	;

/* rbrace - a closing brace followed by any number of newlines. */
rbrace:
	  '}'
	| rbrace NL
	;
279
/*
 * re - a regular expression used on its own as an expression, optionally
 * negated; it becomes a MATCH node whose subject is rectonode().
 */
re:
	  reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re
		{ $$ = op1(NOT, notnull($2)); }
	;
285
/*
 * reg_expr - a /.../ literal.  startreg() is called in a mid-rule action
 * right after the opening slash, before the REGEXPR token is read; that
 * action occupies $2, so the expression text is $3.
 */
reg_expr:
	  '/'
		{ startreg(); }
	  REGEXPR '/'
		{ $$ = $3; }
	;
289
/* rparen - a closing parenthesis followed by any number of newlines. */
rparen:
	  ')'
	| rparen NL
	;
293
/*
 * simple_stmt - a print/printf statement (optionally redirected with
 * '|', APPEND or GT), a delete statement, or a bare expression.
 *
 * The three redirected print forms are rejected when the safe flag is
 * set.  Those branches, and the error alternative, now give the
 * statement an explicit value of 0 (the same value this grammar uses for
 * an empty statement) instead of leaving $$ unassigned.  Without that,
 * the <p> result of the print forms would be whatever $1 holds, and $1
 * is the <i> value of print.
 */
simple_stmt:
	  print prarg '|' term
		{
			if (safe) {
				SYNTAX("print | is unsafe");
				$$ = 0;
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg APPEND term
		{
			if (safe) {
				SYNTAX("print >> is unsafe");
				$$ = 0;
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg GT term
		{
			if (safe) {
				SYNTAX("print > is unsafe");
				$$ = 0;
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg
		{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']'
		{ $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname
		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern
		{ $$ = exptostat($1); }
	| error
		{
			yyclearin;
			SYNTAX("illegal statement");
			$$ = 0;		/* no usable statement was parsed */
		}
	;
310
/* st - statement terminator: newline(s), or a semicolon with optional newlines. */
st:
	  nl
	| ';' opt_nl
	;
315
/*
 * stmt - a complete statement.  break/continue are diagnosed when
 * inloop is zero; next/nextfile are diagnosed when infunc is nonzero.
 * In every case the node is still built after the diagnostic.
 */
stmt:
	  BREAK st
		{
			if (!inloop)
				SYNTAX("break illegal outside of loops");
			$$ = stat1(BREAK, NIL);
		}
	| CONTINUE st
		{
			if (!inloop)
				SYNTAX("continue illegal outside of loops");
			$$ = stat1(CONTINUE, NIL);
		}
	| do
		{ inloop++; }
	  stmt
		{ --inloop; }
	  WHILE '(' pattern ')' st
		{
			/* two mid-rule actions: body is $3, condition is $7 */
			$$ = stat2(DO, $3, notnull($7));
		}
	| EXIT pattern st
		{ $$ = stat1(EXIT, $2); }
	| EXIT st
		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt
		{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt
		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace
		{ $$ = $2; }
	| NEXT st
		{
			if (infunc)
				SYNTAX("next is illegal inside a function");
			$$ = stat1(NEXT, NIL);
		}
	| NEXTFILE st
		{
			if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			$$ = stat1(NEXTFILE, NIL);
		}
	| RETURN pattern st
		{ $$ = stat1(RETURN, $2); }
	| RETURN st
		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat2(WHILE, $1, $3);
		}
	| ';' opt_nl
		{ $$ = 0; }	/* empty statement */
	;
341
/* stmtlist - one or more statements, linked in order. */
stmtlist:
	  stmt
	| stmtlist stmt
		{ $$ = linkum($1, $2); }
	;
346
/* subop - either substitution keyword; the value is the token's own value. */
subop:
	  SUB
	| GSUB
	;
350
/*
 * term - arithmetic, unary operators, built-in and user function calls,
 * getline forms, constants and variables.
 */
term:
	/* binary arithmetic */
	  term '/' ASGNOP term
		{ $$ = op2(DIVEQ, $1, $4); }	/* '/' then ASGNOP builds a DIVEQ node */
	| term '+' term
		{ $$ = op2(ADD, $1, $3); }
	| term '-' term
		{ $$ = op2(MINUS, $1, $3); }
	| term '*' term
		{ $$ = op2(MULT, $1, $3); }
	| term '/' term
		{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term
		{ $$ = op2(MOD, $1, $3); }
	| term POWER term
		{ $$ = op2(POWER, $1, $3); }

	/* unary operators */
	| '-' term %prec UMINUS
		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS
		{ $$ = $2; }
	| NOT term %prec UMINUS
		{ $$ = op1(NOT, notnull($2)); }

	/* built-ins: with no argument list the argument is rectonode() */
	| BLTIN '(' ')'
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'
		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }

	/* user-defined function calls */
	| CALL '(' ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }

	| CLOSE term
		{ $$ = op1(CLOSE, $2); }

	/* increment and decrement */
	| DECR var
		{ $$ = op1(PREDECR, $2); }
	| INCR var
		{ $$ = op1(PREINCR, $2); }
	| var DECR
		{ $$ = op1(POSTDECR, $1); }
	| var INCR
		{ $$ = op1(POSTINCR, $1); }

	/* getline [var] [< file] */
	| GETLINE var LT term
		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term
		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var
		{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE
		{ $$ = op3(GETLINE, NIL, NIL, NIL); }

	/* index(): a regular expression as the second argument is diagnosed */
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{
			SYNTAX("index() doesn't permit regular expressions");
			$$ = op2(INDEX, $3, (Node*)$5);
		}

	| '(' pattern ')'
		{ $$ = $2; }

	/* match(): regex literal, or constant / computed pattern */
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{
			if (constnode($5)) {
				$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
			} else {
				$$ = op3(MATCHFCN, (Node *)1, $3, $5);
			}
		}

	| NUMBER
		{ $$ = celltonode($1, CCON); }

	/* split(): the last op4 argument records how the separator was given */
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'                   /* default */
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }

	| SPRINTF '(' patlist ')'
		{ $$ = op1($1, $3); }
	| STRING
		{ $$ = celltonode($1, CCON); }

	/* sub()/gsub(): with two arguments the target is rectonode() */
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
			} else {
				$$ = op4($1, (Node *)1, $3, $5, rectonode());
			}
		}
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
			} else {
				$$ = op4($1, (Node *)1, $3, $5, $7);
			}
		}

	/* substr() with and without a length */
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }

	| var
	;
418
/*
 * var - something that can be assigned to: a plain name, a subscripted
 * array element, or an INDIRECT reference (IVAR token or INDIRECT term).
 */
var:
	  varname
	| varname '[' patlist ']'
		{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR
		{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term
		{ $$ = op1(INDIRECT, $2); }
	;
425
/*
 * varlist - the parameter list of a function definition (possibly
 * empty).  arglist is updated to the list built so far after every
 * reduction; a repeated name is reported by checkdup().
 */
varlist:
	  /* nothing */
		{
			$$ = 0;
			arglist = $$;
		}
	| VAR
		{
			$$ = celltonode($1,CVAR);
			arglist = $$;
		}
	| varlist comma VAR
		{
			checkdup($1, $3);
			$$ = linkum($1,celltonode($3,CVAR));
			arglist = $$;
		}
	;
433
/* varname - a VAR, ARG or VARNF token turned into a node. */
varname:
	  VAR
		{ $$ = celltonode($1, CVAR); }
	| ARG
		{ $$ = op1(ARG, itonp($1)); }
	| VARNF
		{ $$ = op1(VARNF, (Node *) $1); }
	;
439
440
/* while - head of a while statement; the value is the condition run through notnull(). */
while:
	  WHILE '(' pattern rparen
		{ $$ = notnull($3); }
	;
444
445%%
446
447void setfname(Cell *p)
448{
449	if (isarr(p))
450		SYNTAX("%s is an array, not a function", p->nval);
451	else if (isfcn(p))
452		SYNTAX("you can't define function %s more than once", p->nval);
453	curfname = p->nval;
454}
455
456int constnode(Node *p)
457{
458	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
459}
460
461char *strnode(Node *p)
462{
463	return ((Cell *)(p->narg[0]))->sval;
464}
465
466Node *notnull(Node *n)
467{
468	switch (n->nobj) {
469	case LE: case LT: case EQ: case NE: case GT: case GE:
470	case BOR: case AND: case NOT:
471		return n;
472	default:
473		return op2(NE, n, nullnode);
474	}
475}
476
477void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
478{
479	char *s = cp->nval;
480	for ( ; vl; vl = vl->nnext) {
481		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
482			SYNTAX("duplicate argument %s", s);
483			break;
484		}
485	}
486}