PageRenderTime 40ms CodeModel.GetById 12ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 1ms

/usr.bin/lex/parse.y

https://bitbucket.org/freebsd/freebsd-head/
Happy | 914 lines | 737 code | 177 blank | 0 comment | 0 complexity | 841cb6e964918a1d0493427b94ccda50 MD5 | raw file
  1/* parse.y - parser for flex input */
  2
  3%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
  4%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
  5
  6%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
  7%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
  8
  9%{
 10/*-
 11 * Copyright (c) 1990 The Regents of the University of California.
 12 * All rights reserved.
 13 *
 14 * This code is derived from software contributed to Berkeley by
 15 * Vern Paxson.
 16 * 
 17 * The United States Government has rights in this work pursuant
 18 * to contract no. DE-AC03-76SF00098 between the United States
 19 * Department of Energy and the University of California.
 20 *
 21 * Redistribution and use in source and binary forms are permitted provided
 22 * that: (1) source distributions retain this entire copyright notice and
 23 * comment, and (2) distributions including binaries display the following
 24 * acknowledgement:  ``This product includes software developed by the
 25 * University of California, Berkeley and its contributors'' in the
 26 * documentation or other materials provided with the distribution and in
 27 * all advertising materials mentioning features or use of this software.
 28 * Neither the name of the University nor the names of its contributors may
 29 * be used to endorse or promote products derived from this software without
 30 * specific prior written permission.
 31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 34 */
 35
 36/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
 37/* $FreeBSD$ */
 38
 39
 40/* Some versions of bison are broken in that they use alloca() but don't
 41 * declare it properly.  The following is the patented (just kidding!)
 42 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
 43 */
 44#ifdef YYBISON
 45/* AIX requires this to be the first thing in the file.  What a piece.  */
 46# ifdef _AIX
 47 #pragma alloca
 48# endif
 49#endif
 50
 51#include "flexdef.h"
 52
 53/* The remainder of the alloca() cruft has to come after including flexdef.h,
 54 * so HAVE_ALLOCA_H is (possibly) defined.
 55 */
 56#ifdef YYBISON
 57# ifdef __GNUC__
 58#  ifndef alloca
 59#   define alloca __builtin_alloca
 60#  endif
 61# else
 62#  if HAVE_ALLOCA_H
 63#   include <alloca.h>
 64#  else
 65#   ifdef __hpux
 66void *alloca ();
 67#   else
 68#    ifdef __TURBOC__
 69#     include <malloc.h>
 70#    else
 71char *alloca ();
 72#    endif
 73#   endif
 74#  endif
 75# endif
 76#endif
 77
 78/* Bletch, ^^^^ that was ugly! */
 79
 80
 81int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
 82int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
 83
 84int *scon_stk;
 85int scon_stk_ptr;
 86
 87static int madeany = false;  /* whether we've made the '.' character class */
 88int previous_continued_action;	/* whether the previous rule's action was '|' */
 89
 90/* Expand a POSIX character class expression. */
 91#define CCL_EXPR(func) \
 92	{ \
 93	int c; \
 94	for ( c = 0; c < csize; ++c ) \
 95		if ( isascii(c) && func(c) ) \
 96			ccladd( currccl, c ); \
 97	}
 98
 99/* While POSIX defines isblank(), it's not ANSI C. */
100#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
101
/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int
109
110%}
111
112%%
/* goal - the whole input.  After both sections have been parsed, build the
 * default rule from a freshly initialized, negated character class, branch
 * it into every start condition, and emit its action text.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0 );

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;
141
/* initlex - empty production reduced first; installs the "INITIAL"
 * start condition as a non-exclusive one.
 */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;
149
/* sect1 - section 1: any mix of start condition declarations and
 * option lines, possibly empty.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( "unknown error processing section 1" ); }
		;
156
/* sect1end - end of section 1: validate options and allocate the start
 * condition stack with lastsc + 1 entries (entries are used from index 1).
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
164
/* startconddecl - records in xcluflg whether the names that follow are
 * declared with SCDECL (false) or XSCDECL (true).
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
171
/* namelist1 - one or more start condition names; each is installed with
 * the exclusivity flag set by the preceding startconddecl.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( "bad start condition list" ); }
		;
181
/* options - an option line: OPTION_OP followed by zero or more
 * "name = value" settings.
 */
options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;

/* option - each setting stores a copy of the NAME text (nmstr) in the
 * corresponding global; the outfile option also sets did_outfilename.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		;
199
/* sect2 - section 2: a sequence of rules, each optionally prefixed by a
 * start condition list, or brace-enclosed groups of rules sharing one.
 * After each rule or group, scon_stk_ptr is restored to the value saved
 * by scon ($2), popping the start conditions that scon pushed.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;
206
/* initforrule - empty production that resets the per-rule state and
 * starts a new rule.
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
219
/* flexrule - one complete rule.
 *
 *   '^' rule  finish the rule and branch it into the scbol[] entries;
 *   rule      finish the rule and branch it into the scset[] entries;
 *   EOF_OP    build an <<EOF>> action.
 *
 * In each case the targets are the start conditions on scon_stk if any
 * are active.  Otherwise the pattern rules go to every start condition
 * whose scxclu[] flag is clear, and the EOF rule goes to every start
 * condition that has no EOF action yet.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt );

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt );

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					warn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( "unrecognized rule" ); }
		;
306
/* scon_stk_ptr - empty marker production whose value is the current
 * stack depth, captured before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
310
/* scon - optional start condition prefix.  Its value is always the stack
 * depth from before the prefix was processed, so that sect2 can restore
 * it.  "<*>" pushes every start condition 1 .. lastsc that is not already
 * on the stack.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
334
/* namelist2 - comma-separated list of start condition names inside <...>. */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( "bad start condition list" ); }
		;
342
/* sconname - a single start condition name in a <...> list.  Reports
 * names for which sclookup() returns 0, warns about a name already on the
 * stack, and otherwise pushes its number onto scon_stk.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* Only true if the loop above found no
				 * duplicate.
				 */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
365
/* rule - a pattern, with or without trailing context.
 *
 *   re2 re      head '/' trail
 *   re2 re '$'  error: trailing context given twice
 *   re '$'      pattern followed by end of line, built as trailing
 *               context consisting of a single '\n'
 *   re          plain pattern
 *
 * The actions settle headcnt / trailcnt / variable_trail_rule, which
 * flexrule then passes to finish_rule().
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					warn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;
				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( "trailing context used twice" ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( "trailing context used twice" );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;
483
484
/* re - alternation of series; any use of '|' marks the rule as
 * variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
494
495
/* re2 - the head of a trailing context rule, i.e. "re '/'".  Records the
 * head length in headcnt when it is fixed, then resets rulelen and
 * varlength so they describe the trailing part that follows.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( "trailing context used twice" );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
522
/* series - concatenation of one or more singletons. */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }
		;
534
/* singleton - the smallest pattern unit and its postfix operators.
 *
 * The repetition operators (*, +, ?, {n,m}, {n,}, {n}) all set varlength.
 * The primitive forms ('.', a character class, PREVCCL, CHAR) each add one
 * to rulelen.  Quoted strings and parenthesized expressions simply pass
 * through the machine built by their sub-rule.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton '{' NUMBER ',' NUMBER '}'
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( "bad iteration values" );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						"bad iteration values" );
						$$ = $1;
						}
					else
						/* {0,m} is built as an
						 * optional {1,m}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton '{' NUMBER ',' '}'
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( "iteration value must be positive" );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITY );
			}

		|  singleton '{' NUMBER '}'
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( "iteration value must be positive" );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				anyccl = cclinit();
				ccladd( anyccl, '\n' );
				cclnegate( anyccl );

				if ( useecs )
					mkeccl( ccltbl + cclmap[anyccl],
						ccllen[anyccl], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			$$ = mkstate( -anyccl );
			}

		|  fullccl
			{
			if ( ! cclsorted )
				/* Sort characters for fast searching.  We
				 * use a shell sort since this list could
				 * be large.
				 */
				cshell( ccltbl + cclmap[$1], ccllen[$1], true );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ( caseins && $1 >= 'A' && $1 <= 'Z' )
				$1 = clower( $1 );

			$$ = mkstate( $1 );
			}
		;
678
/* fullccl - a bracketed character class; a leading '^' negates it. */
fullccl		:  '[' ccl ']'
			{ $$ = $2; }

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
688
/* ccl - the contents of a character class, built left to right.  The empty
 * alternative creates the class and remembers it in currccl; the other
 * alternatives add a range, a single character, or a POSIX class
 * expression.  cclsorted tracks whether the characters have been added in
 * increasing order so far.
 */
ccl		:  ccl CHAR '-' CHAR
			{
			if ( caseins )
				{
				if ( $2 >= 'A' && $2 <= 'Z' )
					$2 = clower( $2 );
				if ( $4 >= 'A' && $4 <= 'Z' )
					$4 = clower( $4 );
				}

			if ( $2 > $4 )
				synerr( "negative range in character class" );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;
				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			if ( caseins && $2 >= 'A' && $2 <= 'Z' )
				$2 = clower( $2 );

			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;
			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
742
/* ccl_expr - a POSIX character class expression inside a character class.
 * Each alternative adds the matching characters to currccl via CCL_EXPR.
 * With caseins set, the "upper" class is expanded with islower instead.
 */
ccl_expr:	   CCE_ALNUM	{ CCL_EXPR(isalnum) }
		|  CCE_ALPHA	{ CCL_EXPR(isalpha) }
		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK) }
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl) }
		|  CCE_DIGIT	{ CCL_EXPR(isdigit) }
		|  CCE_GRAPH	{ CCL_EXPR(isgraph) }
		|  CCE_LOWER	{ CCL_EXPR(islower) }
		|  CCE_PRINT	{ CCL_EXPR(isprint) }
		|  CCE_PUNCT	{ CCL_EXPR(ispunct) }
		|  CCE_SPACE	{ CCL_EXPR(isspace) }
		|  CCE_UPPER	{
				if ( caseins )
					CCL_EXPR(islower)
				else
					CCL_EXPR(isupper)
				}
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit) }
		;
761		
/* string - the contents of a quoted string: starts as an epsilon state
 * and appends one state per character, counting each in rulelen.
 */
string		:  string CHAR
			{
			if ( caseins && $2 >= 'A' && $2 <= 'Z' )
				$2 = clower( $2 );

			++rulelen;

			$$ = link_machines( $1, mkstate( $2 ) );
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
775
776%%
777
778
779/* build_eof_action - build the "<<EOF>>" action for the active start
780 *                    conditions
781 */
782
783void build_eof_action()
784	{
785	int i;
786	char action_text[MAXLINE];
787
788	for ( i = 1; i <= scon_stk_ptr; ++i )
789		{
790		if ( sceof[scon_stk[i]] )
791			format_pinpoint_message(
792				"multiple <<EOF>> rules for start condition %s",
793				scname[scon_stk[i]] );
794
795		else
796			{
797			sceof[scon_stk[i]] = true;
798			sprintf( action_text, "case YY_STATE_EOF(%s):\n",
799				scname[scon_stk[i]] );
800			add_action( action_text );
801			}
802		}
803
804	line_directive_out( (FILE *) 0, 1 );
805
806	/* This isn't a normal rule after all - don't count it as
807	 * such, so we don't have any holes in the rule numbering
808	 * (which make generating "rule can never match" warnings
809	 * more difficult.
810	 */
811	--num_rules;
812	++num_eof_rules;
813	}
814
815
816/* format_synerr - write out formatted syntax error */
817
818void format_synerr( msg, arg )
819char msg[], arg[];
820	{
821	char errmsg[MAXLINE];
822
823	(void) sprintf( errmsg, msg, arg );
824	synerr( errmsg );
825	}
826
827
828/* synerr - report a syntax error */
829
830void synerr( str )
831char str[];
832	{
833	syntaxerror = true;
834	pinpoint_message( str );
835	}
836
837
838/* format_warn - write out formatted warning */
839
840void format_warn( msg, arg )
841char msg[], arg[];
842	{
843	char warn_msg[MAXLINE];
844
845	(void) sprintf( warn_msg, msg, arg );
846	warn( warn_msg );
847	}
848
849
850/* warn - report a warning, unless -w was given */
851
852void warn( str )
853char str[];
854	{
855	line_warning( str, linenum );
856	}
857
858/* format_pinpoint_message - write out a message formatted with one string,
859 *			     pinpointing its location
860 */
861
862void format_pinpoint_message( msg, arg )
863char msg[], arg[];
864	{
865	char errmsg[MAXLINE];
866
867	(void) sprintf( errmsg, msg, arg );
868	pinpoint_message( errmsg );
869	}
870
871
872/* pinpoint_message - write out a message, pinpointing its location */
873
874void pinpoint_message( str )
875char str[];
876	{
877	line_pinpoint( str, linenum );
878	}
879
880
881/* line_warning - report a warning at a given line, unless -w was given */
882
883void line_warning( str, line )
884char str[];
885int line;
886	{
887	char warning[MAXLINE];
888
889	if ( ! nowarn )
890		{
891		sprintf( warning, "warning, %s", str );
892		line_pinpoint( warning, line );
893		}
894	}
895
896
897/* line_pinpoint - write out a message, pinpointing it at the given line */
898
899void line_pinpoint( str, line )
900char str[];
901int line;
902	{
903	fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
904	}
905
906
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * Errors are reported from the "error" productions in the grammar via
 * synerr() instead.
 */

void yyerror( msg )
char msg[];
	{
	(void) msg;	/* intentionally unused */
	}