reader.c | searchcode

/src/mosmlyac/reader.c

https://github.com/bluegnu/mosml
C | 1850 lines | 1628 code | 201 blank | 21 comment | 483 complexity | 0f4f18bbe03edfd93ce978b80f99392e MD5 | raw file
Possible License(s): GPL-2.0

#ifdef ANSI
#include <string.h>
#endif

#include "defs.h"

/*  The line size must be a positive integer.  One hundred was chosen	*/
/*  because few lines in Yacc input grammars exceed 100 characters.	*/
/*  Note that if a line exceeds LINESIZE characters, the line buffer	*/
/*  will be expanded to accomodate it.					*/

#define LINESIZE 100

char *cache;
int cinc, cache_size;

int ntags, tagmax;
char **tag_table;

char saw_eof, unionized;
char *cptr, *line;
int linesize;

bucket *goal;
int prec;
int gensym;
char last_was_action;

int maxitems;
bucket **pitem;

int maxrules;
bucket **plhs;

int name_pool_size;
char *name_pool;

bucket** sortedtokens;
int ntruetokens;

char line_format[] = "(* Line %d, file %s *)\n";


void start_rule(register bucket *bp, int s_lineno);


void cachec(int c)
{
    assert(cinc >= 0);
    if (cinc >= cache_size)
    {
	cache_size += 256;
	cache = REALLOC(cache, cache_size);
	if (cache == 0) no_space();
    }
    cache[cinc] = c;
    ++cinc;
}


void get_line(void)
{
    register FILE *f = input_file;
    register int c;
    register int i;

    if (saw_eof || (c = getc(f)) == EOF)
    {
	if (line) { FREE(line); line = 0; }
	cptr = 0;
	saw_eof = 1;
	return;
    }

    if (line == 0 || linesize != (LINESIZE + 1))
    {
	if (line) FREE(line);
	linesize = LINESIZE + 1;
	line = MALLOC(linesize);
	if (line == 0) no_space();
    }

    i = 0;
    ++lineno;
    for (;;)
    {
	line[i]  =  c;
	if (c == '\n') { cptr = line; return; }
	if (++i >= linesize)
	{
	    linesize += LINESIZE;
	    line = REALLOC(line, linesize);
	    if (line ==  0) no_space();
	}
	c = getc(f);
	if (c ==  EOF)
	{
	    line[i] = '\n';
	    saw_eof = 1;
	    cptr = line;
	    return;
	}
    }
}


char *dup_line(void)
{
    register char *p, *s, *t;

    if (line == 0) return (0);
    s = line;
    while (*s != '\n') ++s;
    p = MALLOC(s - line + 1);
    if (p == 0) no_space();

    s = line;
    t = p;
    while ((*t++ = *s++) != '\n') continue;
    return (p);
}


void skip_comment(void)
{
    register char *s;

    int st_lineno = lineno;
    char *st_line = dup_line();
    char *st_cptr = st_line + (cptr - line);

    s = cptr + 2;
    for (;;)
    {
	if (*s == '*' && s[1] == '/')
	{
	    cptr = s + 2;
	    FREE(st_line);
	    return;
	}
	if (*s == '\n')
	{
	    get_line();
	    if (line == 0)
		unterminated_comment(st_lineno, st_line, st_cptr);
	    s = cptr;
	}
	else
	    ++s;
    }
}


int nextc(void)
{
    register char *s;

    if (line == 0)
    {
	get_line();
	if (line == 0)
	    return (EOF);
    }

    s = cptr;
    for (;;)
    {
	switch (*s)
	{
	case '\n':
	    get_line();
	    if (line == 0) return (EOF);
	    s = cptr;
	    break;

	case ' ':
	case '\t':
	case '\f':
	case '\r':
	case '\v':
	case ',':
	case ';':
	    ++s;
	    break;

	case '\\':
	    cptr = s;
	    return ('%');

	case '/':
	    if (s[1] == '*')
	    {
		cptr = s;
		skip_comment();
		s = cptr;
		break;
	    }
	    else if (s[1] == '/')
	    {
		get_line();
		if (line == 0) return (EOF);
		s = cptr;
		break;
	    }
	    /* fall through */

	default:
	    cptr = s;
	    return (*s);
	}
    }
}


int keyword(void)
{
    register int c;
    char *t_cptr = cptr;

    c = *++cptr;
    if (isalpha(c))
    {
	cinc = 0;
	for (;;)
	{
	    if (isalpha(c))
	    {
		if (isupper(c)) c = tolower(c);
		cachec(c);
	    }
	    else if (isdigit(c) || c == '_' || c == '.' || c == '$')
		cachec(c);
	    else
		break;
	    c = *++cptr;
	}
	cachec(NUL);

	if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
	    return (TOKEN);
	if (strcmp(cache, "type") == 0)
	    return (TYPE);
	if (strcmp(cache, "left") == 0)
	    return (LEFT);
	if (strcmp(cache, "right") == 0)
	    return (RIGHT);
	if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
	    return (NONASSOC);
	if (strcmp(cache, "start") == 0)
	    return (START);
	if (strcmp(cache, "union") == 0)
	    return (UNION);
	if (strcmp(cache, "ident") == 0)
	    return (IDENT);
    }
    else
    {
	++cptr;
	if (c == '{')
	    return (TEXT);
	if (c == '%' || c == '\\')
	    return (MARK);
	if (c == '<')
	    return (LEFT);
	if (c == '>')
	    return (RIGHT);
	if (c == '0')
	    return (TOKEN);
	if (c == '2')
	    return (NONASSOC);
    }
    syntax_error(lineno, line, t_cptr);
    /*NOTREACHED*/
}


void copy_ident(void)
{
    register int c;
    register FILE *f = output_file;

    c = nextc();
    if (c == EOF) unexpected_EOF();
    if (c != '"') syntax_error(lineno, line, cptr);
    ++outline;
    fprintf(f, "#ident \"");
    for (;;)
    {
	c = *++cptr;
	if (c == '\n')
	{
	    fprintf(f, "\"\n");
	    return;
	}
	putc(c, f);
	if (c == '"')
	{
	    putc('\n', f);
	    ++cptr;
	    return;
	}
    }
}


void copy_text(void)
{
    register int c;
    int quote;
    register FILE *f = text_file;
    int need_newline = 0;
    int t_lineno = lineno;
    char *t_line = dup_line();
    char *t_cptr = t_line + (cptr - line - 2);

    if (*cptr == '\n')
    {
	get_line();
	if (line == 0)
	    unterminated_text(t_lineno, t_line, t_cptr);
    }

loop:
    c = *cptr++;
    switch (c)
    {
    case '\n':
    /* next_line: */
	putc('\n', f);
	need_newline = 0;
	get_line();
	if (line) goto loop;
	unterminated_text(t_lineno, t_line, t_cptr);

    case '`':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    putc(c, f);
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == quote)
		{
		    need_newline = 1;
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc(c, f);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '(':
	putc(c, f);
	need_newline = 1;
	c = *cptr;
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc('*', f);
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == '*' && *cptr == ')')
		{
		    putc(')', f);
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	need_newline = 1;
	goto loop;

    case '%':
    case '\\':
	if (*cptr == '}')
	{
	    if (need_newline) putc('\n', f);
	    ++cptr;
	    FREE(t_line);
	    return;
	}
	/* fall through */

    default:
	putc(c, f);
	need_newline = 1;
	goto loop;
    }
}


void copy_union(void)
{
    register int c;
    int quote;
    int depth;
    int u_lineno = lineno;
    char *u_line = dup_line();
    char *u_cptr = u_line + (cptr - line - 6);

    if (unionized) over_unionized(cptr - 6);
    unionized = 1;

    if (!lflag)
	fprintf(text_file, line_format, lineno, input_file_name);

    fprintf(text_file, "typedef union");
    if (dflag) fprintf(union_file, "typedef union");

    depth = 1;
    cptr++;

loop:
    c = *cptr++;
    putc(c, text_file);
    if (dflag) putc(c, union_file);
    switch (c)
    {
    case '\n':
    /* next_line: */
	get_line();
	if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
	goto loop;

    case '{':
	++depth;
	goto loop;

    case '}':
        --depth;
        if (c == '}' && depth == 0) {
          fprintf(text_file, " YYSTYPE;\n");
          FREE(u_line);
          return;
        }
        goto loop;

    case '\'':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, text_file);
		if (dflag) putc(c, union_file);
		if (c == quote)
		{
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc(c, text_file);
		    if (dflag) putc(c, union_file);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '(':
	c = *cptr;
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc('*', text_file);
	    if (dflag) putc('*', union_file);
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, text_file);
		if (dflag) putc(c, union_file);
		if (c == '*' && *cptr == ')')
		{
		    putc(')', text_file);
		    if (dflag) putc(')', union_file);
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	goto loop;

    default:
	goto loop;
    }
}


int hexval(int c)
{
    if (c >= '0' && c <= '9')
	return (c - '0');
    if (c >= 'A' && c <= 'F')
	return (c - 'A' + 10);
    if (c >= 'a' && c <= 'f')
	return (c - 'a' + 10);
    return (-1);
}


bucket *get_literal(void)
{
    register int c, quote;
    register int i;
    register int n;
    register char *s;
    register bucket *bp;
    int s_lineno = lineno;
    char *s_line = dup_line();
    char *s_cptr = s_line + (cptr - line);

    quote = *cptr++;
    cinc = 0;
    for (;;)
    {
	c = *cptr++;
	if (c == quote) break;
	if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
	if (c == '\\')
	{
	    char *c_cptr = cptr - 1;

	    c = *cptr++;
	    switch (c)
	    {
	    case '\n':
		get_line();
		if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
		continue;

	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
		n = c - '0';
		c = *cptr;
		if (IS_OCTAL(c))
		{
		    n = (n << 3) + (c - '0');
		    c = *++cptr;
		    if (IS_OCTAL(c))
		    {
			n = (n << 3) + (c - '0');
			++cptr;
		    }
		}
		if (n > MAXCHAR) illegal_character(c_cptr);
		c = n;
	    	break;

	    case 'x':
		c = *cptr++;
		n = hexval(c);
		if (n < 0 || n >= 16)
		    illegal_character(c_cptr);
		for (;;)
		{
		    c = *cptr;
		    i = hexval(c);
		    if (i < 0 || i >= 16) break;
		    ++cptr;
		    n = (n << 4) + i;
		    if (n > MAXCHAR) illegal_character(c_cptr);
		}
		c = n;
		break;

	    case 'a': c = 7; break;
	    case 'b': c = '\b'; break;
	    case 'f': c = '\f'; break;
	    case 'n': c = '\n'; break;
	    case 'r': c = '\r'; break;
	    case 't': c = '\t'; break;
	    case 'v': c = '\v'; break;
	    }
	}
	cachec(c);
    }
    FREE(s_line);

    n = cinc;
    s = MALLOC(n);
    if (s == 0) no_space();

    for (i = 0; i < n; ++i)
	s[i] = cache[i];

    cinc = 0;
    if (n == 1)
	cachec('\'');
    else
	cachec('"');

    for (i = 0; i < n; ++i)
    {
	c = ((unsigned char *)s)[i];
	if (c == '\\' || c == cache[0])
	{
	    cachec('\\');
	    cachec(c);
	}
	else if (isprint(c))
	    cachec(c);
	else
	{
	    cachec('\\');
	    switch (c)
	    {
	    case 7: cachec('a'); break;
	    case '\b': cachec('b'); break;
	    case '\f': cachec('f'); break;
	    case '\n': cachec('n'); break;
	    case '\r': cachec('r'); break;
	    case '\t': cachec('t'); break;
	    case '\v': cachec('v'); break;
	    default:
		cachec(((c >> 6) & 7) + '0');
		cachec(((c >> 3) & 7) + '0');
		cachec((c & 7) + '0');
		break;
	    }
	}
    }

    if (n == 1)
	cachec('\'');
    else
	cachec('"');

    cachec(NUL);
    bp = lookup(cache);
    bp->class = TERM;
    if (n == 1 && bp->value == UNDEFINED)
	bp->value = *(unsigned char *)s;
    FREE(s);

    return (bp);
}


int is_reserved(char *name)
{
    char *s;

    if (strcmp(name, ".") == 0 ||
	    strcmp(name, "$accept") == 0 ||
	    strcmp(name, "$end") == 0)
	return (1);

    if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
    {
	s = name + 3;
	while (isdigit(*s)) ++s;
	if (*s == NUL) return (1);
    }

    return (0);
}


bucket *get_name(void)
{
    register int c;

    cinc = 0;
    for (c = *cptr; IS_IDENT(c); c = *++cptr)
	cachec(c);
    cachec(NUL);

    if (is_reserved(cache)) used_reserved(cache);

    return (lookup(cache));
}


int get_number(void)
{
    register int c;
    register int n;

    n = 0;
    for (c = *cptr; isdigit(c); c = *++cptr)
	n = 10*n + (c - '0');

    return (n);
}


char *get_tag(void)
{
    register int c;
    register int i;
    register char *s;
    int t_lineno = lineno;
    char *t_line = dup_line();
    char *t_cptr = t_line + (cptr - line);

    cinc = 0; 
    {int last = ' ';  /* The character preceding the current one (c) */
     while (1) {
        c = *++cptr;
        if (c == '\n') 
	  {last = c; c = nextc(); }
        if (c == EOF) unexpected_EOF();
        /* Do not take `->' to signify the end of a type specification: */
        if ((c == '>') && (last != '-')) break;
        cachec(c); last = c;
     } }
    ++cptr;
    cachec(NUL);

    for (i = 0; i < ntags; ++i)
    {
	if (strcmp(cache, tag_table[i]) == 0)
	    return (tag_table[i]);
    }

    if (ntags >= tagmax)
    {
	tagmax += 16;
	tag_table = (char **)
			(tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
				   : MALLOC(tagmax*sizeof(char *)));
	if (tag_table == 0) no_space();
    }

    s = MALLOC(cinc);
    if  (s == 0) no_space();
    strcpy(s, cache);
    tag_table[ntags] = s;
    ++ntags;
    FREE(t_line);
    return (s);
}


void declare_tokens(int assoc)
{
    register int c;
    register bucket *bp;
    int value;
    char *tag = 0;

    if (assoc != TOKEN) ++prec;

    c = nextc();
    if (c == EOF) unexpected_EOF();
    if (c == '<')
    {
	tag = get_tag();
	c = nextc();
	if (c == EOF) unexpected_EOF();
    }

    for (;;)
    {
	if (isalpha(c) || c == '_' || c == '.' || c == '$')
	    bp = get_name();
	else if (c == '\'' || c == '"')
	    bp = get_literal();
	else
	    return;

	if (bp == goal) tokenized_start(bp->name);
	bp->class = TERM;

	if (tag)
	{
	    if (bp->tag && tag != bp->tag)
		retyped_warning(bp->name);
	    bp->tag = tag;
	}

	if (assoc == TOKEN)
        {
            bp->true_token = 1;
        }
        else
	{
	    if (bp->prec && prec != bp->prec)
		reprec_warning(bp->name);
	    bp->assoc = assoc;
	    bp->prec = prec;
	}


	c = nextc();
	if (c == EOF) unexpected_EOF();
	value = UNDEFINED;
	/* It makes no sense in mosmlyac to explicit specify the tag 
	   number of a token or symbol (sestoft 2000-04-28):

	if (isdigit(c))
	{
	    value = get_number();
	    if (bp->value != UNDEFINED && value != bp->value)
		revalued_warning(bp->name);
	    bp->value = value;
	    c = nextc();
	    if (c == EOF) unexpected_EOF();
	}
	*/
    }
}


void declare_types(void)
{
    register int c;
    register bucket *bp;
    char *tag;

    c = nextc();
    if (c == EOF) unexpected_EOF();
    if (c != '<') syntax_error(lineno, line, cptr);
    tag = get_tag();

    for (;;)
    {
	c = nextc();
	if (isalpha(c) || c == '_' || c == '.' || c == '$')
	    bp = get_name();
	else if (c == '\'' || c == '"')
	    bp = get_literal();
	else
	    return;

	if (bp->tag && tag != bp->tag)
	    retyped_warning(bp->name);
	bp->tag = tag;
    }
}


void declare_start(void)
{
    register int c;
    register bucket *bp;
    static int entry_counter = 0;

    c = nextc();
    if (c == EOF) unexpected_EOF();
    if (!isalpha(c) && c != '_' && c != '.' && c != '$')
	syntax_error(lineno, line, cptr);
    bp = get_name();

    if (bp->class == TERM)
	terminal_start(bp->name);
    bp->entry = ++entry_counter;
    if (entry_counter == 256)
      too_many_entries();
}


void read_declarations(void)
{
    register int c, k;

    cache_size = 256;
    cache = MALLOC(cache_size);
    if (cache == 0) no_space();

    for (;;)
    {
	c = nextc();
	if (c == EOF) unexpected_EOF();
	if (c != '%') syntax_error(lineno, line, cptr);
	switch (k = keyword())
	{
	case MARK:
	    return;

	case IDENT:
	    copy_ident();
	    break;

	case TEXT:
	    copy_text();
	    break;

	case UNION:
	    copy_union();
	    break;

	case TOKEN:
	case LEFT:
	case RIGHT:
	case NONASSOC:
	    declare_tokens(k);
	    break;

	case TYPE:
	    declare_types();
	    break;

	case START:
	    declare_start();
	    break;
	}
    }
}

/* Order tokens lexicographically by modifying next-pointers, first_symbol */

int tokcompare(const void* bpp1, const void* bpp2) 
{ return strcmp((*((bucket**)bpp1))->name, (*((bucket**)bpp2))->name); }


void sort_tokens() {
  int i;
  bucket *bp;

  /* Count tokens */
  ntruetokens = 0;
  for (bp = first_symbol; bp; bp = bp->next) 
    if (bp->class == TERM && bp->true_token) 
      ++ntruetokens;
  
  sortedtokens = (bucket **) MALLOC(ntruetokens*sizeof(bucket *));
  if (sortedtokens == 0) no_space();

  i = 0;
  for (bp = first_symbol; bp; bp = bp->next) 
    if (bp->class == TERM && bp->true_token)
      sortedtokens[i++] = bp;
  assert(i == ntruetokens);

  /* Sort sortedtokens[0..ntruetokens-1] in alphabetical order: */
  
  qsort(sortedtokens, ntruetokens, sizeof(bucket*), tokcompare);
  
  /* Assign explicit token values based on the alphabetical ordering: */
  /* (... and pray that later code does not screw up these values)    */

  for (i=0; i<ntruetokens; i++) 
    sortedtokens[i]->value = i+257;
}

void output_token_type(FILE *output_file)
{
  bucket * bp;
  int n, tyno;
  int i;
  bucket **sorted;

  fprintf(output_file, "local\n");

  /* Make type abbreviations for types that may be products or records: */
  tyno = 1;
  for (i = 0; i<ntruetokens; i++) {
    bp = sortedtokens[i];
    if (bp->tag && (strchr(bp->tag, '*') || strchr(bp->tag, '{')))
    {
      fprintf(output_file, "type t__%d__ = %s\n", tyno, bp->tag);
      tyno++;
    }
  }
  fprintf(output_file, "in\n");

  /* Output the token datatype, using the type abbreviations: */  
  fprintf(output_file, "datatype token =\n");
  n = 0; tyno = 1;
  for (i = 0; i<ntruetokens; i++) {
    bp = sortedtokens[i];
    if (bp->class == TERM && bp->true_token) {
      fprintf(output_file, "  %c %s", n == 0 ? ' ' : '|', bp->name);
      if (bp->tag) {
	/* If it may be a product or record, 
           there is an abbreviation t__nn__ for it: */
         if (strchr(bp->tag, '*') || strchr(bp->tag, '{')) {
	   fprintf(output_file, " of t__%d__", tyno);
  	   tyno++; 
         } else {
	   fprintf(output_file, " of %s", bp->tag); }
      }
      fprintf(output_file, "\n");
      n++;
    }
  }
  fprintf(output_file, "end;\n\n");
}

void initialize_grammar(void)
{
    nitems = 4;
    maxitems = 300;
    pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
    if (pitem == 0) no_space();
    pitem[0] = 0;
    pitem[1] = 0;
    pitem[2] = 0;
    pitem[3] = 0;

    nrules = 3;
    maxrules = 100;
    plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
    if (plhs == 0) no_space();
    plhs[0] = 0;
    plhs[1] = 0;
    plhs[2] = 0;
    rprec = (short *) MALLOC(maxrules*sizeof(short));
    if (rprec == 0) no_space();
    rprec[0] = 0;
    rprec[1] = 0;
    rprec[2] = 0;
    rassoc = (char *) MALLOC(maxrules*sizeof(char));
    if (rassoc == 0) no_space();
    rassoc[0] = TOKEN;
    rassoc[1] = TOKEN;
    rassoc[2] = TOKEN;
}


void expand_items(void)
{
    maxitems += 300;
    pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
    if (pitem == 0) no_space();
}


void expand_rules(void)
{
    maxrules += 100;
    plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
    if (plhs == 0) no_space();
    rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
    if (rprec == 0) no_space();
    rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
    if (rassoc == 0) no_space();
}


void advance_to_start(void)
{
    register int c;
    register bucket *bp;
    char *s_cptr;
    int s_lineno;

    for (;;)
    {
	c = nextc();
	if (c != '%') break;
	s_cptr = cptr;
	switch (keyword())
	{
	case MARK:
	    no_grammar();

	case TEXT:
	    copy_text();
	    break;

	case START:
	    declare_start();
	    break;

	default:
	    syntax_error(lineno, line, s_cptr);
	}
    }

    c = nextc();
    if (!isalpha(c) && c != '_' && c != '.' && c != '_')
	syntax_error(lineno, line, cptr);
    bp = get_name();
    if (goal == 0)
    {
	if (bp->class == TERM)
	    terminal_start(bp->name);
	goal = bp;
    }

    s_lineno = lineno;
    c = nextc();
    if (c == EOF) unexpected_EOF();
    if (c != ':') syntax_error(lineno, line, cptr);
    start_rule(bp, s_lineno);
    ++cptr;
}


void start_rule(register bucket *bp, int s_lineno)
{
    if (bp->class == TERM)
	terminal_lhs(s_lineno);
    bp->class = NONTERM;
    if (nrules >= maxrules)
	expand_rules();
    plhs[nrules] = bp;
    rprec[nrules] = UNDEFINED;
    rassoc[nrules] = TOKEN;
}


void end_rule(void)
{
    register int i;

    if (!last_was_action && plhs[nrules]->tag)
    {
	for (i = nitems - 1; pitem[i]; --i) continue;
	if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
	    default_action_warning();
    }

    last_was_action = 0;
    if (nitems >= maxitems) expand_items();
    pitem[nitems] = 0;
    ++nitems;
    ++nrules;
}


void insert_empty_rule(void)
{
    register bucket *bp, **bpp;

    assert(cache);
    sprintf(cache, "$$%d", ++gensym);
    bp = make_bucket(cache);
    last_symbol->next = bp;
    last_symbol = bp;
    bp->tag = plhs[nrules]->tag;
    bp->class = NONTERM;

    if ((nitems += 2) > maxitems)
	expand_items();
    bpp = pitem + nitems - 1;
    *bpp-- = bp;
    while (bpp[0] = bpp[-1]) --bpp;

    if (++nrules >= maxrules)
	expand_rules();
    plhs[nrules] = plhs[nrules-1];
    plhs[nrules-1] = bp;
    rprec[nrules] = rprec[nrules-1];
    rprec[nrules-1] = 0;
    rassoc[nrules] = rassoc[nrules-1];
    rassoc[nrules-1] = TOKEN;
}


void add_symbol(void)
{
    register int c;
    register bucket *bp;
    int s_lineno = lineno;

    c = *cptr;
    if (c == '\'' || c == '"')
	bp = get_literal();
    else
	bp = get_name();

    c = nextc();
    if (c == ':')
    {
	end_rule();
	start_rule(bp, s_lineno);
	++cptr;
	return;
    }

    if (last_was_action)
	insert_empty_rule();
    last_was_action = 0;

    if (++nitems > maxitems)
	expand_items();
    pitem[nitems-1] = bp;
}


void copy_action(void)
{
    register int c;
    register int i, n;
    int depth;
    int quote;
    bucket *item;
    char *tagres;
    register FILE *f = action_file;
    int a_lineno = lineno;
    char *a_line = dup_line();
    char *a_cptr = a_line + (cptr - line);

    if (last_was_action)
	insert_empty_rule();
    last_was_action = 1;

    fprintf(f, "(* Rule %d, file %s, line %d *)\n",
            nrules-2, input_file_name, lineno);

    n = 0;
    for (i = nitems - 1; pitem[i]; --i) ++n;

    /* Let-bind all $'s early in the semantic action, outside any function 
       closure:

       let val d__1__ = peekVal (n-1)
           val d__2__ = peekVal (n-2)
	   ...
           val d__n__ = peekVal 0
       in ... semantic action ... 
       end
    */
    fprintf(f, "val _ = update_ yyact %d\n(fn () => repr(let\n", nrules-2);
    for (i = 1; i <= n; i++)
       { item = pitem[nitems + i - n - 1];
         if (item->tag) 
            fprintf(f, "val d__%d__ = peekVal %d : %s\n", i, n - i, item->tag);
       }		
    fprintf(f, "in\n(");

    depth = 1;
    cptr++;

loop:
    c = *cptr;
    if (c == '$')
    {
	if (isdigit(cptr[1]))
	{
	    ++cptr;
	    i = get_number();

            if (i <= 0 || i > n)
              unknown_rhs(i);
            item = pitem[nitems + i - n - 1];
            if (item->tag) {
              fprintf(f, "(d__%d__)", i);
            } else {
              if (item->class == TERM)
                { illegal_token_ref(i, item->name); }
	      else
		{ missing_type(item->name); }
            /* This trick, which works in Caml Light and Edinburgh ML,  
             * cannot be used in Standard ML to improve type security:
             * if (sflag)
             *    fprintf(f, "(peekVal %d)", n - i);
             * else
             *    fprintf(f, "(peekVal %d : '%s)", n - i, item->name); 
             */
            }
	    goto loop;
	}
    }
    if (isalpha(c) || c == '_' || c == '$')
    {
	do
	{
	    putc(c, f);
	    c = *++cptr;
	} while (isalnum(c) || c == '_' || c == '$');
	goto loop;
    }
    if (c == '}' && depth == 1) {
      cptr++;
      tagres = plhs[nrules]->tag;
      if (tagres)
        { fprintf(f, ") end : %s))\n", tagres); }
      else         
	{ missing_type(plhs[nrules]->name); }
      /* Same problem as above: don't insert type ascriptions:
       * if (sflag)
       *  fprintf(f, ")))\n");
       * else
       *  fprintf(f, ") : '%s))\n", plhs[nrules]->name);
       */
      /* if (sflag) */
        fprintf(f, ";\n");
      return;
    }
    putc(c, f);
    ++cptr;
    switch (c)
    {
    case '\n':
    /* next_line: */
	get_line();
	if (line) goto loop;
	unterminated_action(a_lineno, a_line, a_cptr);

    case '{':
	++depth;
	goto loop;

    case '}':
	--depth;
        goto loop;

    case '`':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == quote)
		{
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc(c, f);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '(':
	c = *cptr;
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc('*', f);
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == '*' && *cptr == ')')
		{
		    putc(')', f);
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	goto loop;

    default:
	goto loop;
    }
}


int mark_symbol(void)
{
    register int c;
    register bucket *bp;

    c = cptr[1];
    if (c == '%' || c == '\\')
    {
	cptr += 2;
	return (1);
    }

    if (c == '=')
	cptr += 2;
    else if ((c == 'p' || c == 'P') &&
	     ((c = cptr[2]) == 'r' || c == 'R') &&
	     ((c = cptr[3]) == 'e' || c == 'E') &&
	     ((c = cptr[4]) == 'c' || c == 'C') &&
	     ((c = cptr[5], !IS_IDENT(c))))
	cptr += 5;
    else
	syntax_error(lineno, line, cptr);

    c = nextc();
    if (isalpha(c) || c == '_' || c == '.' || c == '$')
	bp = get_name();
    else if (c == '\'' || c == '"')
	bp = get_literal();
    else
    {
	syntax_error(lineno, line, cptr);
	/*NOTREACHED*/
    }

    if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
	prec_redeclared();

    rprec[nrules] = bp->prec;
    rassoc[nrules] = bp->assoc;
    return (0);
}


void read_grammar(void)
{
    register int c;

    initialize_grammar();
    advance_to_start();

    for (;;)
    {
	c = nextc();
	if (c == EOF) break;
	if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
		c == '"')
	    add_symbol();
	else if (c == '{' || c == '=')
	    copy_action();
	else if (c == '|')
	{
	    end_rule();
	    start_rule(plhs[nrules-1], 0);
	    ++cptr;
	}
	else if (c == '%')
	{
	    if (mark_symbol()) break;
	}
	else
	    syntax_error(lineno, line, cptr);
    }
    end_rule();
}


void free_tags(void)
{
    register int i;

    if (tag_table == 0) return;

    for (i = 0; i < ntags; ++i)
    {
	assert(tag_table[i]);
	FREE(tag_table[i]);
    }
    FREE(tag_table);
}


void pack_names(void)
{
    register bucket *bp;
    register char *p, *s, *t;

    name_pool_size = 13;  /* 13 == sizeof("$end") + sizeof("$accept") */
    for (bp = first_symbol; bp; bp = bp->next)
	name_pool_size += strlen(bp->name) + 1;
    name_pool = MALLOC(name_pool_size);
    if (name_pool == 0) no_space();

    strcpy(name_pool, "$accept");
    strcpy(name_pool+8, "$end");
    t = name_pool + 13;
    for (bp = first_symbol; bp; bp = bp->next)
    {
	p = t;
	s = bp->name;
	while (*t++ = *s++) continue;
	FREE(bp->name);
	bp->name = p;
    }
}


void check_symbols(void)
{
    register bucket *bp;

    if (goal->class == UNKNOWN)
	undefined_goal(goal->name);

    for (bp = first_symbol; bp; bp = bp->next)
    {
	if (bp->class == UNKNOWN)
	{
	    undefined_symbol_warning(bp->name);
	    bp->class = TERM;
	}
    }
}


void pack_symbols(void)
{
    register bucket *bp;
    register bucket **v;
    register int i, j, k, n, itrue;

    nsyms = 2;
    ntokens = 1;
    for (bp = first_symbol; bp; bp = bp->next)
    {
	++nsyms;
	if (bp->class == TERM) ++ntokens;
    }
    start_symbol = ntokens;
    nvars = nsyms - ntokens;

    symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
    if (symbol_name == 0) no_space();
    symbol_value = (short *) MALLOC(nsyms*sizeof(short));
    if (symbol_value == 0) no_space();
    symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
    if (symbol_prec == 0) no_space();
    symbol_assoc = MALLOC(nsyms);
    if (symbol_assoc == 0) no_space();
    symbol_tag = (char **) MALLOC(nsyms*sizeof(char *));
    if (symbol_tag == 0) no_space();
    symbol_true_token = (char *) MALLOC(nsyms*sizeof(char));
    if (symbol_true_token == 0) no_space();

    v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
    if (v == 0) no_space();

    v[0] = 0;
    v[start_symbol] = 0;

    i = 1;
    itrue = 0;
    j = start_symbol + 1;
    for (bp = first_symbol; bp; bp = bp->next)
    {
      if (bp->class == TERM) {
	if (bp->true_token) 
	  v[i] = sortedtokens[itrue++];
	else
	  v[i] = bp;
	i++;
      } else
	v[j++] = bp;
    }
    assert(i == ntokens && j == nsyms);

    for (i = 1; i < ntokens; ++i)
	v[i]->index = i;

    goal->index = start_symbol + 1;
    k = start_symbol + 2;
    while (++i < nsyms)
	if (v[i] != goal)
	{
	    v[i]->index = k;
	    ++k;
	}

    goal->value = 0;
    k = 1;
    for (i = start_symbol + 1; i < nsyms; ++i)
    {
	if (v[i] != goal)
	{
	    v[i]->value = k;
	    ++k;
	}
    }

    k = 0;
    /* This (assigns and) sorts the symbol_value fields for tokens 
       by insertion sort */
    /* It appears to be pretty useless (or dangerous) in the context 
       of mosmlyac */
    for (i = 1; i < ntokens; ++i)
    {
	n = v[i]->value;
	if (n > 256)
	{
	    for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
		symbol_value[j] = symbol_value[j-1];
	    symbol_value[j] = n;
	}
    }

    if (v[1]->value == UNDEFINED)
	v[1]->value = 256;

    j = 0;
    n = 257;
    for (i = 2; i < ntokens; ++i)
    {
	if (v[i]->value == UNDEFINED)
	{
	    while (j < k && n == symbol_value[j])
	    {
		while (++j < k && n == symbol_value[j]) continue;
		++n;
	    }
	    v[i]->value = n;
	    ++n;
	}
    }

    symbol_name[0] = name_pool + 8;
    symbol_value[0] = 0;
    symbol_prec[0] = 0;
    symbol_assoc[0] = TOKEN;
    symbol_tag[0] = "";
    symbol_true_token[0] = 0;
    for (i = 1; i < ntokens; ++i)
    {
	symbol_name[i] = v[i]->name;
	symbol_value[i] = v[i]->value;
	symbol_prec[i] = v[i]->prec;
	symbol_assoc[i] = v[i]->assoc;
	symbol_tag[i] = v[i]->tag;
	symbol_true_token[i] = v[i]->true_token;
    }
    symbol_name[start_symbol] = name_pool;
    symbol_value[start_symbol] = -1;
    symbol_prec[start_symbol] = 0;
    symbol_assoc[start_symbol] = TOKEN;
    symbol_tag[start_symbol] = "";
    symbol_true_token[start_symbol] = 0;
    for (++i; i < nsyms; ++i)
    {
	k = v[i]->index;
	symbol_name[k] = v[i]->name;
	symbol_value[k] = v[i]->value;
	symbol_prec[k] = v[i]->prec;
	symbol_assoc[k] = v[i]->assoc;
	symbol_tag[i] = v[i]->tag;
	symbol_true_token[i] = v[i]->true_token;
    }

    FREE(v);
}


void make_goal(void)
{
  static char name[7] = "'\\xxx'";
  bucket * bp;
  bucket * bc;

  goal = lookup("%entry%");
  ntotalrules = nrules - 2;
  for(bp = first_symbol; bp != 0; bp = bp->next) {
    if (bp->entry) {
      start_rule(goal, 0);
      if (nitems + 2> maxitems)
	expand_items();
      name[2] = '0' + ((bp->entry >> 6) & 7);
      name[3] = '0' + ((bp->entry >> 3) & 7);
      name[4] = '0' + (bp->entry & 7);
      bc = lookup(name);
      bc->class = TERM;
      bc->value = (unsigned char) bp->entry;
      pitem[nitems++] = bc;
      pitem[nitems++] = bp;
      fprintf(entry_file,
              "fun %s lexer lexbuf = yyparse yytables %d lexer lexbuf;\n",
              bp->name, bp->entry);
      if (bp->tag == NULL)
         missing_type(bp->name);
      fprintf(interface_file,
              "val %s :\n  (Lexing.lexbuf -> token) -> Lexing.lexbuf -> %s;\n",
              bp->name,
              bp->tag);
      fprintf(action_file,
              "(* Entry %s *)\n", bp->name);
      fprintf(action_file,
	   "val _ = update_ yyact %d (fn () => raise yyexit (peekVal 0));\n",
	   ntotalrules);
      ntotalrules++;
      last_was_action = 1;
      end_rule();
    }
  }
}

void pack_grammar(void)
{
    register int i, j;
    int assoc, prec;

    ritem = (short *) MALLOC(nitems*sizeof(short));
    if (ritem == 0) no_space();
    rlhs = (short *) MALLOC(nrules*sizeof(short));
    if (rlhs == 0) no_space();
    rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
    if (rrhs == 0) no_space();
    rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
    if (rprec == 0) no_space();
    rassoc = REALLOC(rassoc, nrules);
    if (rassoc == 0) no_space();

    ritem[0] = -1;
    ritem[1] = goal->index;
    ritem[2] = 0;
    ritem[3] = -2;
    rlhs[0] = 0;
    rlhs[1] = 0;
    rlhs[2] = start_symbol;
    rrhs[0] = 0;
    rrhs[1] = 0;
    rrhs[2] = 1;

    j = 4;
    for (i = 3; i < nrules; ++i)
    {
	rlhs[i] = plhs[i]->index;
	rrhs[i] = j;
	assoc = TOKEN;
	prec = 0;
	while (pitem[j])
	{
	    ritem[j] = pitem[j]->index;
	    if (pitem[j]->class == TERM)
	    {
		prec = pitem[j]->prec;
		assoc = pitem[j]->assoc;
	    }
	    ++j;
	}
	ritem[j] = -i;
	++j;
	if (rprec[i] == UNDEFINED)
	{
	    rprec[i] = prec;
	    rassoc[i] = assoc;
	}
    }
    rrhs[i] = j;

    FREE(plhs);
    FREE(pitem);
}


void print_grammar(void)
{
    register int i, j, k;
    int spacing;
    register FILE *f = verbose_file;

    if (!vflag) return;

    k = 1;
    for (i = 2; i < nrules; ++i)
    {
	if (rlhs[i] != rlhs[i-1])
	{
	    if (i != 2) fprintf(f, "\n");
	    fprintf(f, "%4d  %s :", i - 2, symbol_name[rlhs[i]]);
	    spacing = strlen(symbol_name[rlhs[i]]) + 1;
	}
	else
	{
	    fprintf(f, "%4d  ", i - 2);
	    j = spacing;
	    while (--j >= 0) putc(' ', f);
	    putc('|', f);
	}

	while (ritem[k] >= 0)
	{
	    fprintf(f, " %s", symbol_name[ritem[k]]);
	    ++k;
	}
	++k;
	putc('\n', f);
    }
}


void reader(void)
{
    create_symbol_table();
    read_declarations();
    sort_tokens();
    output_token_type(interface_file);
    output_token_type(code_file); 
    read_grammar();
    make_goal();
    free_symbol_table();
    free_tags();
    pack_names();
    check_symbols();
    pack_symbols();
    pack_grammar();
    free_symbols();
    print_grammar();
}