/tags/SN-NG4.1/snavigator/parsers/fortran/forlex.c
C | 1968 lines | 1514 code | 188 blank | 266 comment | 350 complexity | 7f0d7a98dc59265b01afc6d2b1255eca MD5 | raw file
- /*
- Copyright (c) 2000, Red Hat, Inc.
- This file is part of Source-Navigator.
- Source-Navigator is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your option)
- any later version.
- Source-Navigator is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License along
- with Source-Navigator; see the file COPYING. If not, write to
- the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
- MA 02111-1307, USA.
- */
- /* forlex.c:
- Tokenizing routines for Fortran program checker.
- This version implements blank-insensitivity.
- Copyright (C) 1993 by Robert K. Moniot.
- This program is free software. Permission is granted to
- modify it and/or redistribute it, retaining this notice.
- No guarantees accompany this software.
- Part I. yylex() -- gives tokens to the parser.
- Part II. advance() -- bottom-level scanning of input stream.
- */
- #define CASE_SENSITIVE
- /* Declarations shared by all modules */
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #if defined (__MSVC__) || defined(__STDC__) || defined(__osf__)
- #include <stdlib.h>
- #else
- char *getenv();
- #endif
- #include <tcl.h>
- #include "ftnchek.h"
- #define FORLEX
- #include "symtab.h"
- #include "fortran.h"
- #include "sn.h"
- int max_line_width = 0;
- extern FILE *hig_fp;
- extern int highlight;
- extern Tcl_Encoding encoding;
- /* lexdefs.h:
- Macros and shared info for lexical analysis routines
- */
- #define LEX_SHARED PRIVATE
- #define EOL '\n' /* Character for end of line, not of statement */
- extern YYSTYPE yylval; /* Lexical value for Yacc */
- /* Since EOS is special, need special macros for it */
- #define makeupper(C) (((C) != EOS && islower((int)(C)))? toupper((int)(C)):(C))
- #define iswhitespace(C) ( (C) != EOS && isspace((int)(C)) )
- #define isadigit(C) ( (C) != EOS && isdigit((int)(C)) )
- #define isaletter(C) ( (C) != EOS && isalpha((int)(C)) )
- #define ishex(C) ((C) != EOS && (isdigit((int)(C)) ||\
- (toupper((int)(C))>='A' && toupper((int)(C))<='F') ))
- /* define isidletter to allow underscore and/or dollar sign or not */
- /* both underscore and dollar sign */
- #define isidletter(C) ( (C) != EOS && (isalpha((int)(C)) || \
- (C) == '_' || (C) == '$' ) )
- #define BCD(C) ((C)-'0') /* Binary value of digit */
- #define HEX(C) (isdigit(C)?BCD(C):(makeupper(C)-'A'+10)) /* Hex value */
- /* Blank-insensitive advance */
- #define bi_advance() do {advance();} while(iswhitespace(curr_char))
- #define bi_advanceX() { do {advance(); i_white++; } while(iswhitespace(curr_char)); }
- LEX_SHARED int
- inside_string, /* TRUE when reading a string or hollerith */
- inside_hollerith, /* TRUE when reading a hollerith */
- WHILE_expected, /* DO seen and WHILE is coming up */
- contin_count, /* Number of continuation lines of stmt */
- prev_char, /* shared between forlex.c and advance.c */
- curr_char, /* Current input character */
- next_char; /* Lookahead character */
- #ifdef UNIX_CPP
- LEX_SHARED char
- *next_filename;
- LEX_SHARED int
- cpp_handled;
- #endif
- PRIVATE char acSymbol[1000];
- PRIVATE int Symbol_line_num;
- PRIVATE int Symbol_col_num;
- PRIVATE int Symbol_curr_index;
- PRIVATE char acCppInclude[1000];
- extern int complex_const_allowed, /* shared flags operated by fortran.y */
- inside_format,
- integer_context;
- extern int stmt_sequence_no; /* shared with fortran.y */
- /* Declare shared lexical routines */
- LEX_SHARED
- void advance();
- LEX_SHARED
- int is_keyword(), looking_at_cplx(), looking_at_keywd(), looking_at_relop();
- #ifdef DEBUG_INCLUDE
- LEX_SHARED
- int debug_include=FALSE;
- #endif
- /*
- Part I. yylex()
- Shared functions defined:
- yylex() Returns next token. Called from yyparse().
- implied_id_token(t,s) Creates token for blank common declaration.
- Note: compilation options LEX_STORE_STRINGS and LEX_STORE_HOLLERITHS:
- Define the macro name LEX_STORE_STRINGS to build a version of ftnchek that
- stores string constants, and LEX_STORE_HOLLERITHS to store hollerith
- constants. Now that INCLUDE statements are supported, strings must
- be stored. Holleriths are not used, so they need not be stored.
- */
- #define LEX_STORE_STRINGS
- #ifdef DEVELOPMENT /* For maintaining the program */
- abc
- #define LEX_STORE_HOLLERITHS
- #define DEBUG_FORLEX
- #endif
- #include <math.h>
- /* The following macro says whether a given character is legal,
- * i.e. one of the stream control chars or a valid ANSI Fortran
- * character. Lower case letters are considered legal too.
- * Nondigits in columns 1-6 (except EOF,EOS) are illegal.
- * Hopefully this works for EBCDIC too.
- */
- #define islegal(C) ( ((C) == EOF) || ((C) == EOS) || \
- ( (col_num >= 6 || isdigit(C)) && \
- ((C) >= ' ' && (C) <= 'z' && \
- legal_chars[toascii((int)(C))-toascii(' ')] == (C))) )
- /* Array has x where ASCII character is not valid */
- PRIVATE char legal_chars[]=
- " x\"x$xx'()*+,-./0123456789:x<=>xx\
- ABCDEFGHIJKLMNOPQRSTUVWXYZxxxx_xabcdefghijklmnopqrstuvwxyz";
- PRIVATE int
- #if 0
- curr_index, /* Index in line of curr_char */
- #endif
- next_index; /* Index in line of next_char */
- /* local functions defined */
- PRIVATE void
- #ifdef UNIX_CPP
- get_cpp_directive(),
- #endif
- get_dot(), get_dotted_keyword(), get_edit_descriptor(), get_hollerith(),
- get_identifier(), get_illegal_token(), get_label(),
- get_letter(), get_number(), get_punctuation(),
- get_simple_punctuation(), get_string(),
- #ifdef TYPELESS_CONSTANTS
- get_binary_const(),
- #endif
- get_complex_const();
- static void save_comment(char *filename, char *func, char *classn, int line_num, int col_num, char *acComment);
- PRIVATE void
- closeup();
- /* Gets next token for Yacc. Return value is token.class,
- * and a copy of the token is stored in yylval.
- */
- int
- yylex()
- {
- Token token;
- token.next_token = 0;
- token.dot_token = 0;
- if( acCppInclude[0] )
- {
- char acFilename[1000];
- strcpy( acFilename, acCppInclude );
- acCppInclude[0] = 0;
- open_include_file( SN_StrDup( acFilename ));
- }
- if( acSymbol[0] )
- {
- int h;
- Lsymtab *symt;
- /* printf( "acSymbol: <%s> %d %d\n", acSymbol, Symbol_line_num, Symbol_col_num ); */
- token.subclass = 0;
- token.line_num = Symbol_line_num;
- token.col_num = Symbol_col_num;
- token.curr_index = Symbol_curr_index;
- token.class = tok_identifier;
- token.value.integer = h = hash_lookup(acSymbol);
- if((symt=hashtab[h].loc_symtab) != NULL && symt->array_var) {
- token.class = tok_array_identifier;
- }
- acSymbol[0] = 0;
- yylval = token;
- return token.class;
- }
- /* Initialize token fields to scratch. */
- token.subclass = 0;
- token.value.integer = 0;
- if(curr_char == EOF) {
- token.class = EOF;
- token.line_num = line_num;
- token.col_num = col_num;
- token.curr_index = curr_index;
- }
- else /* not EOF */ {
- /* Skip leading spaces, and give error message if non-ANSI
- * characters are found.
- */
- while(iswhitespace(curr_char) || (! islegal(curr_char)) ) {
- if(!iswhitespace(curr_char)) {
- #ifdef UNIX_CPP
- if(curr_char == '#' && col_num == 1) {
- get_cpp_directive(); /* turn # line into EOS */
- break;
- }
- else
- #endif
- yyerror("Illegal character");
- }
- advance();
- }
- token.line_num = line_num;
- token.col_num = col_num;
- token.curr_index = curr_index;
- if(inside_format) { /* Handle format stuff here to avoid trouble */
- get_edit_descriptor(&token);
- }
- else if(isadigit(curr_char)) {
- if(col_num < 6)
- get_label(&token); /* Stmt label */
- else
- get_number(&token); /* Numeric or hollerith const */
- }
- else if(isidletter(curr_char)) {
- if(implicit_letter_flag)
- get_letter(&token); /* letter in IMPLICIT list */
- else
- get_identifier(&token); /* Identifier or keyword */
- }
- else if(curr_char == '\'' || curr_char == '"') {
- get_string(&token); /* Quoted string */
- }
- else if(curr_char == '.') {
- get_dot(&token); /* '.' lead-in */
- }
- else {
- get_punctuation(&token); /* Punctuation character or EOS */
- }
- }/*end not EOF*/
- if(token.class == EOS) {
- implicit_flag=FALSE; /* in case of errors, reset flags */
- implicit_letter_flag = FALSE;
- }
- prev_token_class = token.class;
- yylval = token;
- /* printf( "Token: %d\n", token.class ); */
- return token.class;
- } /* yylex */
- /* Fills argument with token for an identifer, as if an identifer
- * with name given by string s had been lexed. This will
- * be called by parser when blank common declaration is seen,
- * and when a main prog without program statement is found,
- * and when an unnamed block data statement is found,
- * so processing of named and unnamed cases can be handled uniformly.
- */
- void
- implied_id_token(t,s)
- Token *t;
- char *s;
- {
- int h;
- unsigned long hnum;
- hnum = hash(s);
- while( h=hnum%HASHSZ, hashtab[h].name != NULL &&
- strcmp(hashtab[h].name,s) != 0)
- hnum = rehash(hnum);
- if(hashtab[h].name == NULL) { /* not seen before */
- hashtab[h].name = s;
- hashtab[h].loc_symtab = NULL;
- hashtab[h].glob_symtab = NULL;
- hashtab[h].com_loc_symtab = NULL;
- hashtab[h].com_glob_symtab = NULL;
- }
- t->class = tok_identifier;
- t->value.integer = h;
- } /* implied_id_token */
- #ifdef UNIX_CPP
- /* This does not create a token but just performs the
- actions needed when a cpp directive is seen. It
- advances curr_char to the EOS. The setting of
- filename is delayed to this point because it is not
- stored in tokens but is external, so changing it
- must wait till the previous statement is fully
- parsed and any error messages printed and arg or
- com list headers completed.
- */
- #ifdef rigo
- PRIVATE void
- get_cpp_directive()
- {
- if(next_filename != (char *)NULL) {
- current_filename = next_filename;
- if(incdepth == 0)
- top_filename = next_filename;
- }
- do { /* Skip to end of directive. It will become an EOS */
- advance();
- } while( curr_char != EOS);
- if(f77_standard || !cpp_handled) {
- nonstandard(line_num,col_num);
- msg_tail(": preprocessor directive");
- if(!cpp_handled)
- msg_tail("(not processed)");
- }
- }/*get_cpp_directive*/
- #endif
- static void get_cpp_directive()
- {
- char ac[1000];
- char *pc;
- char *pcFilename;
- char *pcDefinename;
- int my_line_num;
- if( next_filename != (char *)NULL )
- {
- current_filename = next_filename;
- if( incdepth == 0 )
- {
- top_filename = next_filename;
- }
- }
- pc = ac;
- my_line_num = line_num;
- do
- { /* Skip to end of directive. It will become an EOS */
- advance();
- *pc++ = curr_char;
- } while( curr_char != EOS);
- pc[-1] = 0;
- for( pc = ac; *pc; pc++ )
- {
- if( ! isspace( *pc )) break;
- }
- if( strncmp( pc, "include", 7 ) == 0 )
- {
- if( hig_fp )
- {
- if( strcmp( current_filename, top_filename ) == 0 )
- {
- fprintf( hig_fp, "%d key %d.%d %d.%d\n"
- , PAF_HIGH
- , my_line_num
- , 0
- , my_line_num
- , pc - ac + 8
- );
- }
- }
- pcFilename = 0;
- for( ; *pc; pc++ )
- {
- if( *pc == '"' || *pc == '<' )
- {
- pcFilename = pc+1;
- break;
- }
- }
- if( pcFilename )
- {
- for( pc = pcFilename; *pc; pc++ )
- {
- if( *pc == '"' || *pc == '>' )
- {
- *pc = 0;
- break;
- }
- }
- strcpy( acCppInclude, pcFilename );
- }
- }
- else if( strncmp( pc, "define", 6 ) == 0 )
- {
- int my_index;
- if( hig_fp )
- {
- if( strcmp( current_filename, top_filename ) == 0 )
- {
- fprintf( hig_fp, "%d key %d.%d %d.%d\n"
- , PAF_HIGH
- , my_line_num
- , 0
- , my_line_num
- , pc - ac + 7
- );
- }
- }
- pcDefinename = 0;
- pc += 6;
- for( ; *pc; pc++)
- {
- if( ! iswhitespace( *pc ))
- {
- pcDefinename = pc;
- break;
- }
- }
- my_index = pc - ac + 1;
- if( pcDefinename )
- {
- for( pc = pcDefinename; *pc; pc++ )
- {
- #ifndef CASE_SENSITIVE
- *pc = makeupper( *pc );
- #endif
- if( iswhitespace( *pc ) || *pc == '(' )
- {
- *pc = 0;
- break;
- }
- }
- /* Identifier: find its hashtable entry or
- create a new entry. */
- {
- int h;
- h = hash_lookup(pcDefinename);
- hashtab[h].define = 1;
- if( highlight != -1 )
- {
- put_symbol(PAF_CONS_DEF,NULL,pcDefinename,
- current_filename,
- my_line_num,
- my_index,
- 0,0,
- (long)0,NULL,NULL,NULL,
- get_comment(current_filename,my_line_num),
- 0,0,0,0);
- }
- }
- }
- }
- }
- #endif
- PRIVATE void
- get_dot(token)
- Token *token;
- {
- closeup(); /* Advance till nonspace char in next_char */
- if(isadigit(next_char))
- get_number(token); /* Numeric const */
- else if(isaletter(next_char))
- get_dotted_keyword(token); /* .EQ. etc. */
- else
- get_simple_punctuation(token); /* "." out of place */
- }
- #define MAX_DOTTED_KEYWD (sizeof(".FALSE.")/sizeof(char))
- struct {
- char *name;
- int class,subclass;
- } dotted_keywords[]={
- {"EQ",tok_relop,relop_EQ},
- {"NE",tok_relop,relop_NE},
- {"LE",tok_relop,relop_LE},
- {"LT",tok_relop,relop_LT},
- {"GE",tok_relop,relop_GE},
- {"GT",tok_relop,relop_GT},
- {"AND",tok_AND,0},
- {"OR",tok_OR,0},
- {"NOT",tok_NOT,0},
- {"FALSE",tok_logical_const,FALSE},
- {"TRUE",tok_logical_const,TRUE},
- {"EQV",tok_EQV,0},
- {"NEQV",tok_NEQV,0},
- {NULL,0,0}
- };
- PRIVATE void
- get_dotted_keyword(token)
- Token *token;
- {
- char s[8];
- int i=0,
- has_embedded_space, /* Spaces inside keyword */
- space_seen_lately; /* Flag for catching embedded space */
- int j=0;
- initial_flag = FALSE;
- /* Watch for embedded space, but not
- between dots and letters of keyword.
- I.e. ". eq ." is OK, but not ".e q." */
- has_embedded_space = FALSE;
- space_seen_lately = FALSE;
- bi_advance(); /* gobble the initial '.' */
- Symbol_line_num = line_num;
- Symbol_col_num = col_num;
- Symbol_curr_index = curr_index;
- /* while(isaletter(curr_char)) { */
- while(isidletter(curr_char) || isadigit(curr_char)) {
- if( i<7 )
- s[i++] = makeupper(curr_char);
- #ifdef CASE_SENSITIVE
- acSymbol[j++] = curr_char;
- #else
- acSymbol[j++] = makeupper(curr_char);
- #endif
- if(space_seen_lately)
- has_embedded_space = TRUE;
- bi_advance();
- space_seen_lately = iswhitespace(prev_char);
- }
- s[i] = '\0';
- acSymbol[j] = '\0';
- /* printf( "***** <%s>\n", acSymbol ); */
- for(i=0; dotted_keywords[i].name != NULL; i++) {
- if(strcmp(s,dotted_keywords[i].name) == 0) {
- token->class = dotted_keywords[i].class;
- token->subclass = dotted_keywords[i].subclass;
- token->value.string = dotted_keywords[i].name;
- if(curr_char != '.') {
- yyerror("Badly formed logical/relational operator or constant");
- }
- else {
- advance(); /* gobble the final '.' */
- acSymbol[0] = 0;
- }
- return;
- }
- }
- /* keyword not found */
- token->class = '.';
- } /* get_dotted_keyword */
- static void get_edit_descriptor(token)
- Token *token;
- {
- while( curr_char != EOS && curr_char != EOF )
- {
- advance();
- };
- token->class = EOS;
- }
- #ifdef rigo
- PRIVATE void
- get_edit_descriptor(token)
- Token *token;
- {
- int i=0,c;
- long repeat_spec;
- char s[MAXIDSIZE+1]; /* string holding the descriptor: NOT STORED */
- if(isadigit(curr_char)) { /* Digit: repeat spec or holl or kP or nX */
- repeat_spec = 0;
- do {
- repeat_spec = repeat_spec*10L + (long)BCD(curr_char);
- if( makeupper(next_char) == 'H' )
- inside_hollerith = TRUE;/* get ready for hollerith*/
- bi_advance();
- } while(isadigit(curr_char));
- if( makeupper(curr_char) == 'H' ) {
- /* nH... pass off to hollerith routine */
- get_hollerith(token, (int)repeat_spec);
- return;
- }
- else {
- /* Otherwise it is a repeat spec or the
- numeric part of kP or nX which we treat
- as repeat specs too */
- token->class = tok_integer_const;
- token->value.integer = repeat_spec;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nInteger const:\t\t%d",repeat_spec);
- #endif
- }
- }/* end if digit */
- else if(isaletter(curr_char)) {
- c = makeupper(curr_char);
- s[i++] = c;
- bi_advance();
- switch(c) {
- case 'P': /* P of kP k seen previously */
- if(prev_token_class != tok_integer_const) {
- if(f77_standard){
- nonstandard(token->line_num,token->col_num);
- msg_tail(": P must follow a number");
- }
- }
- break;
- case 'X': /* X or nX */
- break;
- case 'S': /* S or SP or SS */
- c = makeupper(curr_char);
- if(c == 'S' || c == 'P') {
- s[i++] = c;
- bi_advance();
- }
- break;
- case 'B': /* BN or BZ */
- c = makeupper(curr_char);
- if(c == 'N' || c == 'Z') {
- s[i++] = c;
- bi_advance();
- }
- else {
- if(f77_standard){
- nonstandard(token->line_num,token->col_num);
- msg_tail(": N or Z expected after B");
- }
- }
- break;
- case 'T': /* Tc or TLc or TRc */
- c = makeupper(curr_char);
- if(c == 'L' || c == 'R') {
- s[i++] = c;
- bi_advance();
- }
- goto get_w_d;
- /* Iw, Ew.c and similar forms */
- case 'A': case 'D': case 'E':
- case 'F': case 'G': case 'L':
- case 'I':
- get_w_d: /* Get the w field if any */
- while( isadigit(curr_char) ){
- if(i < MAXIDSIZE) /* Store it temporarily (up to a point) */
- s[i++] = curr_char;
- bi_advance();
- }
- /* Include any dot followed by number (e.g. F10.5)
- */
- if( curr_char == '.' ) {
- do {
- if(i < MAXIDSIZE)
- s[i++] = curr_char;
- bi_advance();
- } while( isadigit(curr_char) );
- }
- break;
- default:
- if(f77_standard) {
- nonstandard(token->line_num,token->col_num);
- msg_tail(": edit descriptor");
- s[i] = '\0';
- msg_tail(s);
- }
- goto get_w_d;
- }/*end switch*/
- token->class = tok_edit_descriptor;
- token->value.string = NULL;
- s[i++] = '\0';
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nEdit descriptor:\t%s",s);
- #endif
- }/*end else if isaletter*/
- /* Apostrophe means a string */
- else if( curr_char == '\'' || curr_char == '"' ) {
- get_string(token);
- }
- /* Otherwise it is mere punctuation. Handle
- it here ourself to avoid complications. */
- else {
- get_simple_punctuation(token);
- }
- }
- #endif
- PRIVATE void
- get_hollerith(token,n) /* Gets string of form nHaaaa */
- Token *token;
- int n;
- {
- int i,last_col_num;
- /* Holl. consts are not stored unless the macro name LEX_STORE_HOLLERITHS
- is defined. */
- #ifdef LEX_STORE_HOLLERITHS
- int strsize=n;
- char *s;
- #else
- char *s = "Not stored";
- #endif
- initial_flag = FALSE;
- #ifdef LEX_STORE_HOLLERITHS
- if( (s=(char *)ckalloc((unsigned)(strsize+1))) == (char *)NULL ) {
- oops_message(OOPS_NONFATAL,line_num,col_num,
- "Out of string space for hollerith constant");
- strsize=0;
- }
- memset (s, 0, (strsize+1));
- #endif
- if(n==1)
- inside_hollerith=FALSE;/* turn off flag ahead of next_char */
- advance();/* Gobble the 'H' */
- last_col_num = col_num;
- for(i=0; i<n; i++) {
- while(curr_char == EOL) {
- /* Treat short line as if extended with blanks */
- int col;
- for(col=last_col_num; i<n && col<max_stmt_col; i++,col++) {
- #ifdef LEX_STORE_HOLLERITHS
- if(i < strsize)
- s[i] = ' ';
- #endif
- }
- last_col_num = col_num;
- advance();
- }
- if(i==n) break;
- if(curr_char == EOS || curr_char == EOF) {
- int col;
- for(col=last_col_num; i<n && col<max_stmt_col; i++,col++) {
- #ifdef LEX_STORE_HOLLERITHS
- if(i < strsize)
- s[i] = ' ';
- #endif
- }
- #ifdef LEX_STORE_HOLLERITHS
- strsize=i; /* in case it did not fill up */
- #endif
- break;
- }
- else {
- #ifdef LEX_STORE_HOLLERITHS
- s[i] = curr_char;
- #endif
- last_col_num = col_num;
- if(i==n-2)/* turn flag off ahead of next_char*/
- inside_hollerith = FALSE;
- advance();
- }
- }
- #ifdef LEX_STORE_HOLLERITHS
- if(strsize > 0)
- s[strsize] = '\0';
- #endif
- inside_hollerith = FALSE;
- token->class = tok_hollerith;
- token->value.string = s;
- token->size = n;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nHollerith:\t\t%s",s);
- #endif
- } /* get_hollerith */
- #include "keywords.h"
- /* get_identifier reads a string of characters satisfying
- isidletter. As they are read and as long as they are
- alphabetic, it looks for a match to a keyword, and
- whenever one is found, checks with is_keyword to see
- if the context is right. If so, it returns the keyword.
- Otherwise it keeps going and eventually returns the id.
- */
- PRIVATE void
- get_identifier(token)
- Token *token;
- {
- char s_upper[MAXIDSIZE+1]; /* string holding the identifier */
- char s_lower[MAXIDSIZE+1]; /* string holding the identifier */
- int c, /* Uppercase version of current letter */
- preceding_c,/* Char preceding latest id */
- has_embedded_space, /* Spaces inside keyword or id */
- space_seen_lately, /* Flag for catching embedded space */
- i, /* Index in s of current letter */
- lo,hi, /* Indices in keyword table where match may be */
- klen, /* Length of id read so far (after keyword test) */
- keywd_class;/* Class number returned by is_keyword */
- int possible_keyword;
- int i_white;
- token->class = tok_identifier;
- keywd_class = FALSE;
- i = klen = 0;
- lo = 0;
- hi = NUM_KEYWORDS-1;
- i_white = 0;
- /* Define shorthand for the keyword letter under study */
- #define KN(i) keywords[i].name
- #define KL(i) keywords[i].name[klen]
- possible_keyword = TRUE;
- preceding_c = prev_char;
- has_embedded_space = FALSE;
- space_seen_lately = FALSE;
- /* This loop gets letter [letter|digit]* forms */
- while(isidletter(curr_char) || isadigit(curr_char)) {
- c = makeupper(curr_char); /* Get the next char of id */
- if(i < MAXIDSIZE) /* Store it (up to a point) */
- {
- s_upper[i] = c;
- s_lower[i] = curr_char;
- i++;
- }
- if(space_seen_lately)
- {
- has_embedded_space = TRUE;
- }
- bi_advanceX(); /* Pull in the next character */
- space_seen_lately = iswhitespace(prev_char);
- /* As long as it may yet be a keyword,
- keep track of whether to invoke is_keyword.
- */
- if(possible_keyword) {
- if(!isaletter(c) /* If not alphabetic, cannot be keyword */
- || klen >= sizeof(keywords[0].name)-1) /* or overlength */
- {
- #ifdef DEBUG_IS_KEYWORD
- if(debug_lexer && getenv("BISECTION")) {
- s[i] = '\0';
- fprintf(list_fd,"\n%s not a keyword because",s);
- if(!isaletter(c))
- fprintf(list_fd," non-letter at %c",c);
- if(klen >= sizeof(keywords[0].name)-1)
- fprintf(list_fd,"length %d >= max %d",klen,sizeof(keywords[0].name)-1);
- }
- #endif
- possible_keyword = FALSE;
- }
- else {
- int mid;
- #ifdef DEBUG_IS_KEYWORD
- if(debug_lexer && getenv("BISECTION")) {
- fprintf(list_fd,"\nklen=%d c=%c",klen,c);
- fprintf(list_fd,"\nBisecting [lo,hi]=[%d,%d] \"%s\"..\"%s\"",
- lo,hi,KN(lo),KN(hi));
- }
- #endif
- /* Bisect lo .. hi looking for match
- on characters found so far. */
- while(lo <= hi) {
- mid = (lo + hi)/2;
- if( KL(mid) < c ) { /* No match in lower half */
- lo = mid+1;
- }
- else if( KL(mid) > c ) {/* No match in upper half */
- hi = mid-1;
- }
- else { /* Match at midpoint: Bisect each
- half to find the new subinterval. */
- int midlo=mid, midhi=mid;
- /* Bisect lo .. mid */
- while( lo < midlo-1 && KL(lo) != c) {
- mid = (lo + midlo)/2;
- if( KL(mid) < c ) {
- lo = mid+1;
- }
- else { /* equal */
- midlo = mid;
- }
- }
- if( KL(lo) != c )
- lo = midlo;
- /* Bisect mid .. hi */
- while( midhi < hi-1 && KL(hi) != c ) {
- mid = (midhi + hi)/2;
- if( KL(mid) > c ) {
- hi = mid-1;
- }
- else { /* equal */
- midhi = mid;
- }
- }
- if( KL(hi) != c )
- hi = midhi;
- break; /* After bisecting each half, we are done */
- } /* end else KL(mid) == c */
- } /* end while(lo <= hi) */
- klen++; /* Now increment the length */
- #ifdef DEBUG_IS_KEYWORD
- if(debug_lexer && getenv("BISECTION")) {
- fprintf(list_fd,"\nNew [lo,hi]=[%d,%d] \"%s\"..\"%s\"",
- lo,hi,KN(lo),KN(hi));
- }
- #endif
- /* If range is null, a match has been ruled out. */
- if(lo > hi) {
- #ifdef DEBUG_IS_KEYWORD
- if(debug_lexer && getenv("BISECTION")) {
- s[i]='\0';
- fprintf(list_fd,"\nKeyword ruled out for %s at length %d since lo %d > hi %d",
- s,klen,lo,hi);
- }
- #endif
- possible_keyword = FALSE;
- }
- /* If length of first keyword in range is equal
- to the new length, then we have a match at
- this point. Check it out with is_keyword.
- */
- else if(KN(lo)[klen] == '\0') {
- if( (keywd_class = is_keyword(lo)) != FALSE) {
- token->class = keywd_class; /* It's a keyword */
- token->value.string = NULL;
- s_upper[i] = 0;
- s_lower[i] = 0;
- i++;
- if( hig_fp )
- {
- if( strcmp( current_filename, top_filename ) == 0 )
- {
- fprintf( hig_fp, "%d key %d.%d %d.%d\n"
- , PAF_HIGH
- , token->line_num
- , token->curr_index
- , token->line_num
- , token->curr_index + i_white
- );
- }
- }
- break; /* Quit the input loop */
- }
- else if(lo == hi) { /* Match is unique and ruled out */
- possible_keyword = FALSE;
- }
- }
- }/* end else isaletter(c) */
- }/* end if(possible_keyword) */
- }/* end while(isidletter || isadigit) */
- if(keywd_class == FALSE) { /* it is an identifier */
- /* Identifier: find its hashtable entry or
- create a new entry. */
- int h;
- Lsymtab *symt;
- #ifdef TYPELESS_CONSTANTS
- /* Watch out for const like X'nnn' */
- if(i == 1 && curr_char == '\'') {
- get_binary_const(token,s_upper[0],NULL);
- return;
- }
- #endif
- s_upper[i] = '\0';
- s_lower[i] = '\0';
- i++;
- #ifdef CASE_SENSITIVE
- token->value.integer = h = hash_lookup(s_lower);
- #else
- token->value.integer = h = hash_lookup(s_upper);
- #endif
- /* If it is an array give it a special token
- class, so that arrays can be distinguished
- from functions in the grammar. */
- if((symt=hashtab[h].loc_symtab) != NULL
- && symt->array_var) {
- token->class = tok_array_identifier;
- }
- }
- /* Check identifiers for being juxtaposed
- to keywords or having internal space.
- Keywords are immune to warning since
- want to allow both GOTO and GO TO, etc.
- */
- if(pretty_flag &&
- (token->class==tok_identifier || token->class==tok_array_identifier)
- && ( isidletter(preceding_c) || isadigit(preceding_c)
- || has_embedded_space ) ) {
- ugly_code(token->line_num,token->col_num,"identifier");
- msg_tail(hashtab[token->value.integer].name);
- #if 0 /* Keywords immune for now */
- ugly_code(token->line_num,token->col_num,"keyword");
- msg_tail(keywords[keytab_index[keywd_class-keytab_offset]].name);
- #endif
- if(has_embedded_space)
- msg_tail("has embedded space");
- else
- msg_tail("not clearly separated from context");
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer){
- switch(token->class) {
- case tok_identifier:
- fprintf(list_fd,"\nIdentifier:\t\t%s",s);
- break;
- case tok_array_identifier:
- fprintf(list_fd,"\nArray_identifier:\t%s",s);
- break;
- default:
- fprintf(list_fd,"\nKeyword:\t\ttok_%s",s);
- break;
- }
- }
- #endif
- } /* get_identifier */
- /* iskeyword:
- Determines (to the best of its current ability) whether a given
- identifier is a keyword or not. Hopefully now no keywords are
- reserved.
- Method uses context from start of statement up to and including
- the character following the putative keyword to eliminate as
- many cases as possible. Any non-IK keywords (those that need not
- be in the initial series of keywords of statement) have special
- code to handle them. Any IK's that are always the second word of a
- pair are accepted if the predecessor was just seen. The rest are
- handed off to looking_at_keywd which tries to see if
- it is an assignment statement.
- Note that some rules that could be used if F77 Standard were
- adhered to strictly are not used here. The idea is to allow
- extensions, and leave catching syntax errors in the parser.
- For example, specification-statement keywords are not excluded
- after the first executable statement has been seen. The status
- of a variable as declared array or character type is not consulted
- in ruling out an assignment statement if following parentheses
- are present. Etc.
- */
- /* Macro to test if all the specified bits are set */
- #define MATCH(CONTEXT) ((keywords[i].context & (CONTEXT)) == (CONTEXT))
- LEX_SHARED int
- is_keyword(i)
- int i; /* Index in keywords table */
- {
- int ans = FALSE;
- int putative_keyword_class; /* Class of the supposed keyword */
- while(iswhitespace(curr_char)) /* Move to lookahead char */
- advance();
- #ifdef DEBUG_IS_KEYWORD
- if(debug_lexer){
- fprintf(list_fd,
- "\nkeyword %s: initialflag=%d implicitflag=%d ",
- keywords[i].name,initial_flag,implicit_flag);
- fprintf(list_fd,
- "context=%o, next char=%c %o",keywords[i].context,
- curr_char,curr_char);
- }
- #endif
- putative_keyword_class = keywords[i].class;
- if( !initial_flag && MATCH(IK) ) {
- /* Dispose of keywords which can only occur in initial
- part of statement, if found elsewhere. */
- ans = FALSE;
- }
- #if 0 /* This does not work: curr_stmt_class not cleared beforehand */
- else if(curr_stmt_class == tok_IF && MATCH(NI)) {
- /* Dispose of keywords which cannot occur in stmt
- field of logical IF if that is where we are.
- */
- ans = FALSE;
- }
- #endif
- else if(MATCH(NA) && isalpha(curr_char)) {
- /* Dispose of keywords which cannot be followed
- by alphabetic character if that is so.
- */
- ans = FALSE;
- }
- else if(putative_keyword_class == tok_TO) {/* A non-IK case */
- /* TO always follows the word GO or
- is followed by a variable
- name (in ASSIGN statement).
- */
- #ifdef SPLIT_KEYWORDS
- #define in_assign_stmt (curr_stmt_class == tok_ASSIGN)
- ans = (prev_token_class == (in_assign_stmt?
- tok_integer_const:
- tok_GO));
- #else
- ans = ( curr_stmt_class == tok_ASSIGN
- && prev_token_class == tok_integer_const);
- #endif
- }
- else if(putative_keyword_class == tok_FUNCTION /* A non-IK case */
- && (stmt_sequence_no != 0 /* not the first statement of module */
- || !(initial_flag /* if not initial can only be preceded by type */
- || is_a_type_token(curr_stmt_class)) )) {
- ans = FALSE; /* otherwise it will be handled correctly by looking_at */
- }
- else if(putative_keyword_class == tok_WHILE) { /* A non-IK case */
- ans = WHILE_expected; /* Only occurs in DO label [,] WHILE */
- WHILE_expected = FALSE;
- }
- /* Remaining cases are IK in initial part */
- /* Eliminate those which can are never followed
- by '(' or '=' if that is what we have.
- */
- else if(MATCH(NP) &&
- (curr_char == '(' || curr_char == '=') ) {
- ans = FALSE;
- }
- /* Likewise with those that must be followed by
- '(' but aren't */
- else if(MATCH(MP) && curr_char != '(') {
- ans = FALSE;
- }
- /* PRECISION always follows the word DOUBLE */
- else if( putative_keyword_class == tok_PRECISION ){
- ans = (prev_token_class == tok_DOUBLE);
- }
- /* END DO: handle its DO here */
- else if( putative_keyword_class == tok_DO && curr_char == EOS ) {
- /* Also must have prev_token_class == tok_END, but
- no need to check since end-of-statement suffices. */
- ans = TRUE;
- }
- /* Other type names always follow the word
- IMPLICIT */
- else if( implicit_flag ) {
- ans = MATCH(TY);
- }
- else {
- /* Remaining cases are keywords that must be in
- initial position. If followed by '=' must be an
- identifier. If followed by '(' then may be an array
- or character lvalue, so use looking_at to scan ahead
- to see if this is an assignment statement. */
- ans = looking_at_keywd(putative_keyword_class);
- }
- /* Save initial token class for use by parser.
- Either set it to keyword token or to id for
- assignment stmt. */
- if(initial_flag) {
- curr_stmt_class = (ans? keywords[i].class: tok_identifier);
- }
- /* Turn off the initial-keyword flag if this is a
- keyword that cannot be followed by another keyword
- or if it is not a keyword.
- */
- if(ans) {
- if(keywords[i].context & EK)
- initial_flag = FALSE;
- return keywords[i].class;
- }
- else { /* If no more letters follow, then keyword here
- is ruled out. Turn off initial_flag. */
- if( ! isalpha(curr_char) )
- initial_flag = FALSE;
- return 0; /* Not found in list */
- }
- }/* End of is_keyword */
- /* init_keyhashtab:
- */
- /* Hashing is no longer used. This guy now only
- initializes the table of indices that allow
- keywords to be looked up by their token class*/
- void
- init_keyhashtab()
- {
- int i,k,kmin,kmax;
- kmin = kmax = keywords[0].class; /* Find min and max token classes */
- for(i=1; i<NUM_KEYWORDS; i++) {
- k = keywords[i].class;
- if(k < kmin) kmin = k;
- if(k > kmax) kmax = k;
- }
- keytab_offset = kmin; /* Index table from [kmin..kmax] -> [0..size-1] */
- keytab_size = (unsigned) (kmax-kmin+1);
- if( (keytab_index=(short *)ckalloc(keytab_size*sizeof(keytab_index[0])))
- == (short *)NULL) {
- oops_message(OOPS_FATAL,NO_LINE_NUM,NO_COL_NUM,
- "cannot allocate space for keytab_index");
- }
- memset (keytab_index, 0, keytab_size*sizeof(keytab_index[0]));
- /* Now fill in the lookup table, indexed
- by class - offset */
- for(i=0; i<NUM_KEYWORDS; i++) {
- k = keywords[i].class;
- keytab_index[k - keytab_offset] = i;
- }
- }
- PRIVATE void
- get_illegal_token(token) /* Handle an illegal input situation */
- Token *token;
- {
- token->class = tok_illegal;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nILLEGAL TOKEN");
- #endif
- } /* get_illegal_token */
- /* Read a label from label field. */
- PRIVATE void
- get_label(token)
- Token *token;
- {
- int value=0;
- int space_seen=FALSE, has_embedded_space=FALSE;
- while( isadigit(curr_char) && col_num < 6 ) {
- if(space_seen)
- has_embedded_space = TRUE;
- value = value*10 + BCD(curr_char);
- advance();
- while(curr_char==' ' && col_num < 6) {
- space_seen = TRUE;
- advance();
- }
- }
- if(pretty_flag && has_embedded_space) {
- ugly_code(token->line_num,token->col_num,
- "label has embedded space");
- }
- token->class = tok_label;
- token->value.integer = value;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nLabel:\t\t\t%d",value);
- #endif
- } /* get_label */
- PRIVATE void
- get_letter(token) /* Gets letter in IMPLICIT list */
- Token *token;
- {
- token->class = tok_letter;
- token->subclass = makeupper(curr_char);
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nLetter:\t\t\t%c",token->subclass);
- #endif
- advance();
- } /* get_letter */
- /* get_number reads a number and determines data type: integer,
- * real, or double precision.
- */
- /* This belongs in ftnchek.h, perhaps. Defines number of significant
- figures that are reasonable for a single-precision real constant.
- Works out to 9 for wordsize=4, 21 for wordsize=8. These allow
- for a couple of extra digits for rounding. Used in -trunc warning. */
- #define REAL_SIGFIGS (local_wordsize==0? 8: (local_wordsize-1)*3)
- PRIVATE void
- get_number(token)
- Token *token;
- {
- double dvalue,leftside,rightside,pwr_of_ten;
- int exponent,expsign,datatype,c;
- int sigfigs;
- initial_flag = FALSE;
- leftside = 0.0;
- sigfigs = 0;
- datatype = tok_integer_const;
- while(isadigit(curr_char)) {
- leftside = leftside*10.0 + (double)BCD(curr_char);
- ++sigfigs;
- if( !integer_context && makeupper(next_char) == 'H' )
- inside_hollerith = TRUE;/* get ready for hollerith*/
- bi_advance();
- }
- /* If context specifies integer expected, skip to end.
- Otherwise scan on ahead for more. */
- if( integer_context) {
- if(sigfigs == 0) {
- yyerror("integer expected");
- advance(); /* gobble something to avoid infinite loop */
- }
- }
- else {/* not integer_context */
- if( makeupper(curr_char) == 'H' ){ /* nnH means hollerith */
- if(leftside == 0.0) {
- yyerror("Zero-length hollerith constant");
- inside_hollerith = FALSE;
- advance();
- get_illegal_token(token);
- }
- else {
- get_hollerith(token, (int)leftside);
- }
- return;
- }
- rightside = 0.0;
- pwr_of_ten = 1.0;
- closeup(); /* Pull in the lookahead character */
- if( curr_char == '.' &&
- /* don't be fooled by 1.eq.N or
- I.eq.1.and. etc */
- !looking_at_relop() ) {
- datatype = tok_real_const;
- bi_advance();
- while(isadigit(curr_char)) {
- rightside = rightside*10.0 + (double)BCD(curr_char);
- ++sigfigs;
- pwr_of_ten *= 0.10;
- bi_advance();
- }
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- dvalue = leftside + rightside*pwr_of_ten;
- #endif
- exponent = 0;
- expsign = 1;
- /* Integer followed by E or D gives a real/d.p constant */
- if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' ) )
- {
- datatype = ((c == 'E')? tok_real_const: tok_dp_const);
- bi_advance();
- if(curr_char == '+') {
- expsign = 1;
- bi_advance();
- }
- else if(curr_char == '-') {
- expsign = -1;
- bi_advance();
- }
- if(!isadigit(curr_char)) {
- yyerror("Badly formed real constant");
- }
- else while(isadigit(curr_char)) {
- exponent = exponent*10 + (curr_char-'0');
- bi_advance();
- }
- /* Compute real value only if debugging. If it exceeds max magnitude,
- computing it may cause crash. At this time, value of real const
- is not used for anything. */
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- dvalue *= pow(10.0, (double)(exponent*expsign));
- else
- #endif
- dvalue = 0.0;
- }
- }/* end if(!integer_context) */
- token->class = datatype;
- switch(datatype) {
- case tok_integer_const:
- token->value.integer = (long)leftside;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nInteger const:\t\t%ld",token->value.integer);
- #endif
- break;
- case tok_real_const:
- /* store single as double lest it overflow */
- token->value.dbl = dvalue;
- if(trunc_check && sigfigs >= REAL_SIGFIGS) {
- warning(token->line_num,token->col_num,
- "Single-precision real constant has more digits than are stored");
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nReal const:\t\t%g",token->value.dbl);
- #endif
- break;
- case tok_dp_const:
- token->value.dbl = dvalue;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nDouble const:\t\t%lg",token->value.dbl);
- #endif
- break;
- }
- } /* get_number */
- /* get_complex_constant reads an entity of the form (num,num)
- where num is any [signed] numeric constant. It will only be
- called when looking_at() has guaranteed that there is one there.
- The token receives the real part as a number. The imaginary part
- is not stored. Whitespace is allowed between ( and num, around
- the comma, and between num and ) but not within num. */
- PRIVATE void
- get_complex_const(token)
- Token *token;
- {
- Token imag_part; /* temporary to hold imag part */
- double sign=1.0;
- int dble_size=FALSE; /* flag to set if parts are D floats */
- int imag_dble_size=FALSE;/* if imaginary part D float */
- unsigned comma_line_num,comma_col_num;
- initial_flag = FALSE;
- bi_advance(); /* skip over the initial paren */
- if(curr_char == '+' || curr_char == '-') {
- if(curr_char == '-') sign = -1.0;
- bi_advance();
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer){
- fprintf(list_fd,"\nComplex const:(");
- if(sign < 0.0) fprintf(list_fd," -");
- }
- #endif
- get_number(token);
- switch(token->class) {
- case tok_integer_const:
- token->value.dbl = sign*(double)token->value.integer;
- break;
- case tok_dp_const:
- dble_size=TRUE;
- /*fallthru*/
- case tok_real_const:
- token->value.dbl = sign*token->value.dbl;
- break;
- }
- while(iswhitespace(curr_char))
- advance();
- comma_line_num = line_num;
- comma_col_num = col_num;
- bi_advance(); /* skip over the comma */
- if(curr_char == '+' || curr_char == '-') {
- if(curr_char == '-') sign = -1.0;
- bi_advance();
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer){
- fprintf(list_fd,"\n,");
- if(sign < 0.0) fprintf(list_fd," -");
- }
- #endif
- get_number(&imag_part);
- imag_dble_size = (imag_part.class == tok_dp_const);
- if(dble_size != imag_dble_size) {
- warning(comma_line_num,comma_col_num,
- "different precision in real and imaginary parts");
- }
- else if(f77_standard) {
- if(dble_size)
- warning(token->line_num,token->col_num,
- "nonstandard double precision complex constant");
- }
- dble_size = (dble_size || imag_dble_size);
- while(iswhitespace(curr_char))
- advance();
- advance(); /* skip over final paren */
- if(dble_size)
- token->class = tok_dcomplex_const;
- else
- token->class = tok_complex_const;
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\n)");
- #endif
- }
- #ifdef TYPELESS_CONSTANTS
- /* Routine to get constants of the forms:
- B'nnnn' 'nnnn'B -- binary
- O'nnnn' 'nnnn'O -- octal
- X'nnnn' Z'nnnn' 'nnnn'X 'nnnn'Z -- hex
- No check of whether digits are less than base.
- Nonstandard warning is issued here since the constant
- looks like a normal integer by the time the parser sees it.
- */
- PRIVATE void
- get_binary_const(token,c,s)
- Token *token;
- int c; /* base character: madeupper'ed by caller */
- char *s; /* string of digits, or NULL */
- {
- long value=0;
- int base;
- if(c == 'O') base = 8;
- else if(c == 'X' || c == 'Z') base = 16;
- else if(c == 'B') base = 2;
- else {
- syntax_error(token->line_num,token->col_num,
- "Unknown base for typeless constant -- octal assumed");
- base = 8;
- }
- /* Two forms: X'nnnn' and 'nnnn'X. For the first, string has not
- been scanned yet, and s is null. For second, s=digit string. */
- if(s == NULL) {
- bi_advance(); /* gobble the leading quote */
- while(ishex(curr_char)){
- value = value*base + HEX(curr_char);
- bi_advance();
- }
- if(curr_char != '\'') {
- syntax_error(line_num,col_num, "Closing quote missing");
- }
- else
- advance(); /* gobble the trailing quote */
- }
- else { /* Use the given string */
- while(*s != '\0') {
- if(!isspace(*s)) /* skip blanks */
- value = value*base + HEX(*s);
- s++;
- }
- }
- token->class = tok_integer_const;
- token->value.integer = value;
- if(f77_standard) {
- nonstandard(token->line_num,token->col_num);
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nInteger const:\t\t%d",token->value.integer);
- #endif
- }/*get_binary_const*/
- #endif/*TYPELESS_CONSTANTS*/
- PRIVATE void
- get_punctuation(token)
- Token *token;
- {
- initial_flag = FALSE;
- closeup();
- if(curr_char == '*' && next_char == '*') {
- token->class = tok_power;
- advance();
- }
- else if(curr_char == '/' && next_char == '/' ) {
- token->class = tok_concat;
- advance();
- }
- /* paren can be the start of complex constant if everything
- is just right. Maybe more tests needed here. */
- else if(complex_const_allowed && curr_char == '(' &&
- ( (prev_token_class<256 && ispunct(prev_token_class))
- || prev_token_class == tok_relop
- || prev_token_class == tok_power )
- && looking_at_cplx()) {
- get_complex_const(token);
- return;
- }
- else
- token->class = curr_char;
- advance();
- #ifdef DEBUG_FORLEX
- if(debug_lexer) {
- if(token->class == EOS)
- fprintf(list_fd,"\n\t\t\tEOS");
- else if(token->class == tok_power)
- fprintf(list_fd,"\nPunctuation:\t\t**");
- else if(token->class == tok_concat)
- fprintf(list_fd,"\nPunctuation:\t\t//");
- else
- fprintf(list_fd,"\nPunctuation:\t\t%c",token->class);
- }
- #endif
- } /* get_punctuation */
- PRIVATE void
- get_simple_punctuation(token)
- Token *token;
- {
- /* Like get_punctuation but lacks special cases. Just
- gets the punctuation character. */
- token->class = curr_char;
- advance();
- #ifdef DEBUG_FORLEX
- if(debug_lexer) {
- if(token->class == EOS)
- fprintf(list_fd,"\n\t\t\tEOS");
- else
- fprintf(list_fd,"\nPunctuation:\t\t%c",token->class);
- }
- #endif
- } /* get_simple_punctuation */
- PRIVATE void
- get_string(token) /* Gets string of form 'aaaa' */
- Token *token;
- {
- int i,len,last_col_num;
- int first_char = curr_char;
- /* String consts are not stored unless the macro name LEX_STORE_STRINGS
- is defined. */
- #ifdef LEX_STORE_STRINGS
- char *s;
- char tmpstr[MAXSTR+1];
- #else
- char *s = "Not stored";
- #endif
- initial_flag = FALSE;
- inside_string = TRUE;
- last_col_num=col_num;
- advance(); /* Gobble leading quote */
- i = len = 0;
- for(;;) {
- while(curr_char == EOL) {
- /* Treat short line as if extended with blanks */
- int col;
- for(col=last_col_num; col<max_stmt_col; col++) {
- #ifdef LEX_STORE_STRINGS
- if(i < MAXSTR)
- tmpstr[i++] = ' ';
- #endif
- ++len;
- }
- last_col_num=col_num;
- advance();
- }
- if(curr_char == EOS || curr_char == EOF) {
- yyerror("Closing quote missing from string");
- break;
- }
- if(curr_char == first_char) {
- inside_string = FALSE;/* assume so for now */
- /* Handle possible continuation */
- if(next_char == EOL && col_num == max_stmt_col)
- advance();
- last_col_num=col_num;
- advance();
- if(curr_char == first_char) { /* '' becomes ' in string */
- inside_string = TRUE; /* not a closing quote */
- #ifdef LEX_STORE_STRINGS
- if(i < MAXSTR)
- tmpstr[i++] = curr_char;
- #endif
- ++len;
- last_col_num=col_num;
- advance();
- }
- else {
- break; /* It was a closing quote after all */
- }
- }
- else {
- #ifdef LEX_STORE_STRINGS
- if(i < MAXSTR)
- tmpstr[i++] = curr_char;
- #endif
- ++len;
- last_col_num=col_num;
- advance();
- }
- }
- #ifdef LEX_STORE_STRINGS
- tmpstr[i++] = '\0';
- #ifdef TYPELESS_CONSTANTS
- /* Watch for const like 'nnn'X */
- if(!inside_format) {
- while(iswhitespace(curr_char))
- advance();
- if(isaletter(curr_char)) {
- int c=makeupper(curr_char);
- advance(); /* Gobble the base character */
- get_binary_const(token,c,tmpstr);
- return;
- }
- }
- #endif
- if( (s=(char *)ckalloc(i)) == (char *)NULL ) {
- oops_message(OOPS_NONFATAL,line_num,col_num,
- "Out of string space for character constant");
- }
- else {
- memset (s, 0, i);
- (void) strcpy(s,tmpstr);
- }
- #endif
- if(len == 0) {
- warning(line_num,col_num,
- "Zero-length string not allowed\n");
- len = 1;
- }
- inside_string = FALSE;
- token->class = tok_string;
- token->value.string = s;
- token->size = len;
- /* Under -port warn if char size > 255 */
- if(port_check) {
- if(len > 255)
- nonportable(line_num,col_num,
- "character constant length exceeds 255");
- }
- #ifdef DEBUG_FORLEX
- if(debug_lexer)
- fprintf(list_fd,"\nString:\t\t\t%s",s);
- #endif
- } /* get_string */
- /* End of Forlex module */
- /*
- II. Advance
- */
- /* advance.c:
- Low-level input routines for Fortran program checker.
- Shared functions defined:
- init_scan() Initializes an input stream.
- finish_scan() Finishes processing an input stream.
- advance() Reads next char, removing comments and
- handling continuation lines.
- looking_at_x Handles lookahead up to end of line:
- looking_at_cplx() Identifies complex constant.
- looking_at_keywd() Identifies assgnmt stmts vs keywords.
- looking_at_relop() Distinguishes .EQ. from .Eexp .
- flush_line_out(n) Prints lines up to line n if not already
- printed, so error messages come out looking OK.
- */
- /* Define tab stops: nxttab[col_num] is column of next tab stop */
- #define do8(X) X,X,X,X,X,X,X,X
- PRIVATE int nxttab[]={ 0, do8(9), do8(17), do8(25), do8(33),
- do8(41), do8(49), do8(57), do8(65), do8(73), do8(81)};
- PRIVATE int
- prev_comment_line, /* True if previous line was comment */
- curr_comment_line, /* True if current line is comment */
- noncomment_line_count, /* Number of noncomment lines read so far */
- line_is_printed, /* True if line has been flushed (printed) */
- prev_line_is_printed, /* True if line has been flushed (printed) */
- sticky_EOF; /* Signal to delay EOF a bit for sake
- of error messages in include files. */
- PRIVATE unsigned
- prev_line_num; /* line number of previous input line */
- unsigned prev_stmt_line_num; /* line number of previous noncomment */
- PRIVATE char
- lineA[MAXLINE+1],lineB[MAXLINE+1], /* Buffers holding input lines */
- *prev_line,*line; /* Pointers to input buffers */
- PRIVATE char
- *getstrn();
- #ifdef UNIX_CPP
- PRIVATE int
- take_cpp_line(); /* Reads #line directives and ignores others */
- #endif
- /* Lookahead routines that scan the input
- line for various things. The is_whatever routines take a
- string as argument and return TRUE if it satisfies the
- criterion. The skip_whatever routines take an index and
- string as argument and return the index of the next
- nonspace character in the string after the expected thing,
- which must be there in a syntactically correct program.
- The given index points at the character after a known
- lead-in (except for see_a_number, which can be given the
- index of 1st char of number). The see_whatever routines
- are similar but return -1 if the expected thing is not
- seen, which it need not be. */
- PRIVATE int
- is_comment(), is_continuation();
- #if 0
- PRIVATE int, is_overlength();
- #endif
- PRIVATE int
- see_a_number(), see_dowhile(), see_expression(), see_keyword();
- PRIVATE int
- skip_balanced_parens(), skip_idletters(), skip_quoted_string(),
- skip_hollerith();
- #ifdef ALLOW_INCLUDE
- /* Definition of structure for saving the input stream parameters while
- processing an include file.
- */
- typedef struct {
- FILE *yyin;
- char *fname;
- char line[MAXLINE]; /* MAXLINE is defined in ftnchek.h */
- int curr_char;
- int curr_index;
- int next_char;
- int