/* L E X I C A L A N A L Y S E R F O R I S O - P A S C A L */ #include #include #include #include "parameters.h" #include "debug.h" #include #include #include #include "LLlex.h" #include "Lpars.h" #include "class.h" #include "const.h" #include "f_info.h" #include "idf.h" #include "input.h" #include "main.h" #include "type.h" extern long str2long(); extern char *Malloc(); #define TO_LOWER(ch) (ch |= ( ch>='A' && ch<='Z' ) ? 0x0020 : 0) #ifdef DEBUG extern int cntlines; #endif int idfsize = IDFSIZE; struct token dot, aside; struct type *toktype, *asidetype; static int eofseen; int tokenseen = 0; /* Some comment-options must precede any program text */ /* Warning: The options specified inside comments take precedence over * the ones on the command line. */ CommentOptions() { register int ch, ci; int on_on_minus = 0; /* Parse options inside comments */ do { LoadChar(ch); ci = ch; switch ( ci ) { case 'c': /* for strings */ case 'd': /* for longs */ case 's': /* check for standard */ case 'u': /* for underscores */ case 'C': /* for different cases */ case 'U': /* for underscores */ if( tokenseen ) { lexwarning("the '%c' option must precede any program text", ci); break; } LoadChar(ch); if( ci == 's' && options[ci] && ch == '-') lexwarning("option '%c-' overrides previous one", ci); if( ch == '-' ) options[ci] = 0; else if( ch == '+' ) options[ci] = 1; else PushBack(); break; case 'l': ci = 'L' ; /* for indexing */ /* fall through */ case 'L': /* FIL & LIN instructions */ case 'R': /* range checks */ case 'a': /* assertions */ on_on_minus = 1; /* fall through */ case 't': /* tracing */ case 'A': /* extra array range-checks */ LoadChar(ch); if( ch == '-' ) options[ci] = on_on_minus; else if( ch == '+' ) options[ci] = !on_on_minus; else PushBack(); on_on_minus = 0; break; case 'i': { register int i=0; LoadChar(ch); while( ch >= '0' && ch <= '9' ) { i = 10 * i + (ch - '0'); LoadChar(ch); } PushBack(); if( tokenseen ) { lexwarning("the '%c' option must precede any program text", ci); break; } if( i <= 0 ) { lexwarning("bad '%c' option", ci); break; } max_intset = i; break; } default: break; } LoadChar(ch); } while (ch == ',' ); PushBack(); } STATIC void SkipComment() { /* Skip ISO-Pascal comments (* ... *) or { ... }. Note : comments may not be nested (ISO 6.1.8). (* and { are interchangeable, so are *) and }. */ register int ch; LoadChar(ch); if (ch == '$') CommentOptions(); for (;;) { if( class(ch) == STNL ) { LineNumber++; #ifdef DEBUG cntlines++; #endif } else if( ch == '*' ) { LoadChar(ch); if( ch == ')' ) return; /* *) */ else continue; } else if( ch == '}' ) return; else if( ch == EOI ) { lexerror("unterminated comment"); break; } LoadChar(ch); } } STATIC struct string * GetString( delim ) register int delim; { /* Read a Pascal string, delimited by the character ' or ". */ register int ch; register struct string *str = (struct string *) Malloc((unsigned) sizeof(struct string)); register char *p; register int len = ISTRSIZE; str->s_str = p = Malloc((unsigned int) ISTRSIZE); for( ; ; ) { LoadChar(ch); if( class(ch) == STNL ) { lexerror("newline in string"); LineNumber++; #ifdef DEBUG cntlines++; #endif break; } if( ch == EOI ) { lexerror("end-of-file in string"); break; } if( ch == delim ) { LoadChar(ch); if( ch != delim ) break; } *p++ = ch; if( p - str->s_str == len ) { extern char *Srealloc(); str->s_str = Srealloc(str->s_str, (unsigned int) len + RSTRSIZE); p = str->s_str + len; len += RSTRSIZE; } } if( ch == EOI ) eofseen = 1; else PushBack(); str->s_length = p - str->s_str; *p++ = '\0'; /* ISO 6.1.7: string length at least 1 */ if( str->s_length == 0 ) { lexerror("character-string: at least one character expected"); str->s_length = 1; } return str; } static char *s_error = "illegal line directive"; void CheckForLineDirective() { register int ch; register int i = 0; char buf[IDFSIZE + 2]; register char *c = buf; LoadChar(ch); if( ch != '#' ) { PushBack(); return; } do { /* * Skip to next digit. Do not skip newlines. */ LoadChar(ch); if( class(ch) == STNL ) { LineNumber++; lexerror(s_error); return; } else if( ch == EOI ) { eofseen = 1; break; } } while( class(ch) != STNUM ); while( class(ch) == STNUM ) { i = i * 10 + (ch - '0'); LoadChar(ch); } if( ch == EOI ) { eofseen = 1; } while( ch != '"' && ch != EOI && class(ch) != STNL) LoadChar(ch); if( ch == '"' ) { do { LoadChar(ch); *c++ = ch; if( class(ch) == STNL ) { LineNumber++; error(s_error); return; } } while( ch != '"' ); *--c = '\0'; do { LoadChar(ch); } while( class(ch) != STNL ); /* * Remember the filename */ if( !eofseen && strcmp(FileName, buf) ) { FileName = Salloc(buf,(unsigned) strlen(buf) + 1); } } if( eofseen ) { error(s_error); return; } LineNumber = i; } int LLlex() { /* LLlex() is the Lexical Analyzer. The putting aside of tokens is taken into account. */ register struct token *tk = ˙ register int ch, nch; toktype = error_type; if( ASIDE ) { /* a token is put aside */ *tk = aside; toktype = asidetype; ASIDE = 0; return tk->tk_symb; } tk->tk_lineno = LineNumber; again1: if( eofseen ) { eofseen = 0; ch = EOI; } else { again: LoadChar(ch); if( !options['C'] ) /* -C : cases are different */ TO_LOWER(ch); } switch( class(ch) ) { case STNL: LineNumber++; tk->tk_lineno++; #ifdef DEBUG cntlines++; #endif CheckForLineDirective(); goto again1; case STSKIP: goto again; case STGARB: if( !tokenseen && (ch == '"' || ch == '_') ) { return tk->tk_symb = ch; } if( (unsigned) ch < 0177 ) lexerror("garbage char %c", ch); else crash("(LLlex) garbage char \\%03o", ch); goto again; case STSIMP: if( ch == '(' ) { LoadChar(nch); if( nch == '*' ) { /* (* */ SkipComment(); tk->tk_lineno = LineNumber; goto again1; } if( nch == '.' ) /* (. is [ */ return tk->tk_symb = '['; if( nch == EOI ) eofseen = 1; else PushBack(); } else if( ch == '{' ) { SkipComment(); tk->tk_lineno = LineNumber; goto again1; } else if( ch == '@' ) ch = '^'; /* @ is ^ */ return tk->tk_symb = ch; case STCOMP: LoadChar(nch); switch( ch ) { case '.': if( nch == '.' ) /* .. */ return tk->tk_symb = UPTO; if( nch == ')' ) /* .) is ] */ return tk->tk_symb = ']'; break; case ':': if( nch == '=' ) /* := */ return tk->tk_symb = BECOMES; break; case '<': if( nch == '=' ) /* <= */ return tk->tk_symb = LESSEQUAL; if( nch == '>' ) /* <> */ return tk->tk_symb = NOTEQUAL; break; case '>': if( nch == '=' ) /* >= */ return tk->tk_symb = GREATEREQUAL; break; default : crash("(LLlex, STCOMP)"); /*NOTREACHED*/ } if( nch == EOI ) eofseen = 1; else PushBack(); return tk->tk_symb = ch; case STIDF: { char buf[IDFSIZE + 1]; register char *tag = &buf[0]; register struct idf *id; extern struct idf *str2idf(); do { if( !options['C'] ) /* -C : cases are different */ TO_LOWER(ch); if( tag - buf < idfsize ) *tag++ = ch; LoadChar(ch); } while( in_idf(ch) ); *tag = '\0'; if( ch == EOI ) eofseen = 1; else PushBack(); /* dtrg: removed to allow Pascal programs to access system routines * (necessary to make them do anything useful). What's this for, * anyway? */ #if 0 if( buf[0] == '_' ) lexerror("underscore starts identifier"); #endif tk->TOK_IDF = id = str2idf(buf, 1); return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT; } case STSTR: { register struct string *str = GetString(ch); if( str->s_length == 1 && ch == '\'') { #ifdef DEBUG if( options['l'] ) { /* to prevent LexScan from crashing */ tk->tk_data.tk_str = str; return tk->tk_symb = STRING; } #endif tk->TOK_INT = *(str->s_str) & 0377; toktype = char_type; free(str->s_str); free((char *) str); } else { if( ch == '\'' ) { tk->tk_data.tk_str = str; toktype = standard_type(T_STRINGCONST, 1, str->s_length); } else { tk->tk_data.tk_str = str; toktype = string_type; } } return tk->tk_symb = STRING; } case STNUM: { #define INT_MODE 0 #define REAL_MODE 1 char buf[NUMSIZE+2]; register char *np = &buf[1]; register int state = INT_MODE; extern char *Salloc(); buf[0] = '-'; do { if( np <= &buf[NUMSIZE] ) *np++ = ch; LoadChar(ch); } while( is_dig(ch) ); if( ch == '.' ) { LoadChar(ch); if( is_dig(ch) ) { if( np <= &buf[NUMSIZE] ) *np++ = '.'; do { /* fractional part */ if( np <= &buf[NUMSIZE] ) *np++ = ch; LoadChar(ch); } while( is_dig(ch) ); state = REAL_MODE; } else { PushBack(); PushBack(); goto end; } } if( ch == 'e' || ch == 'E' ) { char *tp = np; /* save position in string */ /* scale factor */ if( np <= &buf[NUMSIZE] ) *np++ = ch; LoadChar(ch); if( ch == '+' || ch == '-' ) { /* signed scale factor */ if( np <= &buf[NUMSIZE] ) *np++ = ch; LoadChar(ch); } if( is_dig(ch) ) { do { if( np <= &buf[NUMSIZE] ) *np++ = ch; LoadChar(ch); } while( is_dig(ch) ); state = REAL_MODE; } else { PushBack(); PushBack(); if( np - tp == 2 ) /* sign */ PushBack(); np = tp; /* restore position */ goto end; } } /* syntax of number is correct */ if( ch == EOI ) eofseen = 1; else PushBack(); end: *np++ = '\0'; if( state == INT_MODE ) { if( np > &buf[NUMSIZE+1] ) { tk->TOK_INT = 1; lexerror("constant too long"); } else { np = &buf[1]; while (*np == '0') /* skip leading zeros */ np++; tk->TOK_INT = str2long(np, 10); if( tk->TOK_INT < 0 || strlen(np) > strlen(maxint_str) || strlen(np) == strlen(maxint_str) && strcmp(np, maxint_str) > 0 ) lexwarning("overflow in constant"); } toktype = int_type; return tk->tk_symb = INTEGER; } /* REAL_MODE */ tk->tk_data.tk_real = (struct real *) Malloc(sizeof(struct real)); /* allocate struct for inverse */ tk->TOK_RIV = (struct real *) Malloc(sizeof(struct real)); tk->TOK_RIV->r_inverse = tk->tk_data.tk_real; tk->TOK_RLA = 0; tk->TOK_RIV->r_lab = 0; if( np > &buf[NUMSIZE+1] ) { tk->TOK_REL = Salloc("0.0", 4); tk->TOK_RIV->r_real = tk->TOK_REL; lexerror("floating constant too long"); } else { tk->TOK_RIV->r_real = Salloc(buf,(unsigned) (np - buf)); tk->TOK_REL = tk->TOK_RIV->r_real + 1; } toktype = real_type; return tk->tk_symb = REAL; /*NOTREACHED*/ } case STEOI: return tk->tk_symb = -1; case STCHAR: default: crash("(LLlex) Impossible character class"); /*NOTREACHED*/ } /*NOTREACHED*/ }