577 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			577 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* L E X I C A L   A N A L Y S E R   F O R   I S O - P A S C A L */
 | 
						|
 | 
						|
#include	"debug.h"
 | 
						|
#include	"idfsize.h"
 | 
						|
#include	"numsize.h"
 | 
						|
#include	"strsize.h"
 | 
						|
 | 
						|
#include	<alloc.h>
 | 
						|
#include	<em_arith.h>
 | 
						|
#include	<em_label.h>
 | 
						|
 | 
						|
#include	"LLlex.h"
 | 
						|
#include	"Lpars.h"
 | 
						|
#include	"class.h"
 | 
						|
#include	"const.h"
 | 
						|
#include	"f_info.h"
 | 
						|
#include	"idf.h"
 | 
						|
#include	"input.h"
 | 
						|
#include	"main.h"
 | 
						|
#include	"type.h"
 | 
						|
 | 
						|
extern long	str2long();
 | 
						|
extern char	*Malloc();
 | 
						|
 | 
						|
#define	TO_LOWER(ch)	(ch |= ( ch>='A' && ch<='Z' ) ? 0x0020 : 0)
 | 
						|
 | 
						|
#ifdef DEBUG
 | 
						|
extern int cntlines;
 | 
						|
#endif
 | 
						|
 | 
						|
int idfsize = IDFSIZE;
 | 
						|
struct token	dot,
 | 
						|
		aside;
 | 
						|
 | 
						|
struct type	*toktype,
 | 
						|
		*asidetype;
 | 
						|
 | 
						|
static int	eofseen;
 | 
						|
 | 
						|
int tokenseen = 0;	/* Some comment-options must precede any program text */
 | 
						|
 | 
						|
/* Warning: The options specified inside comments take precedence over
 | 
						|
 * the ones on the command line.
 | 
						|
 */
 | 
						|
CommentOptions()
 | 
						|
{
 | 
						|
	register int ch, ci;
 | 
						|
	int	on_on_minus = 0;
 | 
						|
	/* Parse options inside comments */
 | 
						|
 | 
						|
	do {
 | 
						|
		LoadChar(ch);
 | 
						|
		ci = ch;
 | 
						|
		switch ( ci ) {
 | 
						|
		case 'c':		/* for strings */
 | 
						|
		case 'd':		/* for longs */
 | 
						|
		case 's':		/* check for standard */
 | 
						|
		case 'u':		/* for underscores */
 | 
						|
		case 'C':		/* for different cases */
 | 
						|
		case 'U':		/* for underscores */
 | 
						|
			if( tokenseen ) {
 | 
						|
				lexwarning("the '%c' option must precede any program text", ci);
 | 
						|
				break;
 | 
						|
			}
 | 
						|
 | 
						|
			LoadChar(ch);
 | 
						|
			if( ci == 's' && options[ci] && ch == '-')
 | 
						|
				lexwarning("option '%c-' overrides previous one", ci);
 | 
						|
			if( ch == '-' ) options[ci] = 0;
 | 
						|
			else if( ch == '+' ) options[ci] = 1;
 | 
						|
			else PushBack();
 | 
						|
			break;
 | 
						|
 | 
						|
		case 'l':	ci = 'L' ;	/* for indexing */
 | 
						|
			/* fall through */
 | 
						|
		case 'L':			/* FIL & LIN instructions */
 | 
						|
		case 'R':			/* range checks */
 | 
						|
		case 'a':			/* assertions */
 | 
						|
			on_on_minus = 1;
 | 
						|
			/* fall through */
 | 
						|
		case 't':			/* tracing */
 | 
						|
		case 'A':			/* extra array range-checks */
 | 
						|
			LoadChar(ch);
 | 
						|
			if( ch == '-' ) options[ci] = on_on_minus;
 | 
						|
			else if( ch == '+' ) options[ci] = !on_on_minus;
 | 
						|
			else PushBack();
 | 
						|
			on_on_minus = 0;
 | 
						|
			break;
 | 
						|
 | 
						|
		case 'i':
 | 
						|
		{
 | 
						|
			register int i=0;
 | 
						|
 | 
						|
			LoadChar(ch);
 | 
						|
			while( ch >= '0' && ch <= '9' ) {
 | 
						|
				i = 10 * i + (ch - '0');
 | 
						|
				LoadChar(ch);
 | 
						|
			}
 | 
						|
			PushBack();
 | 
						|
			if( tokenseen ) {
 | 
						|
				lexwarning("the '%c' option must precede any program text", ci);
 | 
						|
				break;
 | 
						|
			}
 | 
						|
			if( i <= 0 ) {
 | 
						|
				lexwarning("bad '%c' option", ci);
 | 
						|
				break;
 | 
						|
			}
 | 
						|
			max_intset = i;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
 | 
						|
		default:
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		LoadChar(ch);
 | 
						|
	} while (ch == ',' );
 | 
						|
 | 
						|
	PushBack();
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
STATIC
 | 
						|
SkipComment()
 | 
						|
{
 | 
						|
	/*	Skip ISO-Pascal comments (* ... *) or { ... }.
 | 
						|
		Note :
 | 
						|
			comments may not be nested (ISO 6.1.8).
 | 
						|
			(* and { are interchangeable, so are *) and }.
 | 
						|
	*/
 | 
						|
	register int ch;
 | 
						|
 | 
						|
	LoadChar(ch);
 | 
						|
	if (ch == '$') CommentOptions();
 | 
						|
	for (;;)	{
 | 
						|
		if( class(ch) == STNL )	{
 | 
						|
			LineNumber++;
 | 
						|
#ifdef DEBUG
 | 
						|
			cntlines++;
 | 
						|
#endif
 | 
						|
		}
 | 
						|
		else if( ch == '*' )	{
 | 
						|
			LoadChar(ch);
 | 
						|
			if( ch == ')' ) return;		/* *) */
 | 
						|
			else continue;
 | 
						|
		}
 | 
						|
		else if( ch == '}' ) return;
 | 
						|
		else if( ch == EOI )	{
 | 
						|
			lexerror("unterminated comment");
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		LoadChar(ch);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
STATIC struct string *
 | 
						|
GetString( delim )
 | 
						|
register int delim;
 | 
						|
{
 | 
						|
	/*	Read a Pascal string, delimited by the character ' or ".
 | 
						|
	*/
 | 
						|
	register int ch;
 | 
						|
	register struct string *str = (struct string *)
 | 
						|
				Malloc((unsigned) sizeof(struct string));
 | 
						|
	register char *p;
 | 
						|
	register int len = ISTRSIZE;
 | 
						|
	
 | 
						|
	str->s_str = p = Malloc((unsigned int) ISTRSIZE);
 | 
						|
	for( ; ; )	{
 | 
						|
		LoadChar(ch);
 | 
						|
		if( ch & 0200 ) {
 | 
						|
			fatal("non-ascii '\\%03o' read", ch & 0377);
 | 
						|
			/*NOTREACHED*/
 | 
						|
		}
 | 
						|
		if( class(ch) == STNL )	{
 | 
						|
			lexerror("newline in string");
 | 
						|
			LineNumber++;
 | 
						|
#ifdef DEBUG
 | 
						|
			cntlines++;
 | 
						|
#endif
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		if( ch == EOI )	{
 | 
						|
			lexerror("end-of-file in string");
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		if( ch == delim )	{
 | 
						|
			LoadChar(ch);
 | 
						|
			if( ch != delim )
 | 
						|
				break;
 | 
						|
		}
 | 
						|
		*p++ = ch;
 | 
						|
		if( p - str->s_str == len )	{
 | 
						|
			extern char *Srealloc();
 | 
						|
 | 
						|
			str->s_str = Srealloc(str->s_str,
 | 
						|
					(unsigned int) len + RSTRSIZE);
 | 
						|
			p = str->s_str + len;
 | 
						|
			len += RSTRSIZE;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if( ch == EOI ) eofseen = 1;
 | 
						|
	else PushBack();
 | 
						|
 | 
						|
	str->s_length = p - str->s_str;
 | 
						|
	*p++ = '\0';
 | 
						|
 | 
						|
	/* ISO 6.1.7: string length at least 1 */
 | 
						|
	if( str->s_length == 0 )	{
 | 
						|
		lexerror("character-string: at least one character expected");
 | 
						|
		str->s_length = 1;
 | 
						|
	}
 | 
						|
 | 
						|
	return str;
 | 
						|
}
 | 
						|
 | 
						|
static char *s_error = "illegal line directive";
 | 
						|
 | 
						|
CheckForLineDirective()
 | 
						|
{
 | 
						|
	register int	ch;
 | 
						|
	register int	i = 0;
 | 
						|
	char		buf[IDFSIZE + 2];
 | 
						|
	register char	*c = buf;
 | 
						|
 | 
						|
	LoadChar(ch);
 | 
						|
 | 
						|
	if( ch != '#' ) {
 | 
						|
		PushBack();
 | 
						|
		return;
 | 
						|
	}
 | 
						|
	do {	/*
 | 
						|
		 * Skip to next digit. Do not skip newlines.
 | 
						|
		 */
 | 
						|
		LoadChar(ch);
 | 
						|
		if( class(ch) == STNL ) {
 | 
						|
			LineNumber++;
 | 
						|
			lexerror(s_error);
 | 
						|
			return;
 | 
						|
		}
 | 
						|
		else if( ch == EOI ) {
 | 
						|
			eofseen = 1;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	} while( class(ch) != STNUM );
 | 
						|
	while( class(ch) == STNUM )	{
 | 
						|
		i = i * 10 + (ch - '0');
 | 
						|
		LoadChar(ch);
 | 
						|
	}
 | 
						|
	if( ch == EOI ) {
 | 
						|
		eofseen = 1;
 | 
						|
	}
 | 
						|
	while( ch != '"' && ch != EOI && class(ch) != STNL) LoadChar(ch);
 | 
						|
	if( ch == '"' )	{
 | 
						|
		do {
 | 
						|
			LoadChar(ch);
 | 
						|
			*c++ = ch;
 | 
						|
			if( class(ch) == STNL ) {
 | 
						|
				LineNumber++;
 | 
						|
				error(s_error);
 | 
						|
				return;
 | 
						|
			}
 | 
						|
		} while( ch != '"' );
 | 
						|
		*--c = '\0';
 | 
						|
		do {
 | 
						|
			LoadChar(ch);
 | 
						|
		} while( class(ch) != STNL );
 | 
						|
		/*
 | 
						|
		 * Remember the filename
 | 
						|
		 */
 | 
						|
		 if( !eofseen && strcmp(FileName, buf) ) {
 | 
						|
			FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if( eofseen ) {
 | 
						|
		error(s_error);
 | 
						|
		return;
 | 
						|
	}
 | 
						|
	LineNumber = i;
 | 
						|
}
 | 
						|
 | 
						|
int
 | 
						|
LLlex()
 | 
						|
{
 | 
						|
	/*	LLlex() is the Lexical Analyzer.
 | 
						|
		The putting aside of tokens is taken into account.
 | 
						|
	*/
 | 
						|
	register struct token *tk = ˙
 | 
						|
	register int ch, nch;
 | 
						|
 | 
						|
	toktype = error_type;
 | 
						|
 | 
						|
	if( ASIDE )	{	/* a token is put aside */
 | 
						|
		*tk = aside;
 | 
						|
		toktype = asidetype;
 | 
						|
		ASIDE = 0;
 | 
						|
		return tk->tk_symb;
 | 
						|
	}
 | 
						|
 | 
						|
	tk->tk_lineno = LineNumber;
 | 
						|
 | 
						|
again1:
 | 
						|
	if( eofseen )	{
 | 
						|
		eofseen = 0;
 | 
						|
		ch = EOI;
 | 
						|
	}
 | 
						|
	else	{
 | 
						|
again:
 | 
						|
		LoadChar(ch);
 | 
						|
		if( !options['C'] )		/* -C : cases are different */
 | 
						|
			TO_LOWER(ch);
 | 
						|
 | 
						|
		if( (ch & 0200) && ch != EOI ) {
 | 
						|
			fatal("non-ascii '\\%03o' read", ch & 0377);
 | 
						|
			/*NOTREACHED*/
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	switch( class(ch) )	{
 | 
						|
 | 
						|
	case STNL:
 | 
						|
		LineNumber++;
 | 
						|
		tk->tk_lineno++;
 | 
						|
#ifdef DEBUG
 | 
						|
		cntlines++;
 | 
						|
#endif
 | 
						|
		CheckForLineDirective();
 | 
						|
		goto again1;
 | 
						|
 | 
						|
	case STSKIP:
 | 
						|
		goto again;
 | 
						|
 | 
						|
	case STGARB:
 | 
						|
		if( !tokenseen && (ch == '"' || ch == '_') ) {
 | 
						|
			return tk->tk_symb = ch;
 | 
						|
		}
 | 
						|
		if( (unsigned) ch < 0177 )
 | 
						|
			lexerror("garbage char %c", ch);
 | 
						|
		else
 | 
						|
			crash("(LLlex) garbage char \\%03o", ch);
 | 
						|
		goto again;
 | 
						|
 | 
						|
	case STSIMP:
 | 
						|
		if( ch == '(' )	{
 | 
						|
			LoadChar(nch);
 | 
						|
			if( nch == '*' )	{		/* (* */
 | 
						|
				SkipComment();
 | 
						|
				tk->tk_lineno = LineNumber;
 | 
						|
				goto again1;
 | 
						|
			}
 | 
						|
			if( nch == '.' )			/* (. is [ */
 | 
						|
				return tk->tk_symb = '[';
 | 
						|
			if( nch == EOI ) eofseen = 1;
 | 
						|
			else PushBack();
 | 
						|
		}
 | 
						|
		else if( ch == '{' )	{
 | 
						|
			SkipComment();
 | 
						|
			tk->tk_lineno = LineNumber;
 | 
						|
			goto again1;
 | 
						|
		}
 | 
						|
		else if( ch == '@' ) ch = '^';		/* @ is ^ */
 | 
						|
 | 
						|
		return tk->tk_symb = ch;
 | 
						|
 | 
						|
	case STCOMP:
 | 
						|
		LoadChar(nch);
 | 
						|
		switch( ch )	{
 | 
						|
 | 
						|
		case '.':
 | 
						|
			if( nch == '.' )			/* .. */
 | 
						|
				return tk->tk_symb = UPTO;
 | 
						|
			if( nch == ')' )			/* .) is ] */
 | 
						|
				return tk->tk_symb = ']';
 | 
						|
			break;
 | 
						|
 | 
						|
		case ':':
 | 
						|
			if( nch == '=' )			/* := */
 | 
						|
				return tk->tk_symb = BECOMES;
 | 
						|
			break;
 | 
						|
 | 
						|
		case '<':
 | 
						|
			if( nch == '=' )			/* <= */
 | 
						|
				return tk->tk_symb = LESSEQUAL;
 | 
						|
			if( nch == '>' )			/* <> */
 | 
						|
				return tk->tk_symb = NOTEQUAL;
 | 
						|
			break;
 | 
						|
 | 
						|
		case '>':
 | 
						|
			if( nch == '=' )			/* >= */
 | 
						|
				return tk->tk_symb = GREATEREQUAL;
 | 
						|
			break;
 | 
						|
 | 
						|
		default :
 | 
						|
			crash("(LLlex, STCOMP)");
 | 
						|
			/*NOTREACHED*/
 | 
						|
		}
 | 
						|
		if( nch == EOI ) eofseen = 1;
 | 
						|
		else PushBack();
 | 
						|
		return tk->tk_symb = ch;
 | 
						|
 | 
						|
	case STIDF:	{
 | 
						|
		char buf[IDFSIZE + 1];
 | 
						|
		register char *tag = &buf[0];
 | 
						|
		register struct idf *id;
 | 
						|
		extern struct idf *str2idf();
 | 
						|
 | 
						|
		do	{
 | 
						|
			if( !options['C'] )	/* -C : cases are different */
 | 
						|
				TO_LOWER(ch);
 | 
						|
			if( tag - buf < idfsize )
 | 
						|
				*tag++ = ch;
 | 
						|
			LoadChar(ch);
 | 
						|
		} while( in_idf(ch) );
 | 
						|
		*tag = '\0';
 | 
						|
 | 
						|
		if( ch == EOI ) eofseen = 1;
 | 
						|
		else PushBack();
 | 
						|
 | 
						|
		if( buf[0] == '_' ) lexerror("underscore starts identifier");
 | 
						|
		tk->TOK_IDF = id = str2idf(buf, 1);
 | 
						|
		return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
 | 
						|
	}
 | 
						|
 | 
						|
	case STSTR:	{
 | 
						|
		register struct string *str = GetString(ch);
 | 
						|
 | 
						|
		if( str->s_length == 1 && ch == '\'')	{
 | 
						|
#ifdef DEBUG
 | 
						|
			if( options['l'] )	{
 | 
						|
				/* to prevent LexScan from crashing */
 | 
						|
				tk->tk_data.tk_str = str;
 | 
						|
				return tk->tk_symb = STRING;
 | 
						|
			}
 | 
						|
#endif
 | 
						|
			tk->TOK_INT = *(str->s_str) & 0377;
 | 
						|
			toktype = char_type;
 | 
						|
			free(str->s_str);
 | 
						|
			free((char *) str);
 | 
						|
		}
 | 
						|
		else	{
 | 
						|
			if( ch == '\'' )	{
 | 
						|
				tk->tk_data.tk_str = str;
 | 
						|
				toktype = standard_type(T_STRINGCONST, 1, str->s_length);
 | 
						|
			}
 | 
						|
			else	{
 | 
						|
				tk->tk_data.tk_str = str;
 | 
						|
				toktype = string_type;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		return tk->tk_symb = STRING;
 | 
						|
	}
 | 
						|
 | 
						|
	case STNUM:	{
 | 
						|
#define INT_MODE	0
 | 
						|
#define REAL_MODE	1
 | 
						|
 | 
						|
		char buf[NUMSIZE+2];
 | 
						|
		register char *np = &buf[1];
 | 
						|
		register int state = INT_MODE;
 | 
						|
		extern char *Salloc();
 | 
						|
 | 
						|
		buf[0] = '-';
 | 
						|
		do	{
 | 
						|
			if( np <= &buf[NUMSIZE] )
 | 
						|
				*np++ = ch;
 | 
						|
			LoadChar(ch);
 | 
						|
		} while( is_dig(ch) );
 | 
						|
 | 
						|
		if( ch == '.' )	{
 | 
						|
			LoadChar(ch);
 | 
						|
			if( is_dig(ch) )	{
 | 
						|
				if( np <= &buf[NUMSIZE] )
 | 
						|
					*np++ = '.';
 | 
						|
				do	{
 | 
						|
					/* fractional part */
 | 
						|
					if( np <= &buf[NUMSIZE] )
 | 
						|
						*np++ = ch;
 | 
						|
					LoadChar(ch);
 | 
						|
				} while( is_dig(ch) );
 | 
						|
				state = REAL_MODE;
 | 
						|
			}
 | 
						|
			else	{
 | 
						|
				PushBack();
 | 
						|
				PushBack();
 | 
						|
				goto end;
 | 
						|
			}
 | 
						|
				
 | 
						|
		}
 | 
						|
		if( ch == 'e' || ch == 'E' )	{
 | 
						|
			char *tp = np;		/* save position in string */
 | 
						|
 | 
						|
			/* scale factor */
 | 
						|
			if( np <= &buf[NUMSIZE] )
 | 
						|
				*np++ = ch;
 | 
						|
			LoadChar(ch);
 | 
						|
			if( ch == '+' || ch == '-' )	{
 | 
						|
				/* signed scale factor */
 | 
						|
				if( np <= &buf[NUMSIZE] )
 | 
						|
					*np++ = ch;
 | 
						|
				LoadChar(ch);
 | 
						|
			}
 | 
						|
			if( is_dig(ch) )	{
 | 
						|
				do	{
 | 
						|
					if( np <= &buf[NUMSIZE] )
 | 
						|
						*np++ = ch;
 | 
						|
					LoadChar(ch);
 | 
						|
				} while( is_dig(ch) );
 | 
						|
				state = REAL_MODE;
 | 
						|
			}
 | 
						|
			else	{
 | 
						|
				PushBack();
 | 
						|
				PushBack();
 | 
						|
				if( np - tp == 2 )	/* sign */
 | 
						|
					PushBack();
 | 
						|
				np = tp;		/* restore position */
 | 
						|
				goto end;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		/* syntax of number is correct */
 | 
						|
		if( ch == EOI ) eofseen = 1;
 | 
						|
		else PushBack();
 | 
						|
	end:
 | 
						|
		*np++ = '\0';
 | 
						|
 | 
						|
		if( state == INT_MODE )	{
 | 
						|
			if( np > &buf[NUMSIZE+1] )	{
 | 
						|
				tk->TOK_INT = 1;
 | 
						|
				lexerror("constant too long");
 | 
						|
			}
 | 
						|
			else	{
 | 
						|
				np = &buf[1];
 | 
						|
				while (*np == '0')	/* skip leading zeros */
 | 
						|
					np++;
 | 
						|
				tk->TOK_INT = str2long(np, 10);
 | 
						|
				if( tk->TOK_INT < 0 ||
 | 
						|
				    strlen(np) > strlen(maxint_str) ||
 | 
						|
					strlen(np) == strlen(maxint_str) &&
 | 
						|
					strcmp(np, maxint_str) > 0 )
 | 
						|
					     lexwarning("overflow in constant");
 | 
						|
			}
 | 
						|
			toktype = int_type;
 | 
						|
			return tk->tk_symb = INTEGER;
 | 
						|
		}
 | 
						|
		/* REAL_MODE */
 | 
						|
		tk->tk_data.tk_real = (struct real *)
 | 
						|
						Malloc(sizeof(struct real));
 | 
						|
		/* allocate struct for inverse */
 | 
						|
		tk->TOK_RIV = (struct real *) Malloc(sizeof(struct real));
 | 
						|
		tk->TOK_RIV->r_inverse = tk->tk_data.tk_real;
 | 
						|
		tk->TOK_RLA = 0;
 | 
						|
		tk->TOK_RIV->r_lab = 0;
 | 
						|
 | 
						|
		if( np > &buf[NUMSIZE+1] )	{
 | 
						|
			tk->TOK_REL = Salloc("0.0", 4);
 | 
						|
			tk->TOK_RIV->r_real = tk->TOK_REL;
 | 
						|
			lexerror("floating constant too long");
 | 
						|
		}
 | 
						|
		else {
 | 
						|
			tk->TOK_RIV->r_real = Salloc(buf,(unsigned) (np - buf));
 | 
						|
			tk->TOK_REL = tk->TOK_RIV->r_real + 1;
 | 
						|
		}
 | 
						|
 | 
						|
		toktype = real_type;
 | 
						|
		return tk->tk_symb = REAL;
 | 
						|
 | 
						|
		/*NOTREACHED*/
 | 
						|
	}
 | 
						|
 | 
						|
	case STEOI:
 | 
						|
		return tk->tk_symb = -1;
 | 
						|
 | 
						|
	case STCHAR:
 | 
						|
	default:
 | 
						|
		crash("(LLlex) Impossible character class");
 | 
						|
		/*NOTREACHED*/
 | 
						|
	}
 | 
						|
	/*NOTREACHED*/
 | 
						|
}
 |