549 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			549 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 | |
|  * See the copyright notice in the ACK home directory, in the file "Copyright".
 | |
|  */
 | |
| /* $Header$ */
 | |
| /*		    L E X I C A L   A N A L Y Z E R			*/
 | |
| 
 | |
| #include	"idfsize.h"
 | |
| #include	"numsize.h"
 | |
| #include	"strsize.h"
 | |
| 
 | |
| #include	<alloc.h>
 | |
| #include	"input.h"
 | |
| #include	"arith.h"
 | |
| #include	"macro.h"
 | |
| #include	"idf.h"
 | |
| #include	"LLlex.h"
 | |
| #include	"Lpars.h"
 | |
| #include	"class.h"
 | |
| #include	"bits.h"
 | |
| 
 | |
| #define	BUFSIZ	1024
 | |
| 
 | |
| struct token dot;
 | |
| 
 | |
| int ReplaceMacros = 1;		/* replacing macros			*/
 | |
| int AccDefined = 0;		/* accept "defined(...)"		*/
 | |
| int UnknownIdIsZero = 0;	/* interpret unknown id as integer 0	*/
 | |
| int Unstacked = 0;		/* an unstack is done			*/
 | |
| int AccFileSpecifier = 0;	/* return filespecifier <...>		*/
 | |
| int LexSave = 0;                /* last character read by GetChar       */
 | |
| extern int InputLevel;		/* # of current macro expansions	*/
 | |
| 
 | |
| extern char	*string_token();
 | |
| extern char	*strcpy();
 | |
| extern arith	char_constant();
 | |
| #define		FLG_ESEEN	0x01	/* possibly a floating point number */
 | |
| #define		FLG_DOTSEEN	0x02	/* certainly a floating point number */
 | |
| 
 | |
| int
 | |
| LLlex()
 | |
| {
 | |
| 	return (DOT != EOF) ? GetToken(&dot) : EOF;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| GetToken(ptok)
 | |
| 	register struct token *ptok;
 | |
| {
 | |
| 	/*	GetToken() is the actual token recognizer. It calls the
 | |
| 		control line interpreter if it encounters a "\n{w}*#"
 | |
| 		combination. Macro replacement is also performed if it is
 | |
| 		needed.
 | |
| 	*/
 | |
| 	char buf[BUFSIZ];
 | |
| 	register int ch, nch;
 | |
| 
 | |
| again:	/* rescan the input after an error or replacement	*/
 | |
| 	ch = GetChar();
 | |
| 	/* rescan, the following character has been read	*/
 | |
| 	if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
 | |
| 		fatal("non-ascii '\\%03o' read", ch & 0377);
 | |
| 	/* keep track of the place of the token in the file	*/
 | |
| 
 | |
| 	switch (class(ch)) {	/* detect character class	*/
 | |
| 	case STNL:		/* newline, vertical space or formfeed	*/
 | |
| 		LineNumber++;
 | |
| 		return ptok->tk_symb = EOF;
 | |
| 	case STSKIP:		/* just skip the skip characters	*/
 | |
| 		goto again;
 | |
| 	case STGARB:		/* garbage character			*/
 | |
| garbage:
 | |
| 		if (040 < ch && ch < 0177)
 | |
| 			error("garbage char %c", ch);
 | |
| 		else
 | |
| 			error("garbage char \\%03o", ch);
 | |
| 		goto again;
 | |
| 	case STSIMP:	/* a simple character, no part of compound token*/
 | |
| 		return ptok->tk_symb = ch;
 | |
| 	case STCOMP:	/* maybe the start of a compound token		*/
 | |
| 		nch = GetChar();		/* character lookahead	*/
 | |
| 		switch (ch) {
 | |
| 		case '!':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = NOTEQUAL;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '&':
 | |
| 			if (nch == '&')
 | |
| 				return ptok->tk_symb = AND;
 | |
| 			else if (nch == '=')
 | |
| 				return ptok->tk_symb = ANDAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '+':
 | |
| 			if (nch == '+')
 | |
| 				return ptok->tk_symb = PLUSPLUS;
 | |
| 			else if (nch == '=')
 | |
| 				return ptok->tk_symb = PLUSAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '-':
 | |
| 			if (nch == '-')
 | |
| 				return ptok->tk_symb = MINMIN;
 | |
| 			else if (nch == '>')
 | |
| 				return ptok->tk_symb = ARROW;
 | |
| 			else if (nch == '=')
 | |
| 				return ptok->tk_symb = MINAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '<':
 | |
| 			if (AccFileSpecifier) {
 | |
| 				UnGetChar();	/* pushback nch */
 | |
| 				ptok->tk_str =
 | |
| 					string_token("file specifier", '>');
 | |
| 				return ptok->tk_symb = FILESPECIFIER;
 | |
| 			} else if (nch == '<') {
 | |
| 				if ((nch = GetChar()) == '=')
 | |
| 					return ptok->tk_symb = LEFTAB;
 | |
| 				UnGetChar();
 | |
| 				return ptok->tk_symb = LEFT;
 | |
| 			} else if (nch == '=')
 | |
| 				return ptok->tk_symb = LESSEQ;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '=':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = EQUAL;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '>':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = GREATEREQ;
 | |
| 			else if (nch == '>') {
 | |
| 				if ((nch = GetChar()) == '=')
 | |
| 					return ptok->tk_symb = RIGHTAB;
 | |
| 				UnGetChar();
 | |
| 				return ptok->tk_symb = RIGHT;
 | |
| 			}
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '|':
 | |
| 			if (nch == '|')
 | |
| 				return ptok->tk_symb = OR;
 | |
| 			else if (nch == '=')
 | |
| 				return ptok->tk_symb = ORAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '%':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = MODAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '*':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = TIMESAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '^':
 | |
| 			if (nch == '=')
 | |
| 				return ptok->tk_symb = XORAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		case '/':
 | |
| 			if (nch == '*' && !InputLevel) {
 | |
| 				skipcomment();
 | |
| 				goto again;
 | |
| 			}
 | |
| 			else if (nch == '=')
 | |
| 				return ptok->tk_symb = DIVAB;
 | |
| 			UnGetChar();
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		default:
 | |
| 			crash("bad class for char 0%o", ch);
 | |
| 			/* NOTREACHED */
 | |
| 		}
 | |
| 	case STCHAR:				/* character constant	*/
 | |
| 		ptok->tk_val = char_constant("character");
 | |
| 		return ptok->tk_symb = INTEGER;
 | |
| 	case STSTR:					/* string	*/
 | |
| 		ptok->tk_str = string_token("string", '"');
 | |
| 		return ptok->tk_symb = STRING;
 | |
| 	case STELL:		/* wide character constant/string prefix */
 | |
| 		nch = GetChar();
 | |
| 		if (nch == '"') {
 | |
| 			ptok->tk_str =
 | |
| 				string_token("wide character string", '"');
 | |
| 			return ptok->tk_symb = STRING;
 | |
| 		} else if (nch == '\'') {
 | |
| 			ptok->tk_val = char_constant("wide character");
 | |
| 			return ptok->tk_symb = INTEGER;
 | |
| 		}
 | |
| 		UnGetChar();
 | |
| 		/* fallthrough */
 | |
| 	case STIDF:
 | |
| 	{
 | |
| 		extern int idfsize;		/* ??? */
 | |
| 		register char *tg = &buf[0];
 | |
| 		register char *maxpos = &buf[idfsize];
 | |
| 		int NoExpandNext = 0;
 | |
| 
 | |
| #define tstmac(bx)	if (!(bits[ch] & bx)) goto nomac
 | |
| #define cpy		*tg++ = ch
 | |
| #define load		(ch = GetChar()); if (!in_idf(ch)) goto endidf
 | |
| 
 | |
| 		if (Unstacked) EnableMacros();  /* unstack macro's when allowed. */
 | |
| 		if (ch == NOEXPM)  {
 | |
| 			NoExpandNext = 1;
 | |
| 			ch = GetChar();
 | |
| 		}
 | |
| #ifdef DOBITS
 | |
| 		cpy; tstmac(bit0); load;
 | |
| 		cpy; tstmac(bit1); load;
 | |
| 		cpy; tstmac(bit2); load;
 | |
| 		cpy; tstmac(bit3); load;
 | |
| 		cpy; tstmac(bit4); load;
 | |
| 		cpy; tstmac(bit5); load;
 | |
| 		cpy; tstmac(bit6); load;
 | |
| 		cpy; tstmac(bit7); load;
 | |
| #endif
 | |
| 		for(;;) {
 | |
| 			if (tg < maxpos) {
 | |
| 				cpy;
 | |
| 
 | |
| 			}
 | |
| 			load;
 | |
| 		}
 | |
| 	endidf:
 | |
| 		/*if (ch != EOI) UnGetChar();*/
 | |
| 		UnGetChar();
 | |
| 		*tg++ = '\0';	/* mark the end of the identifier	*/
 | |
| 		if (ReplaceMacros) {
 | |
| 			register struct idf *idef = findidf(buf);
 | |
| 
 | |
| 			if (idef && idef->id_macro && !NoExpandNext) {
 | |
| 				if (replace(idef))
 | |
| 					goto again;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	nomac:			/* buf can already be null-terminated. soit */
 | |
| 		ch = GetChar();
 | |
| 		while (in_idf(ch)) {
 | |
| 			if (tg < maxpos) *tg++ = ch;
 | |
| 			ch = GetChar();
 | |
| 		}
 | |
| 		UnGetChar();
 | |
| 		*tg++ = '\0';   /* mark the end of the identifier       */
 | |
| 
 | |
| 		NoExpandNext = 0;
 | |
| 		if (UnknownIdIsZero) {
 | |
| 			ptok->tk_val = (arith)0;
 | |
| 			return ptok->tk_symb = INTEGER;
 | |
| 		}
 | |
| 		ptok->tk_str = Malloc((unsigned)(tg - buf));
 | |
| 		strcpy(ptok->tk_str, buf);
 | |
| 		return IDENTIFIER;
 | |
| 	}
 | |
| 	case STNUM:				/* a numeric constant	*/
 | |
| 	{			/* it may only be an integer constant */
 | |
| 		register int base = 10, val = 0, vch;
 | |
| 
 | |
| 		/* Since the preprocessor only knows integers and has
 | |
| 		 * nothing to do with ellipsis we just return when the
 | |
| 		 * pp-number starts with a '.'
 | |
| 		 */
 | |
| 		if (ch == '.') {
 | |
| 			return ptok->tk_symb = ch;
 | |
| 		}
 | |
| 		if (ch == '0') {
 | |
| 			ch = GetChar();
 | |
| 			if (ch == 'x' || ch == 'X') {
 | |
| 				base = 16;
 | |
| 				ch = GetChar();
 | |
| 			} else {
 | |
| 				base = 8;
 | |
| 			}
 | |
| 
 | |
| 		}
 | |
| 		while ((vch = val_in_base(ch, base)) >= 0) {
 | |
| 			val = val * base + vch;		/* overflow? nah */
 | |
| 			ch = GetChar();
 | |
| 		}
 | |
| 		while (ch == 'l' || ch == 'L' || ch == 'u' || ch == 'U')
 | |
| 			ch = GetChar();
 | |
| 		UnGetChar();
 | |
| 		ptok->tk_val = val;
 | |
| 		return ptok->tk_symb = INTEGER;
 | |
| 	}
 | |
| 	case STEOI:			/* end of text on source file	*/
 | |
| 		return ptok->tk_symb = EOF;
 | |
|         case STMSPEC:
 | |
| 		if (!InputLevel) goto garbage;
 | |
| 		if (ch == TOKSEP) goto again;
 | |
| 		/* fallthrough shouldn't happen */
 | |
| 	default:				/* this cannot happen	*/
 | |
| 		crash("bad class for char 0%o", ch);
 | |
| 	}
 | |
| 	/*NOTREACHED*/
 | |
| }
 | |
| 
 | |
| skipcomment()
 | |
| {
 | |
| 	/*	The last character read has been the '*' of '/_*'.  The
 | |
| 		characters, except NL and EOI, between '/_*' and the first
 | |
| 		occurring '*_/' are not interpreted.
 | |
| 		NL only affects the LineNumber.  EOI is not legal.
 | |
| 
 | |
| 		Important note: it is not possible to stop skipping comment
 | |
| 		beyond the end-of-file of an included file.
 | |
| 		EOI is returned by LoadChar only on encountering EOF of the
 | |
| 		top-level file...
 | |
| 	*/
 | |
| 	register int c;
 | |
| 
 | |
| 	NoUnstack++;
 | |
| 	c = GetChar();
 | |
| 	do {
 | |
| 		while (c != '*') {
 | |
| 			if (class(c) == STNL) {
 | |
| 				++LineNumber;
 | |
| 			} else if (c == EOI) {
 | |
| 				NoUnstack--;
 | |
| 				return;
 | |
| 			}
 | |
| 			c = GetChar();
 | |
| 		} /* last Character seen was '*' */
 | |
| 		c = GetChar();
 | |
| 	} while (c != '/');
 | |
| 	NoUnstack--;
 | |
| }
 | |
| 
 | |
| arith
 | |
| char_constant(nm)
 | |
| 	char *nm;
 | |
| {
 | |
| 	register arith val = 0;
 | |
| 	register int ch;
 | |
| 	int size = 0;
 | |
| 
 | |
| 	ch = GetChar();
 | |
| 	if (ch == '\'')
 | |
| 		error("%s constant too short", nm);
 | |
| 	else
 | |
| 	while (ch != '\'') {
 | |
| 		if (ch == '\n') {
 | |
| 			error("newline in %s constant", nm);
 | |
| 			LineNumber++;
 | |
| 			break;
 | |
| 		}
 | |
| 		if (ch == '\\')
 | |
| 			ch = quoted(GetChar());
 | |
| 		if (ch >= 128) ch -= 256;
 | |
| 		if (size < sizeof(arith))
 | |
| 			val |= ch << (8 * size);
 | |
| 		size++;
 | |
| 		ch = GetChar();
 | |
| 	}
 | |
| 	if (size > sizeof(arith))
 | |
| 		error("%s constant too long", nm);
 | |
| 	else if (size > 1)
 | |
| 		strict("%s constant includes more than one character", nm);
 | |
| 	return val;
 | |
| }
 | |
| 
 | |
| char *
 | |
| string_token(nm, stop_char)
 | |
| 	char *nm;
 | |
| {
 | |
| 	register int ch;
 | |
| 	register int str_size;
 | |
| 	register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
 | |
| 	register int pos = 0;
 | |
| 	
 | |
| 	ch = GetChar();
 | |
| 	while (ch != stop_char) {
 | |
| 		if (ch == '\n') {
 | |
| 			error("newline in %s", nm);
 | |
| 			LineNumber++;
 | |
| 			break;
 | |
| 		}
 | |
| 		if (ch == EOI) {
 | |
| 			error("end-of-file inside %s", nm);
 | |
| 			break;
 | |
| 		}
 | |
| 		if (ch == '\\' && !AccFileSpecifier)
 | |
| 			ch = quoted(GetChar());
 | |
| 		str[pos++] = ch;
 | |
| 		if (pos == str_size)
 | |
| 			str = Realloc(str, (unsigned)(str_size <<= 1));
 | |
| 		ch = GetChar();
 | |
| 	}
 | |
| 	str[pos++] = '\0'; /* for filenames etc. */
 | |
| 	str = Realloc(str, (unsigned)pos);
 | |
| 	return str;
 | |
| }
 | |
| 
 | |
| int
 | |
| quoted(ch)
 | |
| 	register int ch;
 | |
| {	
 | |
| 	/*	quoted() replaces an escaped character sequence by the
 | |
| 		character meant.
 | |
| 	*/
 | |
| 	/* first char after backslash already in ch */
 | |
| 	if (!is_oct(ch)) {		/* a quoted char */
 | |
| 		switch (ch) {
 | |
| 		case 'n':
 | |
| 			ch = '\n';
 | |
| 			break;
 | |
| 		case 't':
 | |
| 			ch = '\t';
 | |
| 			break;
 | |
| 		case 'b':
 | |
| 			ch = '\b';
 | |
| 			break;
 | |
| 		case 'r':
 | |
| 			ch = '\r';
 | |
| 			break;
 | |
| 		case 'f':
 | |
| 			ch = '\f';
 | |
| 			break;
 | |
| 		case 'a':		/* alert */
 | |
| 			ch = '\007';
 | |
| 			break;
 | |
| 		case 'v':		/* vertical tab */
 | |
| 			ch = '\013';
 | |
| 			break;
 | |
| 		case 'x':		/* quoted hex */
 | |
| 		{
 | |
| 			register int hex = 0;
 | |
| 			register int vch;
 | |
| 
 | |
| 			for (;;) {
 | |
| 				ch = GetChar();
 | |
| 				if (vch = val_in_base(ch, 16), vch == -1)
 | |
| 					break;
 | |
| 				hex = hex * 16 + vch;
 | |
| 			}
 | |
| 			UnGetChar();
 | |
| 			ch = hex;
 | |
| 		}
 | |
| 		}
 | |
| 	} else {				/* a quoted octal */
 | |
| 		register int oct = 0, cnt = 0;
 | |
| 
 | |
| 		do {
 | |
| 			oct = oct*8 + (ch-'0');
 | |
| 			ch = GetChar();
 | |
| 		} while (is_oct(ch) && ++cnt < 3);
 | |
| 		UnGetChar();
 | |
| 		ch = oct;
 | |
| 	}
 | |
| 	return ch&0377;
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| val_in_base(ch, base)
 | |
| 	register int ch;
 | |
| {
 | |
| 	switch (base) {
 | |
| 	case 8:
 | |
| 		return (is_dig(ch) && ch < '9') ? ch - '0' : -1;
 | |
| 	case 10:
 | |
| 		return is_dig(ch) ? ch - '0' : -1;
 | |
| 	case 16:
 | |
| 		return is_dig(ch) ? ch - '0'
 | |
| 			: is_hex(ch) ? (ch - 'a' + 10) & 017
 | |
| 			: -1;
 | |
| 	default:
 | |
| 		fatal("(val_in_base) illegal base value %d", base);
 | |
| 		/* NOTREACHED */
 | |
| 	}
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| GetChar()
 | |
| {
 | |
| 	/*	The routines GetChar and trigraph parses the trigraph
 | |
| 		sequences and removes occurences of \\\n.
 | |
| 	*/
 | |
| 	register int ch;
 | |
| 
 | |
| again:
 | |
| 	LoadChar(ch);
 | |
| 
 | |
| 	/* possible trigraph sequence */
 | |
| 	if (ch == '?')
 | |
| 		ch = trigraph();
 | |
| 
 | |
| 	/* \\\n are removed from the input stream */
 | |
| 	if (ch == '\\') {
 | |
| 		LoadChar(ch);
 | |
| 		if (ch == '\n') {
 | |
| 			++LineNumber;
 | |
| 			goto again;
 | |
| 		}
 | |
| 		PushBack();
 | |
| 		ch = '\\';
 | |
| 	}
 | |
| 	return(LexSave = ch);
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| trigraph()
 | |
| {
 | |
| 	register int ch;
 | |
| 
 | |
| 	LoadChar(ch);
 | |
| 	if (ch == '?') {
 | |
| 		LoadChar(ch);
 | |
| 		switch (ch) {		/* its a trigraph */
 | |
| 		case '=':
 | |
| 			ch =  '#';
 | |
| 			return(ch);
 | |
| 		case '(':
 | |
| 			ch = '[';
 | |
| 			return(ch);
 | |
| 		case '/':
 | |
| 			ch = '\\';
 | |
| 			return(ch);
 | |
| 		case ')':
 | |
| 			ch = ']';
 | |
| 			return(ch);
 | |
| 		case '\'':
 | |
| 			ch = '^';
 | |
| 			return(ch);
 | |
| 		case '<':
 | |
| 			ch = '{';
 | |
| 			return(ch);
 | |
| 		case '!':
 | |
| 			ch = '|';
 | |
| 			return(ch);
 | |
| 		case '>':
 | |
| 			ch = '}';
 | |
| 			return(ch);
 | |
| 		case '-':
 | |
| 			ch = '~';
 | |
| 			return(ch);
 | |
| 		}
 | |
| 		PushBack();
 | |
| 	}
 | |
| 	PushBack();
 | |
| 	return('?');
 | |
| }
 |