ack/lang/cem/cemcom.ansi/LLlex.c

/*
 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 * See the copyright notice in the ACK home directory, in the file "Copyright".
 */
/* $Header$ */
/*		    L E X I C A L   A N A L Y Z E R			*/

#include	"lint.h"
#include	<alloc.h>
#include	"nofloat.h"
#include	"idfsize.h"
#include	"numsize.h"
#include	"debug.h"
#include	"strsize.h"
#include	"nopp.h"
#include	"input.h"
#include	"arith.h"
#include	"def.h"
#include	"macro.h"
#include	"idf.h"
#include	"LLlex.h"
#include	"Lpars.h"
#include	"class.h"
#include	"assert.h"
#include	"sizes.h"

/* Data about the token yielded:
	dot	the token most recently delivered by LLlex()
	ahead	the look-ahead token, read one token in advance
	aside	a token that was put aside; LLlex() delivers it first
*/
struct token dot, ahead, aside;

#ifndef NOPP
int ReplaceMacros = 1;		/* replacing macros			*/
int AccDefined = 0;		/* accept "defined(...)"		*/
int UnknownIdIsZero = 0;	/* interpret unknown id as integer 0	*/
int Unstacked = 0;		/* an unstack is done 			*/
#endif
int AccFileSpecifier = 0;	/* return filespecifier <...>		*/
int EoiForNewline = 0;		/* return EOI upon encountering newline	*/
int File_Inserted = 0;		/* a file has just been inserted	*/
int LexSave = 0;		/* last character read by GetChar	*/
#define MAX_LL_DEPTH	2	/* capacity of LexStack			*/

/* Stack of `dot' tokens, pushed by PushLex() and popped by PopLex(). */
static struct token LexStack[MAX_LL_DEPTH];
static int LexSP = 0;		/* index of the first free LexStack slot */

/*	In PushLex() the actions are taken in order to initialise or
	re-initialise the lexical scanner.
	E.g. at the invocation of a sub-parser that uses LLlex(), the
	state of the current parser should be saved.
*/
PushLex()
{
	ASSERT(LexSP < 2);
	ASSERT(ASIDE == 0);	/* ASIDE = 0;	*/
	GetToken(&ahead);
	LexStack[LexSP++] = dot;
}

/*	PopLex() is the counterpart of PushLex(): the most recently saved
	token is taken from LexStack and becomes `dot' again.
*/
PopLex()
{
	ASSERT(LexSP > 0);	/* there must be something to restore */
	LexSP--;
	dot = LexStack[LexSP];
}

int
LLlex()
{
	/*	LLlex() is the Lexical Analyzer as seen by the C parser.
		It makes the next token available in `dot' and returns its
		symbol (DOT), taking into account tokens that were put
		aside and the one-token look-ahead kept in `ahead'.
	*/
	if (ASIDE) {
		/* a token was put aside: deliver it and clear the slot */
		dot = aside;
		ASIDE = 0;
		return DOT;
	}

	/* otherwise deliver the look-ahead token */
#ifdef	LINT
	lint_comment_ahead();
#endif	/* LINT */
	dot = ahead;

	/*	LLlex() has a dual task: it is also called for parsing
		the restricted constant expression following a #if or
		#elif.  An EOI token is therefore turned into EOF, which
		stops the LLgen parsing task, and in that case no new
		look-ahead token is read.
	*/
	if (DOT == EOI)
		DOT = EOF;
	else
		GetToken(&ahead);
	return DOT;
}


/*	Forward declarations: both routines are defined further down in
	this file and do not return int, so they must be declared before
	GetToken() calls them.
*/
char	*string_token();
arith	char_constant();


int
GetToken(ptok)
	register struct token *ptok;
{
	/*	GetToken() is the actual token recognizer. It calls the
		control line interpreter if it encounters a "\n{w}*#"
		combination. Macro replacement is also performed if it is
		needed.

		The token is delivered in the struct ptok points to:
		tk_symb receives the token code, tk_file and tk_line the
		position, and, depending on the kind of token, tk_idf,
		tk_ival, tk_fval, tk_bts with tk_len, and tk_fund.
		The value returned is the code stored in tk_symb, except
		for identifiers (see the end of case STIDF).
	*/
	/* scratch space for the text of an identifier or a number */
	char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
	register int ch, nch;

	if (File_Inserted) {
		/* start on a fresh file as if a newline had been read */
		File_Inserted = 0;
		goto firstline;
	}

again:	/* rescan the input after an error or replacement	*/
	ch = GetChar();
go_on:	/* rescan, the following character has been read	*/
	if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
		fatal("non-ascii '\\%03o' read", ch & 0377);
	/* keep track of the place of the token in the file	*/
	ptok->tk_file = FileName;
	ptok->tk_line = LineNumber;

	switch (class(ch)) {	/* detect character class	*/
	case STNL:		/* newline, vertical space or formfeed	*/
firstline:
		LineNumber++;			/* also at vs and ff	*/
		ptok->tk_file = FileName;
		ptok->tk_line = LineNumber;
		if (EoiForNewline)	/* called in control line	*/
			/*	a newline in a control line indicates the
				end-of-information of the line.
			*/
			return ptok->tk_symb = EOI;
		while ((ch = GetChar()), (ch == '#' || class(ch) == STSKIP)) {
			/* blanks are allowed before hashes */
			if (ch == '#') {
				/* a control line follows */
				domacro();
				if (File_Inserted) {
					File_Inserted = 0;
					goto firstline;
				}
			}
		}
			/*	We have to loop here, because in
				`domacro' the nl, vt or ff is read. The
				character following it may again be a `#'.
			*/
		goto go_on;
	case STSKIP:		/* just skip the skip characters	*/
		goto again;
	case STGARB:		/* garbage character			*/
		if (040 < ch && ch < 0177)
			lexerror("garbage char %c", ch);
		else
			lexerror("garbage char \\%03o", ch);
		goto again;
	case STSIMP:	/* a simple character, no part of compound token*/
		if (ch == '/') { /* probably the start of comment	*/
			ch = GetChar();
			if (ch == '*') { /* start of comment */
				skipcomment();
				goto again;
			}
			else {
				UnGetChar();
				ch = '/';	/* restore ch	*/
			}
		}
		return ptok->tk_symb = ch;
	case STCOMP:	/* maybe the start of a compound token		*/
		nch = GetChar();		/* character lookahead	*/
		switch (ch) {
		case '!':
			if (nch == '=')
				return ptok->tk_symb = NOTEQUAL;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '&':
			if (nch == '&')
				return ptok->tk_symb = AND;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '+':
			if (nch == '+')
				return ptok->tk_symb = PLUSPLUS;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '-':
			if (nch == '-')
				return ptok->tk_symb = MINMIN;
			if (nch == '>')
				return ptok->tk_symb = ARROW;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '<':
			if (AccFileSpecifier) {
				UnGetChar();	/* pushback nch */
				ptok->tk_bts = string_token("file specifier",
							'>', &(ptok->tk_len));
				return ptok->tk_symb = FILESPECIFIER;
			}
			if (nch == '<')
				return ptok->tk_symb = LEFT;
			if (nch == '=')
				return ptok->tk_symb = LESSEQ;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '=':
			if (nch == '=')
				return ptok->tk_symb = EQUAL;

			/*	The following piece of code tries to recognise
				old-fashioned assignment operators `=op'
				Note however, that these are removed from the
				ANSI C standard.
				NOTE(review): as a consequence input such as
				`a=-1' is delivered as `a -= 1' (with a
				warning); confirm that this is still wanted.
			*/
			switch (nch) {
			case '+':
				ptok->tk_symb = PLUSAB;
				goto warn;
			case '-':
				ptok->tk_symb = MINAB;
				goto warn;
			case '*':
				ptok->tk_symb = TIMESAB;
				goto warn;
			case '/':
				ptok->tk_symb = DIVAB;
				goto warn;
			case '%':
				ptok->tk_symb = MODAB;
				goto warn;
			case '>':
			case '<':
				/*	the shift forms need a second, identical
					character.  GetChar() takes no argument;
					its result must be assigned, otherwise
					ch keeps the value '=' and the test
					below always fails.
				*/
				ch = GetChar();
				if (ch != nch) {
					UnGetChar();
					lexerror("illegal combination '=%c'",
						nch);
				}
				ptok->tk_symb = nch == '<' ? LEFTAB : RIGHTAB;
				goto warn;
			case '&':
				ptok->tk_symb = ANDAB;
				goto warn;
			case '^':
				ptok->tk_symb = XORAB;
				goto warn;
			case '|':
				ptok->tk_symb = ORAB;
			warn:
				warning("Old-fashioned assignment operator");
				return ptok->tk_symb;
			}
			/* a plain '=': give the look-ahead character back */
			UnGetChar();
			return ptok->tk_symb = ch;
		case '>':
			if (nch == '=')
				return ptok->tk_symb = GREATEREQ;
			if (nch == '>')
				return ptok->tk_symb = RIGHT;
			UnGetChar();
			return ptok->tk_symb = ch;
		case '|':
			if (nch == '|')
				return ptok->tk_symb = OR;
			UnGetChar();
			return ptok->tk_symb = ch;
		default:
			/*	every character of class STCOMP must be handled
				above; without this default such a character
				would silently be treated as the start of a
				character constant.
			*/
			crash("bad class for char 0%o", ch);
			/*NOTREACHED*/
		}
	case STCHAR:				/* character constant	*/
		ptok->tk_ival = char_constant("character");
		ptok->tk_fund = INT;
		return ptok->tk_symb = INTEGER;
	case STSTR:					/* string	*/
		ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
		ptok->tk_fund = CHAR;		/* string of characters */
		return ptok->tk_symb = STRING;
	case STELL:		/* wide character constant/string prefix */
		nch = GetChar();
		if (nch == '"') {
			ptok->tk_bts = string_token("wide character string",
					'"', &(ptok->tk_len));
			ptok->tk_fund = WCHAR;	/* string of wide characters */
			return ptok->tk_symb = STRING;
		} else if (nch == '\'') {
			ptok->tk_ival = char_constant("wide character");
			ptok->tk_fund = INT;
			return ptok->tk_symb = INTEGER;
		}
		/* not a prefix after all: ch starts an ordinary identifier */
		UnGetChar();
		/*FALLTHROUGH*/
	case STIDF:
	{
		register char *tg = &buf[0];
		register int pos = -1;
		register int hash;
		register struct idf *idef;
		extern int idfsize;		/* ??? */

		hash = STARTHASH();
		do	{			/* read the identifier	*/
			/* only the first idfsize characters are kept */
			if (++pos < idfsize) {
				*tg++ = ch;
				hash = ENHASH(hash, ch, pos);
			}
			ch = GetChar();
		} while (in_idf(ch));

		hash = STOPHASH(hash);
		if (ch != EOI)
			UnGetChar();
		*tg++ = '\0';	/* mark the end of the identifier	*/
		idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);
		idef->id_file = ptok->tk_file;
		idef->id_line = ptok->tk_line;
#ifndef NOPP
		if (idef->id_macro && ReplaceMacros) {
			if (idef->id_macro->mc_count > 0)
				idef->id_macro->mc_count--;
			else if (replace(idef))
				goto again;
		}
		if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {
			ptok->tk_ival = (arith)0;
			ptok->tk_fund = INT;
			return ptok->tk_symb = INTEGER;
		}
#endif	/* NOPP */
		ptok->tk_symb = (
			idef->id_reserved ? idef->id_reserved
			: idef->id_def && idef->id_def->df_sc == TYPEDEF ?
				TYPE_IDENTIFIER
			: IDENTIFIER
		);
		/*	NOTE(review): the value returned is always IDENTIFIER,
			also when tk_symb holds a reserved word or
			TYPE_IDENTIFIER.  Presumably callers that inspect the
			return value rely on this; confirm before changing.
		*/
		return IDENTIFIER;
	}
	case STNUM:				/* a numeric constant	*/
	{
		/* buf[0] is kept free for the sign written further down */
		register char *np = &buf[1];
		register int base = 10;
		register int vch;
		register arith val = 0;

		if (ch == '.') {
#ifndef NOFLOAT
			/*	A very embarrasing ambiguity. We have either a
				floating point number or field operator or
				ELLIPSIS.
			*/
			vch = GetChar();
			if (!is_dig(vch)) {	/* . or ... */
				if (vch == '.') {
					if ((vch = GetChar()) == '.')
						return ptok->tk_symb = ELLIPSIS;
					/* This is funny: we can't push the
					   second dot back. But then again
					   ..<ch> is already an error in C,
					   so why bother ?
					*/
					UnGetChar();
					lexerror("illegal combination '..'");
				}
				UnGetChar();
				return ptok->tk_symb = '.';
			}
			*np++ = '0';
			UnGetChar();
#else
			if ((vch = GetChar()) == '.') {
				if ((vch = GetChar()) == '.')
					return ptok->tk_symb = ELLIPSIS;
				UnGetChar();
				lexerror("illegal combination '..'");
			}
			UnGetChar();
			return ptok->tk_symb = '.';
#endif
		}
		if (ch == '0') {
			*np++ = ch;
			ch = GetChar();
			if (ch == 'x' || ch == 'X') {
				base = 16;
				ch = GetChar();
			}
			else
				base = 8;
		}
		/* collect the digits; val is only meaningful for integers */
		while (vch = val_in_base(ch, base), vch >= 0) {
			val = val*base + vch;
			if (np < &buf[NUMSIZE])
				*np++ = ch;
			ch = GetChar();
		}
		if (is_suf(ch)) {
			register int suf_long = 0;
			register int suf_unsigned = 0;

			/*	The type of the integal constant is
				based on its suffix.
			*/
			do {
				switch (ch) {
				case 'l':
				case 'L':
					suf_long++;
					break;
				case 'u':
				case 'U':
					suf_unsigned++;
					break;
				}
				ch = GetChar();
			} while (is_suf(ch));
			UnGetChar();

			if (suf_long > 1)
				lexerror("only one long suffix allowed");
			if (suf_unsigned > 1)
				lexerror("only one unsigned suffix allowed");

			ptok->tk_fund = (suf_long && suf_unsigned) ? ULONG :
					(suf_long) ? LONG : UNSIGNED;
			ptok->tk_ival = val;
			return ptok->tk_symb = INTEGER;
		}
#ifndef NOFLOAT
		if (base == 16 || !(ch == '.' || ch == 'e' || ch == 'E'))
#endif	/* NOFLOAT */
		{
			UnGetChar();
			ptok->tk_ival = val;
			/*	The semantic analyser must know if the
				integral constant is given in octal/hexa-
				decimal form, in which case its type is
				UNSIGNED, or in decimal form, in which case
				its type is signed, indicated by
				the fund INTEGER.
			*/
			ptok->tk_fund = 
				(base == 10 || (base == 8 && val == (arith)0))
					? INTEGER : UNSIGNED;
			return ptok->tk_symb = INTEGER;
		}
		/* where's the test for the length of the integral ???	*/
#ifndef NOFLOAT
		/* a floating constant: fraction, exponent, suffix */
		if (ch == '.'){
			if (np < &buf[NUMSIZE])
				*np++ = ch;
			ch = GetChar();
		}
		while (is_dig(ch)){
			if (np < &buf[NUMSIZE])
				*np++ = ch;
			ch = GetChar();
		}
		if (ch == 'e' || ch == 'E') {
			if (np < &buf[NUMSIZE])
				*np++ = ch;
			ch = GetChar();
			if (ch == '+' || ch == '-') {
				if (np < &buf[NUMSIZE])
					*np++ = ch;
				ch = GetChar();
			}
			if (!is_dig(ch)) {
				lexerror("malformed floating constant");
				if (np < &buf[NUMSIZE])
					*np++ = ch;
			}
			while (is_dig(ch)) {
				if (np < &buf[NUMSIZE])
					*np++ = ch;
				ch = GetChar();
			}
		}

		/*	The type of an integral floating point
			constant may be given by the float (f)
			or long double (l) suffix.
		*/
		if (ch == 'f' || ch == 'F')
			ptok->tk_fund = FLOAT;
		else if (ch == 'l' || ch == 'L')
			ptok->tk_fund = LNGDBL;
		else {
			ptok->tk_fund = DOUBLE;
			UnGetChar();
		}

		*np++ = '\0';
		buf[0] = '-';	/* good heavens...	*/
		/*	np can only have reached this position when the text
			filled the buffer completely.
			NOTE(review): tk_fval points one past the '-' stored
			in front of the text, but the replacement "0.0" has
			no such leading character; confirm this is intended.
		*/
		if (np == &buf[NUMSIZE+1]) {
			lexerror("floating constant too long");
			ptok->tk_fval = Salloc("0.0",(unsigned) 5) + 1;
		}
		else
			ptok->tk_fval = Salloc(buf,(unsigned) (np - buf)) + 1;
		return ptok->tk_symb = FLOATING;
#endif	/* NOFLOAT */
	}
	case STEOI:			/* end of text on source file	*/
		return ptok->tk_symb = EOI;
	default:				/* this cannot happen	*/
		crash("bad class for char 0%o", ch);
	}
	/*NOTREACHED*/
}

skipcomment()
{
	/*	The last character read has been the '*' of '/_*'.  The
		characters, except NL and EOI, between '/_*' and the first
		occurring '*_/' are not interpreted.
		NL only affects the LineNumber.  EOI is not legal; when it
		is met the routine simply returns.
		A '/_*' inside the comment is reported through strict().

		NoUnstack is incremented for the duration of the scan and
		decremented again on every way out.

		Important note: it is not possible to stop skipping comment
		beyond the end-of-file of an included file.
		EOI is returned by LoadChar only on encountering EOF of the
		top-level file...
	*/
	register int c;
	register int prev;	/* the character read just before c */

	NoUnstack++;
	c = GetChar();
#ifdef	LINT
	lint_start_comment();
	lint_comment_char(c);
#endif	/* LINT */
	do {
		while (c != '*') {
			if (class(c) == STNL) {
				++LineNumber;
			} else
			if (c == EOI) {
				NoUnstack--;
#ifdef	LINT
				lint_end_comment();
#endif	/* LINT */
				return;
			}
			/*	Remember the previous character instead of
				reading ahead after a '/': every character is
				then examined exactly once, so a newline that
				follows a '/' is still counted and the '*' of
				an inner '/_*' can still start the closing
				delimiter.
			*/
			prev = c;
			c = GetChar();
#ifdef	LINT
			lint_comment_char(c);
#endif	/* LINT */
			if (prev == '/' && c == '*')
				strict("extra comment delimiter found");
		} /* last Character seen was '*' */
		c = GetChar();
#ifdef	LINT
		lint_comment_char(c);
#endif	/* LINT */
	} while (c != '/');
#ifdef	LINT
	lint_end_comment();
#endif	/* LINT */
	NoUnstack--;
}

arith
char_constant(nm)
	char *nm;
{
	register arith val = 0;
	register int ch;
	int size = 0;

	ch = GetChar();
	if (ch == '\'')
		lexerror("%s constant too short", nm);
	else
	while (ch != '\'') {
		if (ch == '\n') {
			lexerror("newline in %s constant", nm);
			LineNumber++;
			break;
		}
		if (ch == '\\')
			ch = quoted(GetChar());
		if (ch >= 128) ch -= 256;
		val = val*256 + ch;
		size++;
		ch = GetChar();
	}
	if (size > 1)
		strict("%s constant includes more than one character", nm);
	if (size > (int)int_size)
		lexerror("%s constant too long", nm);
	return val;
}

char *
string_token(nm, stop_char, plen)
	char *nm;
	int stop_char;
	int *plen;
{
	/*	string_token() collects the characters up to, but not
		including, `stop_char' in a freshly allocated buffer and
		returns that buffer.  The text is terminated by a null
		byte and its length, that byte included, is stored
		through `plen'.  `nm' names the kind of token in messages.

		Escape sequences are translated by quoted().  A newline
		or the end of the input ends the token with an error.
	*/
	register int c;
	register int room = ISTRSIZE;	/* current size of the buffer */
	register char *text = Malloc((unsigned) room);
	register int len = 0;

	for (c = GetChar(); c != stop_char; c = GetChar()) {
		if (c == '\n') {
			lexerror("newline in %s", nm);
			LineNumber++;
			break;
		}
		if (c == EOI) {
			lexerror("end-of-file inside %s", nm);
			break;
		}
		if (c == '\\')
			c = quoted(GetChar());
		text[len++] = c;
		if (len == room) {
			/* full: grow, so the terminator always fits */
			room += RSTRSIZE;
			text = Srealloc(text, (unsigned) room);
		}
	}
	text[len++] = '\0'; /* for filenames etc. */
	*plen = len;
	return text;
}

int
quoted(ch)
	register int ch;
{	
	/*	quoted() replaces an escaped character sequence by the
		character meant.  `ch' is the first character after the
		backslash; further characters are read with GetChar()
		when the sequence is an octal or hexadecimal one.
		The value returned is limited to 8 bits.  A character
		that does not introduce a known sequence stands for
		itself.
	*/
	/* first char after backslash already in ch */
	if (!is_oct(ch)) {		/* a quoted char */
		switch (ch) {
		case 'n':
			ch = '\n';
			break;
		case 't':
			ch = '\t';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'r':
			ch = '\r';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'a':		/* alert */
			ch = '\007';
			break;
		case 'v':		/* vertical tab */
			ch = '\013';
			break;
		case 'x':		/* quoted hex */
		{
			register int hex = 0;
			register int vch;

			/* any number of hex digits is accepted */
			for (;;) {
				ch = GetChar();
				if (vch = val_in_base(ch, 16), vch == -1)
					break;
				/*	Only the low 8 bits are returned, so
					reduce at every step; the result is
					the same and a long run of digits can
					no longer overflow the int.
				*/
				hex = (hex * 16 + vch) & 0377;
			}
			UnGetChar();
			ch = hex;
		}
		}
	}
	else {				/* a quoted octal */
		register int oct = 0, cnt = 0;

		/* at most three octal digits belong to the sequence */
		do {
			oct = oct*8 + (ch-'0');
			ch = GetChar();
		} while (is_oct(ch) && ++cnt < 3);
		UnGetChar();
		ch = oct;
	}
	return ch&0377;
}


int
val_in_base(ch, base)
	register int ch;
	int base;
{
	/*	val_in_base() returns the value of the character `ch'
		taken as a digit in base `base' (8, 10 or 16), or -1 if
		`ch' is not a digit in that base.  Any other base is a
		fatal error.
	*/
	if (base == 8) {
		/*	NOTE(review): the test also lets '8' through as
			an octal digit; confirm whether that is intended
			before tightening it, because the number scanning
			in GetToken() depends on this routine.
		*/
		return (is_dig(ch) && ch < '9') ? ch - '0' : -1;
	}
	if (base == 10)
		return is_dig(ch) ? ch - '0' : -1;
	if (base == 16) {
		if (is_dig(ch))
			return ch - '0';
		if (is_hex(ch)) {
			/* keeping the low four bits yields the same
			   value for either case of the letter
			   (assuming ASCII)
			*/
			return (ch - 'a' + 10) & 017;
		}
		return -1;
	}
	fatal("(val_in_base) illegal base value %d", base);
	/* NOTREACHED */
}


int
GetChar()
{
	/*	GetChar() delivers the next character of the input after
		trigraph sequences have been replaced (by trigraph()) and
		backslash-newline pairs have been removed.  The character
		is also recorded in LexSave.
	*/
	register int ch;

	for (;;) {
		LoadChar(ch);

		/* possible trigraph sequence */
		if (ch == '?')
			ch = trigraph();

		if (ch != '\\')
			break;

		/* a backslash: look at the character that follows it */
		LoadChar(ch);
		if (ch != '\n') {
			/* an ordinary backslash; give the look-ahead back */
			PushBack();
			ch = '\\';
			break;
		}
		/* backslash-newline is dropped, but the line is counted */
		++LineNumber;
	}
	LexSave = ch;
	return ch;
}


int
trigraph()
{
	/*	trigraph() is called by GetChar() after a question mark
		has been read.  If a second question mark and one of the
		characters listed below follow, the character the sequence
		stands for is returned.  Otherwise everything read here is
		pushed back and the question mark itself is returned.
	*/
	register int ch;

	LoadChar(ch);
	if (ch != '?') {
		/* a lone question mark */
		PushBack();
		return '?';
	}

	LoadChar(ch);
	switch (ch) {		/* its a trigraph */
	case '=':
		return '#';
	case '(':
		return '[';
	case '/':
		return '\\';
	case ')':
		return ']';
	case '\'':
		return '^';
	case '<':
		return '{';
	case '!':
		return '|';
	case '>':
		return '}';
	case '-':
		return '~';
	}

	/* no trigraph: give back both characters read here */
	PushBack();
	PushBack();
	return '?';
}
Initial revision 1989-02-07 11:04:05 +00:00			`/*`
			`* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.`
			`* See the copyright notice in the ACK home directory, in the file "Copyright".`
			`*/`
			`/* $Header$ */`
			`/* L E X I C A L A N A L Y Z E R */`

			`#include "lint.h"`
			`#include <alloc.h>`
			`#include "nofloat.h"`
			`#include "idfsize.h"`
			`#include "numsize.h"`
			`#include "debug.h"`
			`#include "strsize.h"`
			`#include "nopp.h"`
			`#include "input.h"`
			`#include "arith.h"`
			`#include "def.h"`
			`#include "macro.h"`
			`#include "idf.h"`
			`#include "LLlex.h"`
			`#include "Lpars.h"`
			`#include "class.h"`
			`#include "assert.h"`
			`#include "sizes.h"`

			`/* Data about the token yielded */`
			`struct token dot, ahead, aside;`

			`#ifndef NOPP`
			`int ReplaceMacros = 1; /* replacing macros */`
			`int AccDefined = 0; /* accept "defined(...)" */`
			`int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */`
			`int Unstacked = 0; /* an unstack is done */`
			`#endif`
			`int AccFileSpecifier = 0; /* return filespecifier <...> */`
			`int EoiForNewline = 0; /* return EOI upon encountering newline */`
			`int File_Inserted = 0; /* a file has just been inserted */`
			`int LexSave = 0; /* last character read by GetChar */`
			`#define MAX_LL_DEPTH 2`

			`static struct token LexStack[MAX_LL_DEPTH];`
			`static LexSP = 0;`

			`/* In PushLex() the actions are taken in order to initialise or`
			`re-initialise the lexical scanner.`
			`E.g. at the invocation of a sub-parser that uses LLlex(), the`
			`state of the current parser should be saved.`
			`*/`
			`PushLex()`
			`{`
			`ASSERT(LexSP < 2);`
			`ASSERT(ASIDE == 0); /* ASIDE = 0; */`
			`GetToken(&ahead);`
			`LexStack[LexSP++] = dot;`
			`}`

			`PopLex()`
			`{`
			`ASSERT(LexSP > 0);`
			`dot = LexStack[--LexSP];`
			`}`

			`int`
			`LLlex()`
			`{`
			`/* LLlex() plays the role of Lexical Analyzer for the C parser.`
			`The look-ahead and putting aside of tokens are taken into`
			`account.`
			`*/`
			`if (ASIDE) { /* a token is put aside */`
			`dot = aside;`
			`ASIDE = 0;`
			`}`
			`else { /* read ahead and return the old one */`
			`#ifdef LINT`
			`lint_comment_ahead();`
			`#endif LINT`
			`dot = ahead;`
			`/* the following test is performed due to the dual`
			`task of LLlex(): it is also called for parsing the`
			`restricted constant expression following a #if or`
			`#elif. The newline character causes EOF to be`
			`returned in this case to stop the LLgen parsing task.`
			`*/`
			`if (DOT != EOI)`
			`GetToken(&ahead);`
			`else`
			`DOT = EOF;`
			`}`
			`return DOT;`
			`}`


			`char *string_token();`
			`arith char_constant();`


			`int`
			`GetToken(ptok)`
			`register struct token *ptok;`
			`{`
			`/* LexToken() is the actual token recognizer. It calls the`
			`control line interpreter if it encounters a "\n{w}*#"`
			`combination. Macro replacement is also performed if it is`
			`needed.`
			`*/`
			`char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];`
			`register int ch, nch;`

			`if (File_Inserted) {`
			`File_Inserted = 0;`
			`goto firstline;`
			`}`

			`again: /* rescan the input after an error or replacement */`
			`ch = GetChar();`
			`go_on: /* rescan, the following character has been read */`
			`if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */`
			`fatal("non-ascii '\\%03o' read", ch & 0377);`
			`/* keep track of the place of the token in the file */`
			`ptok->tk_file = FileName;`
			`ptok->tk_line = LineNumber;`

			`switch (class(ch)) { /* detect character class */`
			`case STNL: /* newline, vertical space or formfeed */`
			`firstline:`
			`LineNumber++; /* also at vs and ff */`
			`ptok->tk_file = FileName;`
			`ptok->tk_line = LineNumber;`
			`if (EoiForNewline) /* called in control line */`
			`/* a newline in a control line indicates the`
			`end-of-information of the line.`
			`*/`
			`return ptok->tk_symb = EOI;`
			`while ((ch = GetChar()), (ch == '#' \|\| class(ch) == STSKIP)) {`
			`/* blanks are allowed before hashes */`
			`if (ch == '#') {`
			`/* a control line follows */`
			`domacro();`
			`if (File_Inserted) {`
			`File_Inserted = 0;`
			`goto firstline;`
			`}`
			`}`
			`}`
			`/* We have to loop here, because in`
			`domacro' the nl, vt or ff is read. The
			character following it may again be a `#'.
			`*/`
			`goto go_on;`
			`case STSKIP: /* just skip the skip characters */`
			`goto again;`
			`case STGARB: /* garbage character */`
			`if (040 < ch && ch < 0177)`
			`lexerror("garbage char %c", ch);`
			`else`
			`lexerror("garbage char \\%03o", ch);`
			`goto again;`
			`case STSIMP: /* a simple character, no part of compound token*/`
			`if (ch == '/') { /* probably the start of comment */`
			`ch = GetChar();`
			`if (ch == '') { / start of comment */`
			`skipcomment();`
			`goto again;`
			`}`
			`else {`
			`UnGetChar();`
			`ch = '/'; /* restore ch */`
			`}`
			`}`
			`return ptok->tk_symb = ch;`
			`case STCOMP: /* maybe the start of a compound token */`
			`nch = GetChar(); /* character lookahead */`
			`switch (ch) {`
			`case '!':`
			`if (nch == '=')`
			`return ptok->tk_symb = NOTEQUAL;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '&':`
			`if (nch == '&')`
			`return ptok->tk_symb = AND;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '+':`
			`if (nch == '+')`
			`return ptok->tk_symb = PLUSPLUS;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '-':`
			`if (nch == '-')`
			`return ptok->tk_symb = MINMIN;`
			`if (nch == '>')`
			`return ptok->tk_symb = ARROW;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '<':`
			`if (AccFileSpecifier) {`
			`UnGetChar(); /* pushback nch */`
			`ptok->tk_bts = string_token("file specifier",`
			`'>', &(ptok->tk_len));`
			`return ptok->tk_symb = FILESPECIFIER;`
			`}`
			`if (nch == '<')`
			`return ptok->tk_symb = LEFT;`
			`if (nch == '=')`
			`return ptok->tk_symb = LESSEQ;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '=':`
			`if (nch == '=')`
			`return ptok->tk_symb = EQUAL;`

			`/* The following piece of code tries to recognise`
			old-fashioned assignment operators `=op'
			`Note however, that these are removed from the`
			`ANSI C standard.`
			`*/`
			`switch (nch) {`
			`case '+':`
			`ptok->tk_symb = PLUSAB;`
			`goto warn;`
			`case '-':`
			`ptok->tk_symb = MINAB;`
			`goto warn;`
			`case '*':`
			`ptok->tk_symb = TIMESAB;`
			`goto warn;`
			`case '/':`
			`ptok->tk_symb = DIVAB;`
			`goto warn;`
			`case '%':`
			`ptok->tk_symb = MODAB;`
			`goto warn;`
			`case '>':`
			`case '<':`
			`GetChar(ch);`
			`if (ch != nch) {`
			`UnGetChar();`
			`lexerror("illegal combination '=%c'",`
			`nch);`
			`}`
			`ptok->tk_symb = nch == '<' ? LEFTAB : RIGHTAB;`
			`goto warn;`
			`case '&':`
			`ptok->tk_symb = ANDAB;`
			`goto warn;`
			`case '^':`
			`ptok->tk_symb = XORAB;`
			`goto warn;`
			`case '\|':`
			`ptok->tk_symb = ORAB;`
			`warn:`
			`warning("Old-fashioned assignment operator");`
			`return ptok->tk_symb;`
			`}`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '>':`
			`if (nch == '=')`
			`return ptok->tk_symb = GREATEREQ;`
			`if (nch == '>')`
			`return ptok->tk_symb = RIGHT;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`case '\|':`
			`if (nch == '\|')`
			`return ptok->tk_symb = OR;`
			`UnGetChar();`
			`return ptok->tk_symb = ch;`
			`}`
			`case STCHAR: /* character constant */`
			`ptok->tk_ival = char_constant("character");`
			`ptok->tk_fund = INT;`
			`return ptok->tk_symb = INTEGER;`
			`case STSTR: /* string */`
			`ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));`
			`ptok->tk_fund = CHAR; /* string of characters */`
			`return ptok->tk_symb = STRING;`
			`case STELL: /* wide character constant/string prefix */`
			`nch = GetChar();`
			`if (nch == '"') {`
			`ptok->tk_bts = string_token("wide character string",`
			`'"', &(ptok->tk_len));`
			`ptok->tk_fund = WCHAR; /* string of wide characters */`
			`return ptok->tk_symb = STRING;`
			`} else if (nch == '\'') {`
			`ptok->tk_ival = char_constant("wide character");`
			`ptok->tk_fund = INT;`
			`return ptok->tk_symb = INTEGER;`
			`}`
			`UnGetChar();`
			`case STIDF:`
			`{`
			`register char *tg = &buf[0];`
			`register int pos = -1;`
			`register int hash;`
			`register struct idf *idef;`
			`extern int idfsize; /* ??? */`

			`hash = STARTHASH();`
			`do { /* read the identifier */`
			`if (++pos < idfsize) {`
			`*tg++ = ch;`
			`hash = ENHASH(hash, ch, pos);`
			`}`
			`ch = GetChar();`
			`} while (in_idf(ch));`

			`hash = STOPHASH(hash);`
			`if (ch != EOI)`
			`UnGetChar();`
			`tg++ = '\0'; / mark the end of the identifier */`
			`idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);`
			`idef->id_file = ptok->tk_file;`
			`idef->id_line = ptok->tk_line;`
			`#ifndef NOPP`
			`if (idef->id_macro && ReplaceMacros) {`
			`if (idef->id_macro->mc_count > 0)`
			`idef->id_macro->mc_count--;`
			`else if (replace(idef))`
			`goto again;`
			`}`
			`if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {`
			`ptok->tk_ival = (arith)0;`
			`ptok->tk_fund = INT;`
			`return ptok->tk_symb = INTEGER;`
			`}`
			`#endif NOPP`
			`ptok->tk_symb = (`
			`idef->id_reserved ? idef->id_reserved`
			`: idef->id_def && idef->id_def->df_sc == TYPEDEF ?`
			`TYPE_IDENTIFIER`
			`: IDENTIFIER`
			`);`
			`return IDENTIFIER;`
			`}`
			`case STNUM: /* a numeric constant */`
			`{`
			`register char *np = &buf[1];`
			`register int base = 10;`
			`register int vch;`
			`register arith val = 0;`

			`if (ch == '.') {`
			`#ifndef NOFLOAT`
			`/* A very embarrasing ambiguity. We have either a`
			`floating point number or field operator or`
			`ELLIPSIS.`
			`*/`
fixed several problems: - hex numbers and floating point numbers were wrong - grammar was wrong; did not accept correct ANSI C - prototype updates did not work - float parameters to routines without prototype were not upgraded to double - the dot operator no longer requires lvalue as left-hand-side 1989-02-07 13:16:02 +00:00			`vch = GetChar();`
			`if (!is_dig(vch)) { /* . or ... */`
			`if (vch == '.') {`
			`if ((vch = GetChar()) == '.')`
Initial revision 1989-02-07 11:04:05 +00:00			`return ptok->tk_symb = ELLIPSIS;`
			`/* This is funny: we can't push the`
			`second dot back. But then again`
			`..<ch> is already an error in C,`
			`so why bother ?`
			`*/`
			`UnGetChar();`
			`lexerror("illegal combination '..'");`
			`}`
			`UnGetChar();`
			`return ptok->tk_symb = '.';`
fixed several problems: - hex numbers and floating point numbers were wrong - grammar was wrong; did not accept correct ANSI C - prototype updates did not work - float parameters to routines without prototype were not upgraded to double - the dot operator no longer requires lvalue as left-hand-side 1989-02-07 13:16:02 +00:00			`}`
			`*np++ = '0';`
Initial revision 1989-02-07 11:04:05 +00:00			`UnGetChar();`
			`#else`
fixed several problems: - hex numbers and floating point numbers were wrong - grammar was wrong; did not accept correct ANSI C - prototype updates did not work - float parameters to routines without prototype were not upgraded to double - the dot operator no longer requires lvalue as left-hand-side 1989-02-07 13:16:02 +00:00			`if ((vch = GetChar()) == '.') {`
			`if ((vch = GetChar()) == '.')`
Initial revision 1989-02-07 11:04:05 +00:00			`return ptok->tk_symb = ELLIPSIS;`
			`UnGetChar();`
			`lexerror("illegal combination '..'");`
			`}`
			`UnGetChar();`
			`return ptok->tk_symb = '.';`
			`#endif`
			`}`
			`if (ch == '0') {`
			`*np++ = ch;`
			`ch = GetChar();`
			`if (ch == 'x' \|\| ch == 'X') {`
			`base = 16;`
			`ch = GetChar();`
			`}`
			`else`
			`base = 8;`
			`}`
			`while (vch = val_in_base(ch, base), vch >= 0) {`
			`val = val*base + vch;`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`}`
			`if (is_suf(ch)) {`
			`register int suf_long = 0;`
			`register int suf_unsigned = 0;`

			`/* The type of the integal constant is`
			`based on its suffix.`
			`*/`
			`do {`
			`switch (ch) {`
			`case 'l':`
			`case 'L':`
			`suf_long++;`
			`break;`
			`case 'u':`
			`case 'U':`
			`suf_unsigned++;`
			`break;`
			`}`
			`ch = GetChar();`
			`} while (is_suf(ch));`
			`UnGetChar();`

			`if (suf_long > 1)`
			`lexerror("only one long suffix allowed");`
			`if (suf_unsigned > 1)`
			`lexerror("only one unsigned suffix allowed");`

			`ptok->tk_fund = (suf_long && suf_unsigned) ? ULONG :`
			`(suf_long) ? LONG : UNSIGNED;`
			`ptok->tk_ival = val;`
			`return ptok->tk_symb = INTEGER;`
			`}`
			`#ifndef NOFLOAT`
			`if (base == 16 \|\| !(ch == '.' \|\| ch == 'e' \|\| ch == 'E'))`
			`#endif NOFLOAT`
			`{`
			`UnGetChar();`
			`ptok->tk_ival = val;`
			`/* The semantic analyser must know if the`
			`integral constant is given in octal/hexa-`
			`decimal form, in which case its type is`
			`UNSIGNED, or in decimal form, in which case`
			`its type is signed, indicated by`
			`the fund INTEGER.`
			`*/`
			`ptok->tk_fund =`
			`(base == 10 \|\| (base == 8 && val == (arith)0))`
			`? INTEGER : UNSIGNED;`
			`return ptok->tk_symb = INTEGER;`
			`}`
			`/* where's the test for the length of the integral ??? */`
			`#ifndef NOFLOAT`
			`if (ch == '.'){`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`}`
			`while (is_dig(ch)){`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`}`
			`if (ch == 'e' \|\| ch == 'E') {`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`if (ch == '+' \|\| ch == '-') {`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`}`
			`if (!is_dig(ch)) {`
			`lexerror("malformed floating constant");`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`}`
			`while (is_dig(ch)) {`
			`if (np < &buf[NUMSIZE])`
			`*np++ = ch;`
			`ch = GetChar();`
			`}`
			`}`

			`/* The type of an integral floating point`
			`constant may be given by the float (f)`
			`or long double (l) suffix.`
			`*/`
			`if (ch == 'f' \|\| ch == 'F')`
			`ptok->tk_fund = FLOAT;`
			`else if (ch == 'l' \|\| ch == 'L')`
			`ptok->tk_fund = LNGDBL;`
			`else {`
			`ptok->tk_fund = DOUBLE;`
			`UnGetChar();`
			`}`

			`*np++ = '\0';`
			`buf[0] = '-'; /* good heavens... */`
			`if (np == &buf[NUMSIZE+1]) {`
			`lexerror("floating constant too long");`
			`ptok->tk_fval = Salloc("0.0",(unsigned) 5) + 1;`
			`}`
			`else`
			`ptok->tk_fval = Salloc(buf,(unsigned) (np - buf)) + 1;`
			`return ptok->tk_symb = FLOATING;`
			`#endif NOFLOAT`
			`}`
			`case STEOI: /* end of text on source file */`
			`return ptok->tk_symb = EOI;`
			`default: /* this cannot happen */`
			`crash("bad class for char 0%o", ch);`
			`}`
			`/NOTREACHED/`
			`}`

			`skipcomment()`
			`{`
			`/* The last character read has been the '' of '/_'. The`
			`characters, except NL and EOI, between '/_*' and the first`
			`occurring '*_/' are not interpreted.`
			`NL only affects the LineNumber. EOI is not legal.`

			`Important note: it is not possible to stop skipping comment`
			`beyond the end-of-file of an included file.`
			`EOI is returned by LoadChar only on encountering EOF of the`
			`top-level file...`
			`*/`
			`register int c;`

			`NoUnstack++;`
			`c = GetChar();`
			`#ifdef LINT`
			`lint_start_comment();`
			`lint_comment_char(c);`
			`#endif LINT`
			`do {`
			`while (c != '*') {`
			`if (class(c) == STNL) {`
			`++LineNumber;`
			`} else`
			`if (c == EOI) {`
			`NoUnstack--;`
			`#ifdef LINT`
			`lint_end_comment();`
			`#endif LINT`
			`return;`
			`}`
			`if (c == '/' && (c = GetChar()) == '*')`
			`strict("extra comment delimiter found");`
			`c = GetChar();`
			`#ifdef LINT`
			`lint_comment_char(c);`
			`#endif LINT`
			`} /* last Character seen was '' /`
			`c = GetChar();`
			`#ifdef LINT`
			`lint_comment_char(c);`
			`#endif LINT`
			`} while (c != '/');`
			`#ifdef LINT`
			`lint_end_comment();`
			`#endif LINT`
			`NoUnstack--;`
			`}`

			`arith`
			`char_constant(nm)`
			`char *nm;`
			`{`
			`register arith val = 0;`
			`register int ch;`
			`int size = 0;`

			`ch = GetChar();`
			`if (ch == '\'')`
			`lexerror("%s constant too short", nm);`
			`else`
			`while (ch != '\'') {`
			`if (ch == '\n') {`
			`lexerror("newline in %s constant", nm);`
			`LineNumber++;`
			`break;`
			`}`
			`if (ch == '\\')`
			`ch = quoted(GetChar());`
			`if (ch >= 128) ch -= 256;`
			`val = val*256 + ch;`
			`size++;`
			`ch = GetChar();`
			`}`
			`if (size > 1)`
			`strict("%s constant includes more than one character", nm);`
			`if (size > (int)int_size)`
			`lexerror("%s constant too long", nm);`
			`return val;`
			`}`

			char *
			string_token(nm, stop_char, plen)
				char *nm;
				int *plen;
			{
				/*	Collects the characters up to (not including) the
					first stop_char into a freshly allocated buffer and
					returns that buffer; stop_char itself is consumed.
					A newline or EOI ends the collection with an error
					message in which nm names the construct being read.
					Escape sequences are translated by quoted().
					The result is always null-terminated and *plen is
					set to the number of characters stored, the
					terminating null character included.
				*/
				register int c;
				register int capacity = ISTRSIZE;
				register char *buffer = Malloc((unsigned) capacity);
				register int len = 0;

				for (c = GetChar(); c != stop_char; c = GetChar()) {
					if (c == '\n') {
						lexerror("newline in %s", nm);
						LineNumber++;
						break;
					}
					if (c == EOI) {
						lexerror("end-of-file inside %s", nm);
						break;
					}
					if (c == '\\')
						c = quoted(GetChar());
					buffer[len++] = c;
					if (len == capacity) {
						/* full: grow, so there is always room
						   for one more character
						*/
						capacity += RSTRSIZE;
						buffer = Srealloc(buffer, (unsigned) capacity);
					}
				}
				buffer[len++] = '\0';	/* for filenames etc. */
				*plen = len;
				return buffer;
			}

			int
			quoted(ch)
				register int ch;
			{
				/*	quoted() replaces an escaped character sequence by the
					character meant.
					On entry ch holds the first character after the
					backslash; any further characters of the sequence are
					read with GetChar().  The result is reduced to 8 bits.
					A character that does not start a known escape
					sequence stands for itself.
				*/
				if (!is_oct(ch)) {		/* a quoted char */
					switch (ch) {
					case 'n':
						ch = '\n';
						break;
					case 't':
						ch = '\t';
						break;
					case 'b':
						ch = '\b';
						break;
					case 'r':
						ch = '\r';
						break;
					case 'f':
						ch = '\f';
						break;
					case 'a':		/* alert */
						ch = '\007';
						break;
					case 'v':		/* vertical tab */
						ch = '\013';
						break;
					case 'x':		/* quoted hex */
					{
						register int hex = 0;
						register int vch;

						for (;;) {
							ch = GetChar();
							if (vch = val_in_base(ch, 16), vch == -1)
								break;
							/*	Only the low 8 bits are ever
								returned; masking here gives the
								same result and keeps a long digit
								sequence from overflowing hex.
							*/
							hex = (hex * 16 + vch) & 0377;
						}
						UnGetChar();	/* first non-hex character */
						ch = hex;
						break;
					}
					default:		/* stands for itself */
						break;
					}
				}
				else {				/* a quoted octal */
					register int oct = 0, cnt = 0;

					/* at most three octal digits are taken */
					do {
						oct = oct*8 + (ch-'0');
						ch = GetChar();
					} while (is_oct(ch) && ++cnt < 3);
					UnGetChar();		/* character after the digits */
					ch = oct;
				}
				return ch&0377;
			}


			int
			val_in_base(ch, base)
				register int ch;
			{
				/*	Returns the value of the digit ch in the given base
					(8, 10 or 16), or -1 if ch is not a digit of that
					base.  Any other base is a fatal error.
				*/
				switch (base) {
				case 8:
					/* the octal digits are '0' through '7' */
					return (is_dig(ch) && ch < '8') ? ch - '0' : -1;
				case 10:
					return is_dig(ch) ? ch - '0' : -1;
				case 16:
					/*	For a letter, the low four bits of
						(ch - 'a' + 10) are taken, so that the
						upper case letters, which lie 32 below the
						lower case ones in ASCII, give the same value.
					*/
					return is_dig(ch) ? ch - '0'
						: is_hex(ch) ? (ch - 'a' + 10) & 017
						: -1;
				default:
					fatal("(val_in_base) illegal base value %d", base);
					/* NOTREACHED */
				}
			}


			int
			GetChar()
			{
				/*	Delivers the next input character.  Together with
					trigraph() this routine replaces trigraph sequences
					and removes every backslash that is immediately
					followed by a newline, along with that newline.
					The character delivered is also left in LexSave.
				*/
				register int c;

				for (;;) {
					LoadChar(c);

					/* possible trigraph sequence */
					if (c == '?')
						c = trigraph();

					if (c != '\\')
						break;

					/* a backslash: is it followed by a newline? */
					LoadChar(c);
					if (c != '\n') {
						/* no: an ordinary backslash */
						PushBack();
						c = '\\';
						break;
					}
					/* yes: drop both and start all over */
					++LineNumber;
				}
				return(LexSave = c);
			}


			int
			trigraph()
			{
				/*	Called when a '?' has just been loaded.  If the next
					two characters complete a trigraph sequence they are
					consumed and the character the trigraph stands for is
					returned.  Otherwise everything loaded here is pushed
					back and the '?' itself is returned.
				*/
				register int c;

				LoadChar(c);
				if (c != '?') {
					PushBack();
					return('?');
				}

				LoadChar(c);
				switch (c) {		/* its a trigraph */
				case '=':
					return('#');
				case '(':
					return('[');
				case '/':
					return('\\');
				case ')':
					return(']');
				case '\'':
					return('^');
				case '<':
					return('{');
				case '!':
					return('|');
				case '>':
					return('}');
				case '-':
					return('~');
				}

				/* no trigraph: give back both characters loaded here */
				PushBack();
				PushBack();
				return('?');
			}