1989-02-07 11:04:05 +00:00
|
|
|
/*
|
|
|
|
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
|
|
|
* See the copyright notice in the ACK home directory, in the file "Copyright".
|
|
|
|
*/
|
|
|
|
/* $Header$ */
|
|
|
|
/* L E X I C A L A N A L Y Z E R */
|
|
|
|
|
|
|
|
#include "lint.h"
|
|
|
|
#include <alloc.h>
|
|
|
|
#include "idfsize.h"
|
|
|
|
#include "numsize.h"
|
|
|
|
#include "debug.h"
|
|
|
|
#include "strsize.h"
|
|
|
|
#include "nopp.h"
|
|
|
|
#include "input.h"
|
|
|
|
#include "arith.h"
|
|
|
|
#include "def.h"
|
|
|
|
#include "macro.h"
|
|
|
|
#include "idf.h"
|
|
|
|
#include "LLlex.h"
|
|
|
|
#include "Lpars.h"
|
|
|
|
#include "class.h"
|
|
|
|
#include "assert.h"
|
|
|
|
#include "sizes.h"
|
|
|
|
|
|
|
|
/* Data about the token yielded */
|
|
|
|
struct token dot, ahead, aside;
|
|
|
|
|
|
|
|
#ifndef NOPP
|
|
|
|
int ReplaceMacros = 1; /* replacing macros */
|
|
|
|
int AccDefined = 0; /* accept "defined(...)" */
|
|
|
|
int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
|
|
|
|
int Unstacked = 0; /* an unstack is done */
|
1989-09-19 16:13:23 +00:00
|
|
|
extern int InputLevel;
|
1989-02-07 11:04:05 +00:00
|
|
|
#endif
|
|
|
|
int AccFileSpecifier = 0; /* return filespecifier <...> */
|
|
|
|
int EoiForNewline = 0; /* return EOI upon encountering newline */
|
|
|
|
int File_Inserted = 0; /* a file has just been inserted */
|
|
|
|
int LexSave = 0; /* last character read by GetChar */
|
|
|
|
#define MAX_LL_DEPTH 2
|
|
|
|
|
1989-09-19 16:13:23 +00:00
|
|
|
#define FLG_ESEEN 0x01 /* possibly a floating point number */
|
|
|
|
#define FLG_DOTSEEN 0x02 /* certainly a floating point number */
|
|
|
|
extern arith full_mask[];
|
|
|
|
extern arith max_int;
|
|
|
|
|
1989-10-23 13:50:27 +00:00
|
|
|
#ifndef NOPP
|
1989-02-07 11:04:05 +00:00
|
|
|
/* Stack on which PushLex() saves the current token and from which
   PopLex() restores it; LexSP is the number of entries in use.
   The type of LexSP is spelled out: implicit int is no longer C.
*/
static struct token LexStack[MAX_LL_DEPTH];
static int LexSP = 0;
|
|
|
|
|
|
|
|
/* In PushLex() the actions are taken in order to initialise or
|
|
|
|
re-initialise the lexical scanner.
|
|
|
|
E.g. at the invocation of a sub-parser that uses LLlex(), the
|
|
|
|
state of the current parser should be saved.
|
|
|
|
*/
|
|
|
|
PushLex()
|
|
|
|
{
|
1989-09-19 16:13:23 +00:00
|
|
|
ASSERT(LexSP < MAX_LL_DEPTH);
|
1989-02-07 11:04:05 +00:00
|
|
|
ASSERT(ASIDE == 0); /* ASIDE = 0; */
|
|
|
|
GetToken(&ahead);
|
|
|
|
LexStack[LexSP++] = dot;
|
|
|
|
}
|
|
|
|
|
|
|
|
PopLex()
|
|
|
|
{
|
|
|
|
ASSERT(LexSP > 0);
|
|
|
|
dot = LexStack[--LexSP];
|
|
|
|
}
|
1989-10-23 13:50:27 +00:00
|
|
|
#endif /* NOPP */
|
1989-02-07 11:04:05 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
LLlex()
|
|
|
|
{
|
|
|
|
/* LLlex() plays the role of Lexical Analyzer for the C parser.
|
|
|
|
The look-ahead and putting aside of tokens are taken into
|
|
|
|
account.
|
|
|
|
*/
|
|
|
|
if (ASIDE) { /* a token is put aside */
|
|
|
|
dot = aside;
|
|
|
|
ASIDE = 0;
|
|
|
|
}
|
|
|
|
else { /* read ahead and return the old one */
|
|
|
|
#ifdef LINT
|
|
|
|
lint_comment_ahead();
|
|
|
|
#endif LINT
|
|
|
|
dot = ahead;
|
|
|
|
/* the following test is performed due to the dual
|
|
|
|
task of LLlex(): it is also called for parsing the
|
|
|
|
restricted constant expression following a #if or
|
|
|
|
#elif. The newline character causes EOF to be
|
|
|
|
returned in this case to stop the LLgen parsing task.
|
|
|
|
*/
|
|
|
|
if (DOT != EOI)
|
|
|
|
GetToken(&ahead);
|
|
|
|
else
|
|
|
|
DOT = EOF;
|
|
|
|
}
|
|
|
|
return DOT;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
char *string_token();
|
|
|
|
arith char_constant();
|
|
|
|
|
|
|
|
int
|
|
|
|
GetToken(ptok)
|
|
|
|
register struct token *ptok;
|
|
|
|
{
|
1989-09-19 16:13:23 +00:00
|
|
|
/* GetToken() is the actual token recognizer. It calls the
|
1989-02-07 11:04:05 +00:00
|
|
|
control line interpreter if it encounters a "\n{w}*#"
|
|
|
|
combination. Macro replacement is also performed if it is
|
|
|
|
needed.
|
|
|
|
*/
|
|
|
|
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
|
|
|
|
register int ch, nch;
|
|
|
|
|
|
|
|
if (File_Inserted) {
|
|
|
|
File_Inserted = 0;
|
|
|
|
goto firstline;
|
|
|
|
}
|
|
|
|
|
|
|
|
again: /* rescan the input after an error or replacement */
|
|
|
|
ch = GetChar();
|
|
|
|
go_on: /* rescan, the following character has been read */
|
|
|
|
if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
|
1989-09-19 16:13:23 +00:00
|
|
|
{
|
1989-02-07 11:04:05 +00:00
|
|
|
fatal("non-ascii '\\%03o' read", ch & 0377);
|
1989-09-19 16:13:23 +00:00
|
|
|
}
|
1989-02-07 11:04:05 +00:00
|
|
|
/* keep track of the place of the token in the file */
|
|
|
|
ptok->tk_file = FileName;
|
|
|
|
ptok->tk_line = LineNumber;
|
|
|
|
|
|
|
|
switch (class(ch)) { /* detect character class */
|
|
|
|
case STNL: /* newline, vertical space or formfeed */
|
|
|
|
firstline:
|
|
|
|
LineNumber++; /* also at vs and ff */
|
|
|
|
ptok->tk_file = FileName;
|
|
|
|
ptok->tk_line = LineNumber;
|
|
|
|
if (EoiForNewline) /* called in control line */
|
|
|
|
/* a newline in a control line indicates the
|
|
|
|
end-of-information of the line.
|
|
|
|
*/
|
|
|
|
return ptok->tk_symb = EOI;
|
|
|
|
while ((ch = GetChar()), (ch == '#' || class(ch) == STSKIP)) {
|
|
|
|
/* blanks are allowed before hashes */
|
|
|
|
if (ch == '#') {
|
|
|
|
/* a control line follows */
|
|
|
|
domacro();
|
1989-10-23 13:50:27 +00:00
|
|
|
#ifndef NOPP
|
1989-02-07 11:04:05 +00:00
|
|
|
if (File_Inserted) {
|
|
|
|
File_Inserted = 0;
|
|
|
|
goto firstline;
|
|
|
|
}
|
1989-10-23 13:50:27 +00:00
|
|
|
#endif /* NOPP */
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* We have to loop here, because in
|
|
|
|
`domacro' the nl, vt or ff is read. The
|
|
|
|
character following it may again be a `#'.
|
|
|
|
*/
|
|
|
|
goto go_on;
|
|
|
|
case STSKIP: /* just skip the skip characters */
|
|
|
|
goto again;
|
|
|
|
case STGARB: /* garbage character */
|
1989-10-13 11:04:42 +00:00
|
|
|
#ifndef NOPP
|
1989-09-19 16:13:23 +00:00
|
|
|
garbage:
|
1989-10-13 11:04:42 +00:00
|
|
|
#endif
|
1989-02-07 11:04:05 +00:00
|
|
|
if (040 < ch && ch < 0177)
|
|
|
|
lexerror("garbage char %c", ch);
|
|
|
|
else
|
|
|
|
lexerror("garbage char \\%03o", ch);
|
|
|
|
goto again;
|
|
|
|
case STSIMP: /* a simple character, no part of compound token*/
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case STCOMP: /* maybe the start of a compound token */
|
|
|
|
nch = GetChar(); /* character lookahead */
|
|
|
|
switch (ch) {
|
|
|
|
case '!':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = NOTEQUAL;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '&':
|
|
|
|
if (nch == '&')
|
|
|
|
return ptok->tk_symb = AND;
|
1989-09-19 16:13:23 +00:00
|
|
|
else if (nch == '=')
|
|
|
|
return ptok->tk_symb = ANDAB;
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '+':
|
|
|
|
if (nch == '+')
|
|
|
|
return ptok->tk_symb = PLUSPLUS;
|
1989-09-19 16:13:23 +00:00
|
|
|
else if (nch == '=')
|
|
|
|
return ptok->tk_symb = PLUSAB;
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '-':
|
|
|
|
if (nch == '-')
|
|
|
|
return ptok->tk_symb = MINMIN;
|
1989-09-19 16:13:23 +00:00
|
|
|
else if (nch == '>')
|
1989-02-07 11:04:05 +00:00
|
|
|
return ptok->tk_symb = ARROW;
|
1989-09-19 16:13:23 +00:00
|
|
|
else if (nch == '=')
|
|
|
|
return ptok->tk_symb = MINAB;
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '<':
|
|
|
|
if (AccFileSpecifier) {
|
|
|
|
UnGetChar(); /* pushback nch */
|
|
|
|
ptok->tk_bts = string_token("file specifier",
|
|
|
|
'>', &(ptok->tk_len));
|
|
|
|
return ptok->tk_symb = FILESPECIFIER;
|
1989-09-29 16:20:38 +00:00
|
|
|
} else if (nch == '<') {
|
1989-09-19 16:13:23 +00:00
|
|
|
if ((nch = GetChar()) == '=')
|
|
|
|
return ptok->tk_symb = LEFTAB;
|
|
|
|
UnGetChar();
|
1989-02-07 11:04:05 +00:00
|
|
|
return ptok->tk_symb = LEFT;
|
1989-09-19 16:13:23 +00:00
|
|
|
}
|
1989-02-07 11:04:05 +00:00
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = LESSEQ;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '=':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = EQUAL;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '>':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = GREATEREQ;
|
1989-09-19 16:13:23 +00:00
|
|
|
if (nch == '>') {
|
|
|
|
if ((nch = GetChar()) == '=')
|
|
|
|
return ptok->tk_symb = RIGHTAB;
|
|
|
|
UnGetChar();
|
1989-02-07 11:04:05 +00:00
|
|
|
return ptok->tk_symb = RIGHT;
|
1989-09-19 16:13:23 +00:00
|
|
|
}
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '|':
|
|
|
|
if (nch == '|')
|
|
|
|
return ptok->tk_symb = OR;
|
1989-09-19 16:13:23 +00:00
|
|
|
else if (nch == '=')
|
|
|
|
return ptok->tk_symb = ORAB;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '%':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = MODAB;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '*':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = TIMESAB;
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
1989-09-19 16:13:23 +00:00
|
|
|
case '^':
|
|
|
|
if (nch == '=')
|
|
|
|
return ptok->tk_symb = XORAB;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
case '/':
|
1989-10-12 14:22:35 +00:00
|
|
|
if (nch == '*'
|
|
|
|
#ifndef NOPP
|
|
|
|
&& !InputLevel
|
|
|
|
#endif
|
|
|
|
) {
|
1989-09-19 16:13:23 +00:00
|
|
|
skipcomment();
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
else if (nch == '=')
|
|
|
|
return ptok->tk_symb = DIVAB;
|
|
|
|
UnGetChar();
|
|
|
|
return ptok->tk_symb = ch;
|
|
|
|
default:
|
|
|
|
crash("bad class for char 0%o", ch);
|
|
|
|
/* NOTREACHED */
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
|
|
|
case STCHAR: /* character constant */
|
|
|
|
ptok->tk_ival = char_constant("character");
|
|
|
|
ptok->tk_fund = INT;
|
|
|
|
return ptok->tk_symb = INTEGER;
|
|
|
|
case STSTR: /* string */
|
|
|
|
ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
|
|
|
|
ptok->tk_fund = CHAR; /* string of characters */
|
|
|
|
return ptok->tk_symb = STRING;
|
|
|
|
case STELL: /* wide character constant/string prefix */
|
|
|
|
nch = GetChar();
|
|
|
|
if (nch == '"') {
|
|
|
|
ptok->tk_bts = string_token("wide character string",
|
|
|
|
'"', &(ptok->tk_len));
|
|
|
|
ptok->tk_fund = WCHAR; /* string of wide characters */
|
|
|
|
return ptok->tk_symb = STRING;
|
|
|
|
} else if (nch == '\'') {
|
|
|
|
ptok->tk_ival = char_constant("wide character");
|
|
|
|
ptok->tk_fund = INT;
|
|
|
|
return ptok->tk_symb = INTEGER;
|
|
|
|
}
|
|
|
|
UnGetChar();
|
1989-09-19 16:13:23 +00:00
|
|
|
/* fallthrough */
|
1989-02-07 11:04:05 +00:00
|
|
|
case STIDF:
|
|
|
|
{
|
|
|
|
register char *tg = &buf[0];
|
|
|
|
register int pos = -1;
|
|
|
|
register int hash;
|
|
|
|
register struct idf *idef;
|
|
|
|
extern int idfsize; /* ??? */
|
1989-09-19 16:13:23 +00:00
|
|
|
#ifndef NOPP
|
|
|
|
int NoExpandNext = 0;
|
1989-02-07 11:04:05 +00:00
|
|
|
|
1989-09-19 16:13:23 +00:00
|
|
|
if (Unstacked) EnableMacros(); /* unstack macro's when allowed. */
|
|
|
|
if (ch == NOEXPM) {
|
|
|
|
NoExpandNext = 1;
|
|
|
|
ch = GetChar();
|
|
|
|
}
|
|
|
|
#endif
|
1989-02-07 11:04:05 +00:00
|
|
|
hash = STARTHASH();
|
|
|
|
do { /* read the identifier */
|
|
|
|
if (++pos < idfsize) {
|
|
|
|
*tg++ = ch;
|
1989-10-26 16:16:35 +00:00
|
|
|
hash = ENHASH(hash, ch);
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
|
|
|
ch = GetChar();
|
|
|
|
} while (in_idf(ch));
|
|
|
|
|
|
|
|
hash = STOPHASH(hash);
|
|
|
|
if (ch != EOI)
|
|
|
|
UnGetChar();
|
|
|
|
*tg++ = '\0'; /* mark the end of the identifier */
|
|
|
|
idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);
|
|
|
|
idef->id_file = ptok->tk_file;
|
|
|
|
idef->id_line = ptok->tk_line;
|
|
|
|
#ifndef NOPP
|
1989-09-19 16:13:23 +00:00
|
|
|
if (idef->id_macro && ReplaceMacros && !NoExpandNext) {
|
|
|
|
if (replace(idef))
|
|
|
|
goto again;
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
|
|
|
if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {
|
|
|
|
ptok->tk_ival = (arith)0;
|
|
|
|
ptok->tk_fund = INT;
|
|
|
|
return ptok->tk_symb = INTEGER;
|
|
|
|
}
|
|
|
|
#endif NOPP
|
|
|
|
ptok->tk_symb = (
|
|
|
|
idef->id_reserved ? idef->id_reserved
|
|
|
|
: idef->id_def && idef->id_def->df_sc == TYPEDEF ?
|
|
|
|
TYPE_IDENTIFIER
|
|
|
|
: IDENTIFIER
|
|
|
|
);
|
|
|
|
return IDENTIFIER;
|
|
|
|
}
|
|
|
|
case STNUM: /* a numeric constant */
|
|
|
|
{
|
1989-09-19 16:13:23 +00:00
|
|
|
register int siz_left = NUMSIZE - 1;
|
|
|
|
register char *np = &buf[0];
|
|
|
|
int flags = 0;
|
|
|
|
|
|
|
|
#define store(ch) if (--siz_left >= 0) \
|
|
|
|
*np++ = ch;
|
1989-02-07 11:04:05 +00:00
|
|
|
|
|
|
|
if (ch == '.') {
|
1989-09-19 16:13:23 +00:00
|
|
|
/* An embarrasing ambiguity. We have either a
|
|
|
|
pp-number, a field operator, an ELLIPSIS or
|
|
|
|
an error (..).
|
1989-02-07 11:04:05 +00:00
|
|
|
*/
|
1989-09-19 16:13:23 +00:00
|
|
|
ch = GetChar();
|
|
|
|
if (!is_dig(ch)) { /* . or ... */
|
|
|
|
if (ch == '.') {
|
|
|
|
if ((ch = GetChar()) == '.')
|
1989-02-07 11:04:05 +00:00
|
|
|
return ptok->tk_symb = ELLIPSIS;
|
1989-09-19 16:13:23 +00:00
|
|
|
UnGetChar(); /* not '.' */
|
|
|
|
ChPushBack('.'); /* sigh ... */
|
|
|
|
} else
|
|
|
|
UnGetChar(); /* not '.' */
|
1989-02-07 11:04:05 +00:00
|
|
|
return ptok->tk_symb = '.';
|
1989-02-07 13:16:02 +00:00
|
|
|
}
|
1989-02-07 11:04:05 +00:00
|
|
|
UnGetChar();
|
1989-09-19 16:13:23 +00:00
|
|
|
ch = '.';
|
|
|
|
flags |= FLG_DOTSEEN;
|
|
|
|
}
|
|
|
|
store(ch);
|
|
|
|
ch = GetChar();
|
|
|
|
while(in_idf(ch) || ch == '.') {
|
|
|
|
store(ch);
|
|
|
|
if (ch == '.') flags |= FLG_DOTSEEN;
|
|
|
|
if (ch == 'e' || ch == 'E') {
|
|
|
|
flags |= FLG_ESEEN;
|
1989-02-07 11:04:05 +00:00
|
|
|
ch = GetChar();
|
1989-09-19 16:13:23 +00:00
|
|
|
if (ch == '+' || ch == '-') {
|
|
|
|
flags |= FLG_DOTSEEN; /* trick */
|
|
|
|
store(ch);
|
|
|
|
ch = GetChar();
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
1989-09-19 16:13:23 +00:00
|
|
|
} else ch = GetChar();
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
1989-09-19 16:13:23 +00:00
|
|
|
store('\0');
|
|
|
|
UnGetChar();
|
1989-02-07 11:04:05 +00:00
|
|
|
|
1989-09-19 16:13:23 +00:00
|
|
|
np = &buf[0];
|
|
|
|
ch = *np++;
|
|
|
|
if (siz_left < 0) {
|
|
|
|
lexerror("number too long");
|
|
|
|
if ((flags & FLG_DOTSEEN)
|
|
|
|
|| (flags & FLG_ESEEN
|
|
|
|
&& !(ch == '0'
|
|
|
|
&& (*np == 'x' || *np == 'X')))) {
|
|
|
|
ptok->tk_fval = Salloc("0.0", (unsigned) 4);
|
|
|
|
ptok->tk_fund = DOUBLE;
|
|
|
|
return ptok->tk_symb = FLOATING;
|
|
|
|
}
|
|
|
|
ptok->tk_ival = 1;
|
|
|
|
ptok->tk_fund = ULONG;
|
|
|
|
ptok->tk_symb = INTEGER;
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
1989-09-19 16:13:23 +00:00
|
|
|
/* Now, the pp-number must be converted into a token */
|
|
|
|
if ((flags & FLG_DOTSEEN)
|
|
|
|
|| (flags & FLG_ESEEN
|
|
|
|
&& !(ch == '0' && (*np == 'x' || *np == 'X')))) {
|
|
|
|
strflt2tok(&buf[0], ptok);
|
|
|
|
return ptok->tk_symb = FLOATING;
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
1989-09-19 16:13:23 +00:00
|
|
|
strint2tok(&buf[0], ptok);
|
|
|
|
return ptok->tk_symb = INTEGER;
|
1989-02-07 11:04:05 +00:00
|
|
|
}
|
|
|
|
case STEOI: /* end of text on source file */
|
|
|
|
return ptok->tk_symb = EOI;
|
1989-09-19 16:13:23 +00:00
|
|
|
#ifndef NOPP
|
|
|
|
case STMSPEC:
|
|
|
|
if (!InputLevel) goto garbage;
|
|
|
|
if (ch == TOKSEP) goto again;
|
|
|
|
/* fallthrough shouldn't happen */
|
|
|
|
#endif
|
1989-02-07 11:04:05 +00:00
|
|
|
default: /* this cannot happen */
|
|
|
|
crash("bad class for char 0%o", ch);
|
|
|
|
}
|
|
|
|
/*NOTREACHED*/
|
|
|
|
}
|
|
|
|
|
|
|
|
skipcomment()
|
|
|
|
{
|
|
|
|
/* The last character read has been the '*' of '/_*'. The
|
|
|
|
characters, except NL and EOI, between '/_*' and the first
|
|
|
|
occurring '*_/' are not interpreted.
|
|
|
|
NL only affects the LineNumber. EOI is not legal.
|
|
|
|
|
|
|
|
Important note: it is not possible to stop skipping comment
|
|
|
|
beyond the end-of-file of an included file.
|
|
|
|
EOI is returned by LoadChar only on encountering EOF of the
|
|
|
|
top-level file...
|
|
|
|
*/
|
|
|
|
register int c;
|
|
|
|
|
|
|
|
NoUnstack++;
|
|
|
|
c = GetChar();
|
|
|
|
#ifdef LINT
|
|
|
|
lint_start_comment();
|
|
|
|
lint_comment_char(c);
|
|
|
|
#endif LINT
|
|
|
|
do {
|
|
|
|
while (c != '*') {
|
|
|
|
if (class(c) == STNL) {
|
|
|
|
++LineNumber;
|
1989-09-19 16:13:23 +00:00
|
|
|
} else if (c == EOI) {
|
1989-02-07 11:04:05 +00:00
|
|
|
NoUnstack--;
|
|
|
|
#ifdef LINT
|
|
|
|
lint_end_comment();
|
|
|
|
#endif LINT
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
c = GetChar();
|
|
|
|
#ifdef LINT
|
|
|
|
lint_comment_char(c);
|
|
|
|
#endif LINT
|
|
|
|
} /* last Character seen was '*' */
|
|
|
|
c = GetChar();
|
|
|
|
#ifdef LINT
|
|
|
|
lint_comment_char(c);
|
|
|
|
#endif LINT
|
|
|
|
} while (c != '/');
|
|
|
|
#ifdef LINT
|
|
|
|
lint_end_comment();
|
|
|
|
#endif LINT
|
|
|
|
NoUnstack--;
|
|
|
|
}
|
|
|
|
|
|
|
|
arith
|
|
|
|
char_constant(nm)
|
|
|
|
char *nm;
|
|
|
|
{
|
|
|
|
register arith val = 0;
|
|
|
|
register int ch;
|
|
|
|
int size = 0;
|
|
|
|
|
|
|
|
ch = GetChar();
|
|
|
|
if (ch == '\'')
|
|
|
|
lexerror("%s constant too short", nm);
|
|
|
|
else
|
|
|
|
while (ch != '\'') {
|
|
|
|
if (ch == '\n') {
|
|
|
|
lexerror("newline in %s constant", nm);
|
|
|
|
LineNumber++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ch == '\\')
|
|
|
|
ch = quoted(GetChar());
|
|
|
|
if (ch >= 128) ch -= 256;
|
1989-09-19 16:13:23 +00:00
|
|
|
if (size < (int)int_size)
|
|
|
|
val |= ch << 8 * size;
|
1989-02-07 11:04:05 +00:00
|
|
|
size++;
|
|
|
|
ch = GetChar();
|
|
|
|
}
|
|
|
|
if (size > 1)
|
|
|
|
strict("%s constant includes more than one character", nm);
|
|
|
|
if (size > (int)int_size)
|
|
|
|
lexerror("%s constant too long", nm);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *
|
|
|
|
string_token(nm, stop_char, plen)
|
|
|
|
char *nm;
|
|
|
|
int *plen;
|
|
|
|
{
|
|
|
|
register int ch;
|
|
|
|
register int str_size;
|
|
|
|
register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
|
|
|
|
register int pos = 0;
|
|
|
|
|
|
|
|
ch = GetChar();
|
|
|
|
while (ch != stop_char) {
|
|
|
|
if (ch == '\n') {
|
|
|
|
lexerror("newline in %s", nm);
|
|
|
|
LineNumber++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ch == EOI) {
|
|
|
|
lexerror("end-of-file inside %s", nm);
|
|
|
|
break;
|
|
|
|
}
|
1989-09-19 16:13:23 +00:00
|
|
|
if (ch == '\\' && !AccFileSpecifier)
|
1989-02-07 11:04:05 +00:00
|
|
|
ch = quoted(GetChar());
|
|
|
|
str[pos++] = ch;
|
|
|
|
if (pos == str_size)
|
1989-10-19 14:53:25 +00:00
|
|
|
str = Realloc(str, (unsigned) (str_size += RSTRSIZE));
|
1989-02-07 11:04:05 +00:00
|
|
|
ch = GetChar();
|
|
|
|
}
|
|
|
|
str[pos++] = '\0'; /* for filenames etc. */
|
|
|
|
*plen = pos;
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
quoted(ch)
	register int ch;
{
	/*	quoted() translates an escape sequence into the character
		meant.  ch is the first character after the backslash;
		further characters are read as needed.  The result is
		masked to 8 bits.  A character that does not start a known
		escape sequence stands for itself.
	*/
	if (is_oct(ch)) {		/* a quoted octal */
		register int value = 0, extra = 0;

		/* at most three octal digits are taken */
		do {
			value = value * 8 + (ch - '0');
			ch = GetChar();
		} while (is_oct(ch) && ++extra < 3);
		UnGetChar();
		ch = value;
	}
	else {				/* a quoted char */
		switch (ch) {
		case 'n':	ch = '\n';	break;
		case 't':	ch = '\t';	break;
		case 'b':	ch = '\b';	break;
		case 'r':	ch = '\r';	break;
		case 'f':	ch = '\f';	break;
		case 'a':	ch = '\007';	break;	/* alert */
		case 'v':	ch = '\013';	break;	/* vertical tab */
		case 'x':				/* quoted hex */
		{
			register int value = 0;
			register int digit;

			/* take hex digits as long as they come */
			while ((digit = hex_val(ch = GetChar())) != -1)
				value = value * 16 + digit;
			UnGetChar();
			ch = value;
		}
		}
	}
	return ch & 0377;
}
|
|
|
|
|
|
|
|
|
|
|
|
int
hex_val(ch)
	register int ch;
{
	/*	hex_val() yields the value of ch as a hexadecimal digit,
		or -1 if ch is not one.
	*/
	if (is_dig(ch))
		return ch - '0';
	if (is_hex(ch))
		return (ch - 'a' + 10) & 017;	/* the mask folds upper case */
	return -1;
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
GetChar()
|
|
|
|
{
|
|
|
|
/* The routines GetChar and trigraph parses the trigraph
|
|
|
|
sequences and removes occurences of \\\n.
|
|
|
|
*/
|
|
|
|
register int ch;
|
|
|
|
|
|
|
|
again:
|
|
|
|
LoadChar(ch);
|
|
|
|
|
|
|
|
/* possible trigraph sequence */
|
|
|
|
if (ch == '?')
|
|
|
|
ch = trigraph();
|
|
|
|
|
|
|
|
/* \\\n are removed from the input stream */
|
|
|
|
if (ch == '\\') {
|
|
|
|
LoadChar(ch);
|
|
|
|
if (ch == '\n') {
|
|
|
|
++LineNumber;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
PushBack();
|
|
|
|
ch = '\\';
|
|
|
|
}
|
|
|
|
return(LexSave = ch);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
trigraph()
{
	/*	trigraph() is called when a '?' has been read.  If the
		next two characters complete a trigraph sequence, the
		character it stands for is returned.  Otherwise the
		characters read here are pushed back and '?' is returned.
	*/
	register int c;
	register int meant = 0;

	LoadChar(c);
	if (c == '?') {
		LoadChar(c);
		switch (c) {		/* is it a trigraph? */
		case '=':	meant = '#';	break;
		case '(':	meant = '[';	break;
		case '/':	meant = '\\';	break;
		case ')':	meant = ']';	break;
		case '\'':	meant = '^';	break;
		case '<':	meant = '{';	break;
		case '!':	meant = '|';	break;
		case '>':	meant = '}';	break;
		case '-':	meant = '~';	break;
		}
		if (meant)
			return(meant);
		PushBack();	/* the third character */
	}
	PushBack();		/* the second character */
	return('?');
}
|
1989-09-19 16:13:23 +00:00
|
|
|
|
|
|
|
/* strflt2tok only checks the syntax of the floating-point number and
|
|
|
|
* selects the right type for the number.
|
|
|
|
*/
|
|
|
|
strflt2tok(fltbuf, ptok)
|
|
|
|
char fltbuf[];
|
|
|
|
struct token *ptok;
|
|
|
|
{
|
|
|
|
register char *cp = fltbuf;
|
|
|
|
int malformed = 0;
|
|
|
|
|
|
|
|
while (is_dig(*cp)) cp++;
|
|
|
|
if (*cp == '.') {
|
|
|
|
cp++;
|
|
|
|
while (is_dig(*cp)) cp++;
|
|
|
|
}
|
|
|
|
if (*cp == 'e' || *cp == 'E') {
|
|
|
|
cp++;
|
|
|
|
if (*cp == '+' || *cp == '-')
|
|
|
|
cp++;
|
|
|
|
if (!is_dig(*cp)) malformed++;
|
|
|
|
while (is_dig(*cp)) cp++;
|
|
|
|
}
|
|
|
|
if (*cp == 'f' || *cp == 'F') {
|
|
|
|
if (*(cp + 1)) malformed++;
|
|
|
|
*cp = '\0';
|
|
|
|
ptok->tk_fund = FLOAT;
|
|
|
|
} else if (*cp == 'l' || *cp == 'L') {
|
|
|
|
if (*(cp + 1)) malformed++;
|
|
|
|
*cp = '\0';
|
|
|
|
ptok->tk_fund = LNGDBL;
|
|
|
|
} else {
|
|
|
|
ptok->tk_fund = DOUBLE;
|
|
|
|
}
|
|
|
|
if (*cp) malformed++;
|
|
|
|
if (malformed) {
|
|
|
|
lexerror("malformed floating constant");
|
|
|
|
ptok->tk_fval = Salloc("0.0", (unsigned) 4);
|
|
|
|
} else {
|
|
|
|
ptok->tk_fval = Salloc(fltbuf, (unsigned) (cp - fltbuf + 1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
strint2tok(intbuf, ptok)
|
|
|
|
char intbuf[];
|
|
|
|
struct token *ptok;
|
|
|
|
{
|
|
|
|
register char *cp = intbuf;
|
|
|
|
int base = 10;
|
|
|
|
arith val = 0, dig, ubound;
|
|
|
|
int uns_flg = 0, lng_flg = 0, malformed = 0, ovfl = 0;
|
|
|
|
int fund;
|
|
|
|
|
|
|
|
ASSERT(*cp != '-');
|
|
|
|
if (*cp == '0') {
|
|
|
|
cp++;
|
|
|
|
if (*cp == 'x' || *cp == 'X') {
|
|
|
|
cp++;
|
|
|
|
base = 16;
|
|
|
|
} else base = 8;
|
|
|
|
}
|
|
|
|
/* The upperbound will be the same as when computed with
|
|
|
|
* max_unsigned_arith / base (since base is even). The problem here
|
|
|
|
* is that unsigned arith is not accepted by all compilers.
|
|
|
|
*/
|
|
|
|
ubound = max_arith / (base / 2);
|
|
|
|
|
|
|
|
while (is_hex(*cp)) {
|
1989-10-18 13:12:31 +00:00
|
|
|
dig = hex_val(*cp);
|
1989-09-19 16:13:23 +00:00
|
|
|
if (dig >= base) {
|
|
|
|
malformed++; /* ignore */
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (val < 0 || val > ubound) ovfl++;
|
|
|
|
val *= base;
|
|
|
|
if (val < 0 && val + dig >= 0) ovfl++;
|
|
|
|
val += dig;
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (*cp) {
|
|
|
|
if (*cp == 'l' || *cp == 'L') lng_flg++;
|
|
|
|
else if (*cp == 'u' || *cp == 'U') uns_flg++;
|
|
|
|
else break;
|
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
if (*cp) {
|
|
|
|
malformed++;
|
|
|
|
}
|
|
|
|
if (malformed) {
|
|
|
|
lexerror("malformed %s integer constant",
|
|
|
|
(base == 10 ? "decimal"
|
|
|
|
: (base == 8 ? "octal"
|
|
|
|
: "hexadecimal")));
|
|
|
|
} else {
|
|
|
|
if (lng_flg > 1)
|
|
|
|
lexerror("only one long suffix allowed");
|
|
|
|
if (uns_flg > 1)
|
|
|
|
lexerror("only one unsigned suffix allowed");
|
|
|
|
}
|
|
|
|
if (ovfl) {
|
|
|
|
lexwarning("overflow in constant");
|
|
|
|
fund = ULONG;
|
|
|
|
} else if ((val & full_mask[(int)int_size]) == val) {
|
|
|
|
if (val >= 0 && val <= max_int) fund = INT;
|
|
|
|
else fund = (base == 10 ? LONG : UNSIGNED);
|
|
|
|
} else if((val & full_mask[(int)long_size]) == val) {
|
|
|
|
if (val > 0) fund = LONG;
|
|
|
|
else fund = ULONG;
|
|
|
|
} else { /* sizeof(arith) is greater than long_size */
|
|
|
|
ASSERT(arith_size > long_size);
|
|
|
|
lexwarning("constant too large for target machine");
|
|
|
|
/* cut the size to prevent further complaints */
|
|
|
|
val &= full_mask[(int)long_size];
|
|
|
|
fund = ULONG;
|
|
|
|
}
|
|
|
|
if (lng_flg) {
|
|
|
|
if (fund == INT) fund = LONG;
|
|
|
|
else if (fund == UNSIGNED) fund = ULONG;
|
|
|
|
}
|
|
|
|
if (uns_flg) {
|
|
|
|
if (fund == INT) fund = UNSIGNED;
|
|
|
|
else if (fund == LONG) fund = ULONG;
|
|
|
|
}
|
|
|
|
ptok->tk_fund = fund;
|
|
|
|
ptok->tk_ival = val;
|
|
|
|
}
|