ack/lang/cem/cpp.ansi/LLlex.c

666 lines
14 KiB
C

/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*/
/* $Id$ */
/* L E X I C A L A N A L Y Z E R */
#include "parameters.h"
#include <string.h>
#include <alloc.h>
#include "input.h"
#include "arith.h"
#include "macro.h"
#include "idf.h"
#include "LLlex.h"
#include "Lpars.h"
#include "replace.h"
#include "class.h"
#include "error.h"
#include "bits.h"
#define BUFSIZ 1024
struct token dot;
int ReplaceMacros = 1; /* replacing macros */
int AccDefined = 0; /* accept "defined(...)" */
int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
int Unstacked = 0; /* an unstack is done */
int AccFileSpecifier = 0; /* return filespecifier <...> */
int LexSave = 0; /* last character read by GetChar */
extern int InputLevel; /* # of current macro expansions */
#define FLG_ESEEN 0x01 /* possibly a floating point number */
#define FLG_DOTSEEN 0x02 /* certainly a floating point number */
/* Private forward definitions */
static arith char_constant(char*);
static char* string_token(char *, int);
static int quoted(register int);
static int val_in_base(register int, int);
static int trigraph(void);
int LLlex(void)
{
return (DOT != EOF) ? GetToken(&dot) : EOF;
}
int GetToken(register struct token* ptok)
{
/* GetToken() is the actual token recognizer. It calls the
control line interpreter if it encounters a "\n{w}*#"
combination. Macro replacement is also performed if it is
needed.
*/
char buf[BUFSIZ];
register int ch, nch;
again: /* rescan the input after an error or replacement */
ch = GetChar();
/* rescan, the following character has been read */
if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
fatal("non-ascii '\\%03o' read", ch & 0377);
/* keep track of the place of the token in the file */
switch (class(ch))
{ /* detect character class */
case STNL: /* newline, vertical space or formfeed */
LineNumber++;
return ptok->tk_symb = EOF;
case STSKIP: /* just skip the skip characters */
goto again;
case STGARB: /* garbage character */
garbage:
if (040 < ch && ch < 0177)
error("garbage char %c", ch);
else
error("garbage char \\%03o", ch);
goto again;
case STSIMP: /* a simple character, no part of compound token*/
return ptok->tk_symb = ch;
case STCOMP: /* maybe the start of a compound token */
nch = GetChar(); /* character lookahead */
switch (ch)
{
case '!':
if (nch == '=')
return ptok->tk_symb = NOTEQUAL;
UnGetChar();
return ptok->tk_symb = ch;
case '&':
if (nch == '&')
return ptok->tk_symb = AND;
else if (nch == '=')
return ptok->tk_symb = ANDAB;
UnGetChar();
return ptok->tk_symb = ch;
case '+':
if (nch == '+')
return ptok->tk_symb = PLUSPLUS;
else if (nch == '=')
return ptok->tk_symb = PLUSAB;
UnGetChar();
return ptok->tk_symb = ch;
case '-':
if (nch == '-')
return ptok->tk_symb = MINMIN;
else if (nch == '>')
return ptok->tk_symb = ARROW;
else if (nch == '=')
return ptok->tk_symb = MINAB;
UnGetChar();
return ptok->tk_symb = ch;
case '<':
if (AccFileSpecifier)
{
UnGetChar(); /* pushback nch */
ptok->tk_str = string_token("file specifier", '>');
return ptok->tk_symb = FILESPECIFIER;
}
else if (nch == '<')
{
if ((nch = GetChar()) == '=')
return ptok->tk_symb = LEFTAB;
UnGetChar();
return ptok->tk_symb = LEFT;
}
else if (nch == '=')
return ptok->tk_symb = LESSEQ;
UnGetChar();
return ptok->tk_symb = ch;
case '=':
if (nch == '=')
return ptok->tk_symb = EQUAL;
UnGetChar();
return ptok->tk_symb = ch;
case '>':
if (nch == '=')
return ptok->tk_symb = GREATEREQ;
else if (nch == '>')
{
if ((nch = GetChar()) == '=')
return ptok->tk_symb = RIGHTAB;
UnGetChar();
return ptok->tk_symb = RIGHT;
}
UnGetChar();
return ptok->tk_symb = ch;
case '|':
if (nch == '|')
return ptok->tk_symb = OR;
else if (nch == '=')
return ptok->tk_symb = ORAB;
UnGetChar();
return ptok->tk_symb = ch;
case '%':
if (nch == '=')
return ptok->tk_symb = MODAB;
UnGetChar();
return ptok->tk_symb = ch;
case '*':
if (nch == '=')
return ptok->tk_symb = TIMESAB;
UnGetChar();
return ptok->tk_symb = ch;
case '^':
if (nch == '=')
return ptok->tk_symb = XORAB;
UnGetChar();
return ptok->tk_symb = ch;
case '/':
if (!InputLevel)
{
if (nch == '*')
{
skipcomment();
goto again;
}
else if (nch == '/')
{
skiplinecomment();
goto again;
}
}
else if (nch == '=')
return ptok->tk_symb = DIVAB;
UnGetChar();
return ptok->tk_symb = ch;
default:
crash("bad class for char 0%o", ch);
/* NOTREACHED */
}
case STCHAR: /* character constant */
ptok->tk_val = char_constant("character");
return ptok->tk_symb = INTEGER;
case STSTR: /* string */
ptok->tk_str = string_token("string", '"');
return ptok->tk_symb = STRING;
case STELL: /* wide character constant/string prefix */
nch = GetChar();
if (nch == '"')
{
ptok->tk_str = string_token("wide character string", '"');
return ptok->tk_symb = STRING;
}
else if (nch == '\'')
{
ptok->tk_val = char_constant("wide character");
return ptok->tk_symb = INTEGER;
}
UnGetChar();
/* fallthrough */
case STIDF:
{
extern int idfsize; /* ??? */
register char* tg = &buf[0];
register char* maxpos = &buf[idfsize];
int NoExpandNext = 0;
#define tstmac(bx) \
if (!(bits[ch] & bx)) \
goto nomac
#define cpy *tg++ = ch
#define load \
(ch = GetChar()); \
if (!in_idf(ch)) \
goto endidf
if (Unstacked)
EnableMacros(); /* unstack macro's when allowed. */
if (ch == NOEXPM)
{
NoExpandNext = 1;
ch = GetChar();
}
#ifdef DOBITS
cpy;
tstmac(bit0);
load;
cpy;
tstmac(bit1);
load;
cpy;
tstmac(bit2);
load;
cpy;
tstmac(bit3);
load;
cpy;
tstmac(bit4);
load;
cpy;
tstmac(bit5);
load;
cpy;
tstmac(bit6);
load;
cpy;
tstmac(bit7);
load;
#endif
for (;;)
{
if (tg < maxpos)
{
cpy;
}
load;
}
endidf:
/*if (ch != EOI) UnGetChar();*/
UnGetChar();
*tg++ = '\0'; /* mark the end of the identifier */
if (ReplaceMacros)
{
register struct idf* idef = findidf(buf);
if (idef && idef->id_macro && !NoExpandNext)
{
if (replace(idef))
goto again;
}
}
nomac: /* buf can already be null-terminated. soit */
ch = GetChar();
while (in_idf(ch))
{
if (tg < maxpos)
*tg++ = ch;
ch = GetChar();
}
UnGetChar();
*tg++ = '\0'; /* mark the end of the identifier */
NoExpandNext = 0;
if (UnknownIdIsZero)
{
ptok->tk_val = (arith)0;
return ptok->tk_symb = INTEGER;
}
ptok->tk_str = Malloc((unsigned)(tg - buf));
strcpy(ptok->tk_str, buf);
return IDENTIFIER;
}
case STNUM: /* a numeric constant */
{ /* it may only be an integer constant */
register int base = 10, vch;
register arith val = 0;
int ovfl = 0;
arith ubound = max_arith / (base / 2);
/* Since the preprocessor only knows integers and has
* nothing to do with ellipsis we just return when the
* pp-number starts with a '.'
*/
if (ch == '.')
{
return ptok->tk_symb = ch;
}
if (ch == '0')
{
ch = GetChar();
if (ch == 'x' || ch == 'X')
{
base = 16;
ch = GetChar();
}
else
{
base = 8;
}
}
while ((vch = val_in_base(ch, base)) >= 0)
{
if (val < 0 || val > ubound)
ovfl++;
val *= base;
if (val < 0 && val + vch >= 0)
ovfl++;
val += vch;
ch = GetChar();
}
ptok->tk_unsigned = 0;
if (ch == 'u' || ch == 'U')
{
ptok->tk_unsigned = 1;
ch = GetChar();
if (ch == 'l' || ch == 'L')
{
ch = GetChar();
}
}
else if (ch == 'l' || ch == 'L')
{
ch = GetChar();
if (ch == 'u' || ch == 'U')
{
ptok->tk_unsigned = 1;
ch = GetChar();
}
}
if (ovfl)
{
warning("overflow in constant");
ptok->tk_unsigned = 1;
}
else if (val < 0)
{
/* give warning??? */
ptok->tk_unsigned = 1;
}
UnGetChar();
ptok->tk_val = val;
return ptok->tk_symb = INTEGER;
}
case STEOI: /* end of text on source file */
return ptok->tk_symb = EOF;
case STMSPEC:
if (!InputLevel)
goto garbage;
if (ch == TOKSEP)
goto again;
/* fallthrough shouldn't happen */
default: /* this cannot happen */
crash("bad class for char 0%o", ch);
}
/*NOTREACHED*/
}
void skipcomment(void)
{
/* The last character read has been the '*' of '/_*'. The
characters, except NL and EOI, between '/_*' and the first
occurring '*_/' are not interpreted.
NL only affects the LineNumber. EOI is not legal.
Important note: it is not possible to stop skipping comment
beyond the end-of-file of an included file.
EOI is returned by LoadChar only on encountering EOF of the
top-level file...
*/
register int c;
NoUnstack++;
c = GetChar();
do
{
while (c != '*')
{
if (class(c) == STNL)
{
++LineNumber;
}
else if (c == EOI)
{
NoUnstack--;
return;
}
c = GetChar();
} /* last Character seen was '*' */
c = GetChar();
} while (c != '/');
NoUnstack--;
}
void skiplinecomment(void)
{
/* The last character read has been the '/' of '//'. We read
and discard all characters up to but not including the next
NL. */
for (;;) {
int c = GetChar();
if ((class(c) == STNL) || (c == EOI))
{
UnGetChar();
break;
}
}
}
static arith char_constant(char* nm)
{
register arith val = 0;
register int ch;
int size = 0;
ch = GetChar();
if (ch == '\'')
error("%s constant too short", nm);
else
while (ch != '\'')
{
if (ch == '\n')
{
error("newline in %s constant", nm);
LineNumber++;
break;
}
if (ch == '\\')
ch = quoted(GetChar());
if (ch >= 128)
ch -= 256;
if (size < sizeof(arith))
val |= ch << (8 * size);
size++;
ch = GetChar();
}
if (size > sizeof(arith))
error("%s constant too long", nm);
else if (size > 1)
strict("%s constant includes more than one character", nm);
return val;
}
static char* string_token(char *nm, int stop_char)
{
register int ch;
register int str_size;
register char* str = Malloc((unsigned)(str_size = ISTRSIZE));
register int pos = 0;
ch = GetChar();
while (ch != stop_char)
{
if (ch == '\n')
{
error("newline in %s", nm);
LineNumber++;
break;
}
if (ch == EOI)
{
error("end-of-file inside %s", nm);
break;
}
if (ch == '\\' && !AccFileSpecifier)
ch = quoted(GetChar());
str[pos++] = ch;
if (pos == str_size)
str = Realloc(str, (unsigned)(str_size <<= 1));
ch = GetChar();
}
str[pos++] = '\0'; /* for filenames etc. */
str = Realloc(str, (unsigned)pos);
return str;
}
static int quoted(register int ch)
{
/* quoted() replaces an escaped character sequence by the
character meant.
*/
/* first char after backslash already in ch */
if (!is_oct(ch))
{ /* a quoted char */
switch (ch)
{
case 'n':
ch = '\n';
break;
case 't':
ch = '\t';
break;
case 'b':
ch = '\b';
break;
case 'r':
ch = '\r';
break;
case 'f':
ch = '\f';
break;
case 'a': /* alert */
ch = '\007';
break;
case 'v': /* vertical tab */
ch = '\013';
break;
case 'x': /* quoted hex */
{
register int hex = 0;
register int vch;
for (;;)
{
ch = GetChar();
if (vch = val_in_base(ch, 16), vch == -1)
break;
hex = hex * 16 + vch;
}
UnGetChar();
ch = hex;
}
}
}
else
{ /* a quoted octal */
register int oct = 0, cnt = 0;
do
{
oct = oct * 8 + (ch - '0');
ch = GetChar();
} while (is_oct(ch) && ++cnt < 3);
UnGetChar();
ch = oct;
}
return ch & 0377;
}
static int val_in_base(register int ch, int base)
{
switch (base)
{
case 8:
return (is_dig(ch) && ch < '9') ? ch - '0' : -1;
case 10:
return is_dig(ch) ? ch - '0' : -1;
case 16:
return is_dig(ch) ? ch - '0' : is_hex(ch) ? (ch - 'a' + 10) & 017 : -1;
default:
fatal("(val_in_base) illegal base value %d", base);
/* NOTREACHED */
}
}
int GetChar(void)
{
/* The routines GetChar and trigraph parses the trigraph
sequences and removes occurences of \\\n.
*/
register int ch;
again:
LoadChar(ch);
/* possible trigraph sequence */
if (ch == '?')
ch = trigraph();
/* \\\n are removed from the input stream */
if (ch == '\\')
{
LoadChar(ch);
if (ch == '\n')
{
++LineNumber;
goto again;
}
PushBack();
ch = '\\';
}
return (LexSave = ch);
}
static int trigraph(void)
{
register int ch;
LoadChar(ch);
if (ch == '?')
{
LoadChar(ch);
switch (ch)
{ /* its a trigraph */
case '=':
ch = '#';
return (ch);
case '(':
ch = '[';
return (ch);
case '/':
ch = '\\';
return (ch);
case ')':
ch = ']';
return (ch);
case '\'':
ch = '^';
return (ch);
case '<':
ch = '{';
return (ch);
case '!':
ch = '|';
return (ch);
case '>':
ch = '}';
return (ch);
case '-':
ch = '~';
return (ch);
}
PushBack();
}
PushBack();
return ('?');
}