ack/lang/cem/cemcom.ansi/LLlex.c
George Koehler 15950f9c95 Add long long literals like 123LL to ACK C.
For now, a long long literal must have the 'LL' or 'll' suffix.  A
literal without 'LL' or 'll' acts as before: it may become unsigned
long but not long long.  (For targets where int and long have the same
size, some literals change from unsigned int to unsigned long.)

Type `arith` may be too narrow for long long values.  Add a second
type `writh` for wide arithmetic, and change some variables from arith
to writh.  This may cause bugs if I forget to use writh, or if a
conversion from writh to arith overflows.  I mark some conversions
with (arith) or (writh) casts.

 - BigPars, SmallPars: Remove SPECIAL_ARITHMETICS.  This feature
   would change arith to a different type, but can't work, because it
   would conflict with definitions of arith in both <em_arith.h> and
   <flt_arith.h>.
 - LLlex.c: Understand 'LL' or 'll' suffix.  Cut size of constant when
   it overflows writh, not only when it overflows the target machine's
   types.  (This cut might not be necessary, because we might cut it
   again later.)  When picking signed long or unsigned long, check the
   target's long type, not the compiler's arith type; the old check
   for `val >= 0` was broken where sizeof(arith) > 4.
 - LLlex.h: Change struct token's tok_ival to writh, so it can hold a
   long long literal.
 - arith.c: Adjust to VL_VALUE being writh.  Don't convert between
   float and integer at compile-time if the integer might be too wide
   for <flt_arith.h>.  Add writh2str(), because writh might be too
   wide for long2str().
 - arith.h: Remove SPECIAL_ARITHMETICS.  Declare full_mask[] here,
   not in several *.c files.  Declare writh2str().
 - ch3.c, ch3bin.c, ch3mon.c, declarator.c, statement.g: Remove
   obsolete casts.  Adjust to VL_VALUE being writh.
 - conversion.c, stab.c: Don't declare full_mask[].
 - cstoper.c: Use writh for constant operations on VL_VALUE, and for
   full_mask[].
 - declar., field.c, ival.g: Add casts.
 - dumpidf.c: Need to #include "parameters.h" before checking DEBUG.
   Use writh2str, because "%ld" might not work.
 - eval.c, eval.h: Add casts.  Use writh when writing a wide constant
   in EM.
 - expr.c: Add and remove casts.  In fill_int_expr(), make expression
   from long long literal.  In chk_cst_expr(), allow long long as
   constant expression, so the compiler may accept `case 123LL:` in a
   switch statement.
 - expr.str: Change struct value's vl_value and struct expr's VL_VALUE
   to writh, so an expression may have a long long value at compile
   time.
 - statement.g: Remove obsolete casts.
 - switch.c, switch.str: Use writh in case entries for switch
   statements, so `switch (ll) {...}` with long long ll works.
 - tokenname.c: Add ULNGLNG so LLlex.c can use it for literals.
2019-09-04 22:14:38 -04:00

759 lines
16 KiB
C

/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*/
/* $Id$ */
/* L E X I C A L A N A L Y Z E R */
#include <assert.h>
#include <alloc.h>
#include "parameters.h"
#include "input.h"
#include "arith.h"
#include "def.h"
#include "macro.h"
#include "idf.h"
#include "LLlex.h"
#include "Lpars.h"
#include "class.h"
#include "sizes.h"
#include "type.h" /* no_long_long() */
#include "error.h"
#include "domacro.h"
#include "specials.h" /* registration of special identifiers */
/* Data about the token yielded */
struct token dot, ahead, aside;
int token_nmb = 0; /* number of the ahead token */
int tk_nmb_at_last_syn_err = -5 /*ERR_SHADOW*/;
/* token number at last syntax error */
int idfsize = IDFSIZE;
char sp_occurred[SP_TOTAL + 1];
int AccFileSpecifier = 0; /* return filespecifier <...> */
int EoiForNewline = 0; /* return EOI upon encountering newline */
int File_Inserted = 0; /* a file has just been inserted */
int LexSave = 0; /* last character read by GetChar */
#define MAX_LL_DEPTH 2
#define FLG_ESEEN 0x01 /* possibly a floating point number */
#define FLG_DOTSEEN 0x02 /* certainly a floating point number */
#ifdef LINT
extern int lint_skip_comment;
#endif
/* Internal function declarations */
static arith char_constant(char*);
static char* string_token(char *, int , int *);
static int quoted(register int);
static int hex_val(register int);
static void strflt2tok(char [], struct token *);
static void strint2tok(char [], struct token *);
int LLlex(void)
{
/* LLlex() plays the role of Lexical Analyzer for the C parser.
The look-ahead and putting aside of tokens are taken into
account.
*/
if (ASIDE)
{ /* a token is put aside */
dot = aside;
ASIDE = 0;
}
else
{ /* read ahead and return the old one */
#ifdef LINT
lint_comment_ahead();
#endif /* LINT */
dot = ahead;
/* the following test is performed due to the dual
task of LLlex(): it is also called for parsing the
restricted constant expression following a #if or
#elif. The newline character causes EOF to be
returned in this case to stop the LLgen parsing task.
*/
if (DOT != EOI)
GetToken(&ahead);
else
DOT = EOF;
}
return DOT;
}
int GetToken(register struct token* ptok)
{
/* GetToken() is the actual token recognizer. It calls the
control line interpreter if it encounters a "\n{w}*#"
combination. Macro replacement is also performed if it is
needed.
*/
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
register int ch, nch;
token_nmb++;
if (File_Inserted)
{
File_Inserted = 0;
goto firstline;
}
again: /* rescan the input after an error or replacement */
ch = GetChar();
go_on: /* rescan, the following character has been read */
if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
{
fatal("non-ascii '\\%03o' read", ch & 0377);
}
/* keep track of the place of the token in the file */
ptok->tk_file = FileName;
ptok->tk_line = LineNumber;
switch (class(ch))
{ /* detect character class */
case STNL: /* newline, vertical space or formfeed */
firstline:
LineNumber++; /* also at vs and ff */
ptok->tk_file = FileName;
ptok->tk_line = LineNumber;
if (EoiForNewline) /* called in control line */
/* a newline in a control line indicates the
end-of-information of the line.
*/
return ptok->tk_symb = EOI;
while ((ch = GetChar()), (ch == '#'
|| class(ch) == STSKIP))
{
/* blanks are allowed before hashes */
if (ch == '#')
{
/* a control line follows */
domacro();
}
}
/* We have to loop here, because in
`domacro' the nl, vt or ff is read. The
character following it may again be a `#'.
*/
goto go_on;
case STSKIP: /* just skip the skip characters */
goto again;
case STGARB: /* garbage character */
if (040 < ch && ch < 0177)
{
return ptok->tk_symb = ch;
}
else
{
lexerror("garbage char \\%03o", ch);
}
goto again;
case STSIMP: /* a simple character, no part of compound token*/
return ptok->tk_symb = ch;
case STCOMP: /* maybe the start of a compound token */
nch = GetChar(); /* character lookahead */
switch (ch)
{
case '!':
if (nch == '=')
return ptok->tk_symb = NOTEQUAL;
break;
case '&':
if (nch == '&')
return ptok->tk_symb = AND;
if (nch == '=')
return ptok->tk_symb = ANDAB;
break;
case '+':
if (nch == '+')
return ptok->tk_symb = PLUSPLUS;
if (nch == '=')
return ptok->tk_symb = PLUSAB;
break;
case '-':
if (nch == '-')
return ptok->tk_symb = MINMIN;
if (nch == '>')
return ptok->tk_symb = ARROW;
if (nch == '=')
return ptok->tk_symb = MINAB;
break;
case '<':
if (AccFileSpecifier)
{
UnGetChar(); /* pushback nch */
ptok->tk_bts = string_token("file specifier", '>', &(ptok->tk_len));
return ptok->tk_symb = FILESPECIFIER;
}
if (nch == '<')
{
if ((nch = GetChar()) == '=')
return ptok->tk_symb = LEFTAB;
UnGetChar();
return ptok->tk_symb = LEFT;
}
if (nch == '=')
return ptok->tk_symb = LESSEQ;
break;
case '=':
if (nch == '=')
return ptok->tk_symb = EQUAL;
break;
case '>':
if (nch == '=')
return ptok->tk_symb = GREATEREQ;
if (nch == '>')
{
if ((nch = GetChar()) == '=')
return ptok->tk_symb = RIGHTAB;
UnGetChar();
return ptok->tk_symb = RIGHT;
}
break;
case '|':
if (nch == '|')
return ptok->tk_symb = OR;
if (nch == '=')
return ptok->tk_symb = ORAB;
break;
case '%':
if (nch == '=')
return ptok->tk_symb = MODAB;
break;
case '*':
if (nch == '=')
return ptok->tk_symb = TIMESAB;
break;
case '^':
if (nch == '=')
return ptok->tk_symb = XORAB;
break;
case '/':
if (nch == '=')
return ptok->tk_symb = DIVAB;
break;
default:
crash("bad class for char 0%o", ch);
/* NOTREACHED */
}
UnGetChar();
return ptok->tk_symb = ch;
case STCHAR: /* character constant */
ptok->tk_ival = char_constant("character");
ptok->tk_fund = INT;
return ptok->tk_symb = INTEGER;
case STSTR: /* string */
ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
ptok->tk_fund = CHAR; /* string of characters */
return ptok->tk_symb = STRING;
case STELL: /* wide character constant/string prefix */
nch = GetChar();
if (nch == '"')
{
ptok->tk_bts = string_token("wide character string", '"', &(ptok->tk_len));
ptok->tk_fund = WCHAR; /* string of wide characters */
return ptok->tk_symb = STRING;
}
else if (nch == '\'')
{
ptok->tk_ival = char_constant("wide character");
ptok->tk_fund = INT;
return ptok->tk_symb = INTEGER;
}
UnGetChar();
/* fallthrough */
case STIDF:
{
register char* tg = &buf[0];
register int pos = -1;
register struct idf* idef;
extern int idfsize; /* ??? */
do
{ /* read the identifier */
if (++pos < idfsize)
{
*tg++ = ch;
}
ch = GetChar();
} while (in_idf(ch));
if (ch != EOI)
UnGetChar();
*tg++ = '\0'; /* mark the end of the identifier */
idef = ptok->tk_idf = str2idf(buf, 1);
sp_occurred[idef->id_special] = 1;
idef->id_file = ptok->tk_file;
idef->id_line = ptok->tk_line;
ptok->tk_symb
= (idef->id_reserved
? idef->id_reserved
: idef->id_def && idef->id_def->df_sc == TYPEDEF ? TYPE_IDENTIFIER
: IDENTIFIER);
return IDENTIFIER;
}
case STNUM: /* a numeric constant */
{
register int siz_left = NUMSIZE - 1;
register char* np = &buf[0];
int flags = 0;
#define store(ch) \
if (--siz_left >= 0) \
*np++ = ch;
if (ch == '.')
{
/* An embarrasing ambiguity. We have either a
pp-number, a field operator, an ELLIPSIS or
an error (..).
*/
ch = GetChar();
if (!is_dig(ch))
{ /* . or ... */
if (ch == '.')
{
if ((ch = GetChar()) == '.')
return ptok->tk_symb = ELLIPSIS;
UnGetChar(); /* not '.' */
ChPushBack('.'); /* sigh ... */
}
else
UnGetChar(); /* not '.' */
return ptok->tk_symb = '.';
}
UnGetChar();
ch = '.';
flags |= FLG_DOTSEEN;
}
store(ch);
ch = GetChar();
while (in_idf(ch) || ch == '.')
{
store(ch);
if (ch == '.')
flags |= FLG_DOTSEEN;
if (ch == 'e' || ch == 'E')
{
flags |= FLG_ESEEN;
ch = GetChar();
if (ch == '+' || ch == '-')
{
flags |= FLG_DOTSEEN; /* trick */
store(ch);
ch = GetChar();
}
}
else
ch = GetChar();
}
store('\0');
UnGetChar();
np = &buf[0];
ch = *np++;
if (siz_left < 0)
{
lexerror("number too long");
if ((flags & FLG_DOTSEEN)
|| (flags & FLG_ESEEN && !(ch == '0' && (*np == 'x' || *np == 'X'))))
{
ptok->tk_fval = Salloc("0.0", (unsigned)4);
ptok->tk_fund = DOUBLE;
return ptok->tk_symb = FLOATING;
}
ptok->tk_ival = 1;
ptok->tk_fund = ULONG;
ptok->tk_symb = INTEGER;
}
/* Now, the pp-number must be converted into a token */
if ((flags & FLG_DOTSEEN)
|| (flags & FLG_ESEEN && !(ch == '0' && (*np == 'x' || *np == 'X'))))
{
strflt2tok(&buf[0], ptok);
return ptok->tk_symb = FLOATING;
}
strint2tok(&buf[0], ptok);
return ptok->tk_symb = INTEGER;
}
case STEOI: /* end of text on source file */
return ptok->tk_symb = EOI;
default: /* this cannot happen */
crash("bad class for char 0%o", ch);
}
/*NOTREACHED*/
}
static arith char_constant(char* nm)
{
register arith val = 0;
register int ch;
int size = 0;
ch = GetChar();
if (ch == '\'')
lexerror("%s constant too short", nm);
else
while (ch != '\'')
{
if (ch == '\n')
{
lexerror("newline in %s constant", nm);
LineNumber++;
break;
}
if (ch == '\\')
ch = quoted(GetChar());
if (ch >= 128)
ch -= 256;
if (size < (int)int_size)
val |= ch << 8 * size;
size++;
ch = GetChar();
}
if (size > 1)
lexstrict("%s constant includes more than one character", nm);
if (size > (int)int_size)
lexerror("%s constant too long", nm);
return val;
}
static char* string_token(char *nm, int stop_char, int *plen)
{
register int ch;
register int str_size;
register char* str = Malloc((unsigned)(str_size = ISTRSIZE));
register int pos = 0;
ch = GetChar();
while (ch != stop_char)
{
if (ch == '\n')
{
lexerror("newline in %s", nm);
LineNumber++;
break;
}
if (ch == EOI)
{
lexerror("end-of-file inside %s", nm);
break;
}
if (ch == '\\' && !AccFileSpecifier)
ch = quoted(GetChar());
str[pos++] = ch;
if (pos == str_size)
str = Realloc(str, (unsigned)(str_size += RSTRSIZE));
ch = GetChar();
}
str[pos++] = '\0'; /* for filenames etc. */
*plen = pos;
return str;
}
static int quoted(register int ch)
{
/* quoted() replaces an escaped character sequence by the
character meant.
*/
/* first char after backslash already in ch */
if (!is_oct(ch))
{ /* a quoted char */
switch (ch)
{
case 'n':
ch = '\n';
break;
case 't':
ch = '\t';
break;
case 'b':
ch = '\b';
break;
case 'r':
ch = '\r';
break;
case 'f':
ch = '\f';
break;
case 'a': /* alert */
ch = '\007';
break;
case 'v': /* vertical tab */
ch = '\013';
break;
case 'x': /* quoted hex */
{
register int hex = 0;
register int vch;
for (;;)
{
ch = GetChar();
if ((vch = hex_val(ch)) == -1)
break;
hex = hex * 16 + vch;
}
UnGetChar();
ch = hex;
}
}
}
else
{ /* a quoted octal */
register int oct = 0, cnt = 0;
do
{
oct = oct * 8 + (ch - '0');
ch = GetChar();
} while (is_oct(ch) && ++cnt < 3);
UnGetChar();
ch = oct;
}
return ch & 0377;
}
static int hex_val(register int ch)
{
return is_dig(ch) ? ch - '0' : is_hex(ch) ? (ch - 'a' + 10) & 017 : -1;
}
int GetChar(void)
{
/* The routines GetChar and trigraph parses the trigraph
sequences and removes occurences of \\\n.
*/
register int ch;
LoadChar(ch);
return (LexSave = ch);
}
/* strflt2tok only checks the syntax of the floating-point number and
* selects the right type for the number.
*/
static void strflt2tok(char fltbuf[], struct token* ptok)
{
register char* cp = fltbuf;
int malformed = 0;
while (is_dig(*cp))
cp++;
if (*cp == '.')
{
cp++;
while (is_dig(*cp))
cp++;
}
if (*cp == 'e' || *cp == 'E')
{
cp++;
if (*cp == '+' || *cp == '-')
cp++;
if (!is_dig(*cp))
malformed++;
while (is_dig(*cp))
cp++;
}
if (*cp == 'f' || *cp == 'F')
{
if (*(cp + 1))
malformed++;
*cp = '\0';
ptok->tk_fund = FLOAT;
}
else if (*cp == 'l' || *cp == 'L')
{
if (*(cp + 1))
malformed++;
*cp = '\0';
ptok->tk_fund = LNGDBL;
}
else
{
if (*cp)
malformed++;
ptok->tk_fund = DOUBLE;
}
if (malformed)
{
lexerror("malformed floating constant");
ptok->tk_fval = Salloc("0.0", (unsigned)4);
}
else
{
ptok->tk_fval = Salloc(fltbuf, (unsigned)(cp - fltbuf + 1));
}
}
static void strint2tok(char intbuf[], struct token* ptok)
{
register char* cp = intbuf;
int base = 10, dig;
unsigned writh val = 0, ubound;
int uns_flg = 0, lng_flg = 0, lnglng_flg = 0;
int malformed = 0, ovfl = 0;
unsigned writh uint_mask, ulng_mask, ulnglng_mask;
int cut, fund;
assert(*cp != '-');
if (*cp == '0')
{
cp++;
if (*cp == 'x' || *cp == 'X')
{
cp++;
base = 16;
}
else
base = 8;
}
/* The upperbound checks if val * base would overflow. */
ubound = ~(unsigned writh)0 / base;
while (is_hex(*cp))
{
dig = hex_val(*cp);
if (dig >= base)
{
malformed++; /* ignore */
}
else
{
if (val > ubound)
ovfl++;
val *= base;
if (val > val + dig)
ovfl++;
val += dig;
}
cp++;
}
while (*cp)
{
if (*cp == 'l' || *cp == 'L')
{
if (*cp == *(cp + 1))
{
/* 'll' or 'LL' */
lnglng_flg++;
cp++;
}
else
lng_flg++;
}
else if (*cp == 'u' || *cp == 'U')
uns_flg++;
else
break;
cp++;
}
if (*cp)
{
malformed++;
}
if (malformed)
{
lexerror(
"malformed %s integer constant",
(base == 10 ? "decimal" : (base == 8 ? "octal" : "hexadecimal")));
}
else
{
if (lng_flg + lnglng_flg > 1)
lexerror("only one long suffix allowed");
if (uns_flg > 1)
lexerror("only one unsigned suffix allowed");
}
/* Get masks like 0XFFFF, 0XFFFFFFFF as unsigned values. */
uint_mask = (unsigned writh)full_mask[(int)int_size];
ulng_mask = (unsigned writh)full_mask[(int)long_size];
if (lnglng_size < 0)
ulnglng_mask = 0;
else
ulnglng_mask = (unsigned writh)full_mask[(int)lnglng_size];
/* If a decimal literal with no suffix is too big for int
and long, then C89 tries unsigned long, but C99 tries
long long (WG14, Rationale for C99, C99RationaleV5.10.pdf,
6.4.4.1 Integer constants).
This compiler follows C89 when the literal has no
long long suffix.
*/
cut = 0;
if (ovfl)
{
lexwarning("overflow in constant");
cut = 1; /* cut the size of the constant */
}
else if (!lng_flg && !lnglng_flg && (val & uint_mask) == val)
{
if ((val & (uint_mask >> 1)) == val)
fund = INT;
else if (base == 10 && !uns_flg)
{
if ((val & (ulng_mask >> 1)) == val)
fund = LONG;
else
fund = ULONG;
}
else
fund = UNSIGNED;
}
else if (!lnglng_flg && (val & ulng_mask) == val)
{
if ((val & (ulng_mask >> 1)) == val)
fund = LONG;
else
fund = ULONG;
}
else if (lnglng_flg && (val & ulnglng_mask) == val)
{
if ((val & (ulnglng_mask >> 1)) == val)
fund = LNGLNG;
else
fund = ULNGLNG;
}
else if (lnglng_flg && no_long_long())
fund = ERRONEOUS;
else
{
assert(sizeof(val) > long_size ||
(lnglng_size >= 0 && sizeof(val) > lnglng_size));
lexwarning("constant too large for target machine");
cut = 1;
}
if (cut)
{
/* cut the size to prevent further complaints */
if (lnglng_flg)
{
fund = ULNGLNG;
val &= ulnglng_mask;
}
else
{
fund = ULONG;
val &= ulng_mask;
}
}
if (uns_flg)
{
if (fund == INT)
fund = UNSIGNED;
else if (fund == LONG)
fund = ULONG;
else if (fund == LNGLNG)
fund = ULNGLNG;
}
ptok->tk_fund = fund;
ptok->tk_ival = (writh)val;
}