ack/lang/m2/comp/LLlex.c

642 lines
12 KiB
C
Raw Normal View History

/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*
* Author: Ceriel J.H. Jacobs
*/
1986-03-26 15:11:02 +00:00
/* L E X I C A L A N A L Y S E R F O R M O D U L A - 2 */
1986-03-20 14:52:03 +00:00
/* $Header$ */
1986-05-01 19:06:53 +00:00
#include "debug.h"
#include "idfsize.h"
#include "numsize.h"
#include "strsize.h"
1986-03-26 15:11:02 +00:00
#include <alloc.h>
#include <em_arith.h>
1986-04-15 17:51:53 +00:00
#include <em_label.h>
1986-04-02 17:34:21 +00:00
#include <assert.h>
1986-04-17 09:28:09 +00:00
#include "LLlex.h"
1986-03-26 15:11:02 +00:00
#include "input.h"
#include "f_info.h"
#include "Lpars.h"
#include "class.h"
#include "idf.h"
#include "def.h"
1986-04-15 17:51:53 +00:00
#include "type.h"
#include "const.h"
1986-11-05 14:33:00 +00:00
#include "warning.h"
1986-03-26 15:11:02 +00:00
1986-03-20 14:52:03 +00:00
long str2long();
t_token dot,
1986-09-25 19:39:06 +00:00
aside;
t_type *toktype;
int idfsize = IDFSIZE;
int ForeignFlag;
1986-06-06 09:35:11 +00:00
#ifdef DEBUG
extern int cntlines;
#endif
1986-03-20 14:52:03 +00:00
extern char options[];
extern int flt_status;
1986-11-05 14:33:00 +00:00
1986-06-17 12:04:05 +00:00
STATIC
1986-03-24 17:29:57 +00:00
SkipComment()
1986-03-20 14:52:03 +00:00
{
1986-04-03 17:41:26 +00:00
/* Skip Modula-2 comments (* ... *).
Note that comments may be nested (par. 3.5).
*/
register int ch, c;
1986-10-06 20:36:30 +00:00
register int CommentLevel = 0;
1986-03-24 17:29:57 +00:00
1986-10-06 20:36:30 +00:00
LoadChar(ch);
if (ch == '$') {
LoadChar(ch);
switch(ch) {
case 'F':
/* Foreign; This definition module has an
implementation in another language.
In this case, don't generate prefixes in front
of the names. Also, don't generate call to
initialization routine.
*/
ForeignFlag = D_FOREIGN;
break;
case 'U':
inidf['_'] = 1;
break;
case 'A': /* Extra array bound checks, on or off */
case 'R': /* Range checks, on or off */
{
int on_on_minus = ch == 'R';
LoadChar(c);
if (c == '-') {
options[ch] = on_on_minus;
break;
}
if (c == '+') {
options[ch] = !on_on_minus;
break;
}
ch = c;
}
/* fall through */
default:
break;
}
}
1986-03-24 17:29:57 +00:00
for (;;) {
if (!(ch & 0200) && class(ch) == STNL) {
1986-03-24 17:29:57 +00:00
LineNumber++;
1986-06-06 09:35:11 +00:00
#ifdef DEBUG
cntlines++;
#endif
1986-03-24 17:29:57 +00:00
}
1986-06-17 12:04:05 +00:00
else if (ch == '(') {
1986-03-24 17:29:57 +00:00
LoadChar(ch);
1986-10-06 20:36:30 +00:00
if (ch == '*') CommentLevel++;
else continue;
1986-03-24 17:29:57 +00:00
}
1986-06-17 12:04:05 +00:00
else if (ch == '*') {
1986-03-24 17:29:57 +00:00
LoadChar(ch);
1986-10-06 20:36:30 +00:00
if (ch == ')') {
CommentLevel--;
if (CommentLevel < 0) break;
}
else continue;
1986-03-24 17:29:57 +00:00
}
1986-10-06 20:36:30 +00:00
else if (ch == EOI) {
lexerror("unterminated comment");
PushBack();
1986-10-06 20:36:30 +00:00
break;
}
LoadChar(ch);
1986-03-20 14:52:03 +00:00
}
1986-03-24 17:29:57 +00:00
}
1986-03-20 14:52:03 +00:00
1986-09-25 19:39:06 +00:00
STATIC struct string *
1986-03-24 17:29:57 +00:00
GetString(upto)
{
1986-04-03 17:41:26 +00:00
/* Read a Modula-2 string, delimited by the character "upto".
*/
1986-03-24 17:29:57 +00:00
register int ch;
register struct string *str = (struct string *)
Malloc((unsigned) sizeof(struct string));
1986-04-04 13:47:04 +00:00
register char *p;
register int len;
1986-03-24 17:29:57 +00:00
len = ISTRSIZE;
1986-10-06 20:36:30 +00:00
str->s_str = p = Malloc((unsigned int) ISTRSIZE);
1986-09-25 19:39:06 +00:00
while (LoadChar(ch), ch != upto) {
if (!(ch & 0200) && class(ch) == STNL) {
1986-03-24 17:29:57 +00:00
lexerror("newline in string");
LineNumber++;
1986-06-06 09:35:11 +00:00
#ifdef DEBUG
cntlines++;
#endif
1986-03-24 17:29:57 +00:00
break;
}
1986-09-25 19:39:06 +00:00
if (ch == EOI) {
1986-03-24 17:29:57 +00:00
lexerror("end-of-file in string");
break;
}
1986-04-04 13:47:04 +00:00
*p++ = ch;
if (p - str->s_str == len) {
1987-08-10 13:01:54 +00:00
str->s_str = Realloc(str->s_str,
(unsigned int) len + RSTRSIZE);
p = str->s_str + len;
len += RSTRSIZE;
1986-03-24 17:29:57 +00:00
}
}
1986-04-04 13:47:04 +00:00
str->s_length = p - str->s_str;
len = (str->s_length+(int)word_size) & ~((int)word_size-1);
while (p - str->s_str < len) {
*p++ = '\0';
}
str->s_str = Realloc(str->s_str, (unsigned) len);
1986-11-26 16:40:45 +00:00
if (str->s_length == 0) str->s_length = 1;
/* ??? string length at least 1 ??? */
1986-09-25 19:39:06 +00:00
return str;
1986-03-20 14:52:03 +00:00
}
1986-11-05 14:33:00 +00:00
static char *s_error = "illegal line directive";
STATIC int
getch()
{
register int ch;
while (LoadChar(ch), (ch & 0200) && ch != EOI) {
error("non-ascii '\\%03o' read", ch & 0377);
1986-11-05 14:33:00 +00:00
}
return ch;
}
CheckForLineDirective()
{
register int ch = getch();
1986-11-05 14:33:00 +00:00
register int i = 0;
char buf[IDFSIZE];
1986-11-05 14:33:00 +00:00
register char *c = buf;
if (ch != '#') {
PushBack();
return;
}
1986-11-05 14:33:00 +00:00
do { /*
* Skip to next digit
* Do not skip newlines
*/
ch = getch();
if (class(ch) == STNL || class(ch) == STEOI) {
1986-11-05 14:33:00 +00:00
LineNumber++;
error(s_error);
return;
}
} while (class(ch) != STNUM);
while (class(ch) == STNUM) {
1986-11-05 14:33:00 +00:00
i = i*10 + (ch - '0');
ch = getch();
}
while (ch != '"' && class(ch) != STNL && class(ch) != STEOI)
ch = getch();
1986-11-05 14:33:00 +00:00
if (ch == '"') {
c = buf;
do {
ch = getch();
if (c < &buf[IDFSIZE]) *c++ = ch;
if (class(ch) == STNL || class(ch) == STEOI) {
1986-11-05 14:33:00 +00:00
LineNumber++;
error(s_error);
return;
}
} while (ch != '"');
*--c = '\0';
do {
ch = getch();
} while (class(ch) != STNL && class(ch) != STEOI);
1986-11-05 14:33:00 +00:00
/*
* Remember the file name
*/
if (class(ch) == STNL && strcmp(FileName,buf)) {
FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
1986-11-05 14:33:00 +00:00
}
}
if (class(ch) == STEOI) {
1986-11-05 14:33:00 +00:00
error(s_error);
return;
}
LineNumber = i;
}
1989-12-07 16:28:05 +00:00
STATIC
CheckForLet()
{
register int ch;
LoadChar(ch);
if (ch != EOI) {
if (class(ch) == STIDF) {
lexerror("token separator required between identifier and number");
}
PushBack();
}
}
1986-03-20 14:52:03 +00:00
int
1986-03-24 17:29:57 +00:00
LLlex()
1986-03-20 14:52:03 +00:00
{
1986-04-17 09:28:09 +00:00
/* LLlex() is the Lexical Analyzer.
1986-04-03 17:41:26 +00:00
The putting aside of tokens is taken into account.
*/
register t_token *tk = &dot;
1986-06-04 09:01:48 +00:00
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 2];
1986-03-20 14:52:03 +00:00
register int ch, nch;
1986-06-04 09:01:48 +00:00
toktype = error_type;
1986-06-17 12:04:05 +00:00
1986-03-24 17:29:57 +00:00
if (ASIDE) { /* a token is put aside */
*tk = aside;
ASIDE = 0;
return tk->tk_symb;
}
1986-06-17 12:04:05 +00:00
1986-03-20 14:52:03 +00:00
again:
ch = getch();
tk->tk_lineno = LineNumber;
1986-03-20 14:52:03 +00:00
switch (class(ch)) {
case STNL:
LineNumber++;
1986-06-06 09:35:11 +00:00
#ifdef DEBUG
cntlines++;
#endif
CheckForLineDirective();
goto again;
1986-09-25 19:39:06 +00:00
case STSKIP:
1986-03-20 14:52:03 +00:00
goto again;
case STGARB:
1986-11-26 16:40:45 +00:00
if ((unsigned) ch - 040 < 0137) {
1986-03-20 14:52:03 +00:00
lexerror("garbage char %c", ch);
}
1986-11-26 16:40:45 +00:00
else lexerror("garbage char \\%03o", ch);
1986-03-20 14:52:03 +00:00
goto again;
case STSIMP:
if (ch == '(') {
LoadChar(nch);
if (nch == '*') {
SkipComment();
goto again;
}
PushBack();
1986-03-20 14:52:03 +00:00
}
if (ch == '&') return tk->tk_symb = AND;
if (ch == '~') return tk->tk_symb = NOT;
1986-03-20 14:52:03 +00:00
return tk->tk_symb = ch;
case STCOMP:
LoadChar(nch);
switch (ch) {
case '.':
if (nch == '.') {
return tk->tk_symb = UPTO;
}
1986-09-25 19:39:06 +00:00
break;
1986-03-20 14:52:03 +00:00
case ':':
if (nch == '=') {
return tk->tk_symb = BECOMES;
}
1986-09-25 19:39:06 +00:00
break;
1986-03-20 14:52:03 +00:00
case '<':
if (nch == '=') {
return tk->tk_symb = LESSEQUAL;
}
1986-05-30 18:48:00 +00:00
if (nch == '>') {
return tk->tk_symb = '#';
}
1986-09-25 19:39:06 +00:00
break;
1986-03-20 14:52:03 +00:00
case '>':
if (nch == '=') {
return tk->tk_symb = GREATEREQUAL;
}
1986-09-25 19:39:06 +00:00
break;
1986-03-20 14:52:03 +00:00
default :
1986-06-06 02:22:09 +00:00
crash("(LLlex, STCOMP)");
1986-03-20 14:52:03 +00:00
}
PushBack();
1986-09-25 19:39:06 +00:00
return tk->tk_symb = ch;
1986-03-20 14:52:03 +00:00
case STIDF:
{
1986-09-25 19:39:06 +00:00
register char *tag = &buf[0];
register t_idf *id;
1986-03-20 14:52:03 +00:00
do {
1986-09-25 19:39:06 +00:00
if (tag - buf < idfsize) *tag++ = ch;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
if (ch == '_' && *(tag-1) == '_') {
lexerror("an identifier may not contain two consecutive underscores");
}
1986-03-20 14:52:03 +00:00
} while(in_idf(ch));
PushBack();
1988-03-23 17:44:25 +00:00
*tag = '\0';
if (*(tag - 1) == '_') {
lexerror("last character of an identifier may not be an underscore");
1988-03-23 17:44:25 +00:00
}
1986-03-20 14:52:03 +00:00
1986-03-26 22:46:48 +00:00
tk->TOK_IDF = id = str2idf(buf, 1);
1986-03-20 14:52:03 +00:00
return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
}
1986-09-25 19:39:06 +00:00
case STSTR: {
register struct string *str = GetString(ch);
if (str->s_length == 1) {
tk->TOK_INT = *(str->s_str) & 0377;
1986-06-04 09:01:48 +00:00
toktype = char_type;
1986-09-25 19:39:06 +00:00
free(str->s_str);
free((char *) str);
1986-06-04 09:01:48 +00:00
}
else {
1986-09-25 19:39:06 +00:00
tk->tk_data.tk_str = str;
if (! fit((arith)(str->s_length), (int) word_size)) {
lexerror("string too long");
}
toktype = standard_type(T_STRING, 1, (arith)(str->s_length));
1986-06-04 09:01:48 +00:00
}
1986-03-20 14:52:03 +00:00
return tk->tk_symb = STRING;
1986-09-25 19:39:06 +00:00
}
1986-03-20 14:52:03 +00:00
case STNUM:
{
/* The problem arising with the "parsing" of a number
is that we don't know the base in advance so we
have to read the number with the help of a rather
complex finite automaton.
*/
enum statetp {Oct,OptHex,Hex,Dec,OctEndOrHex,End,OptReal,Real};
1986-09-25 19:39:06 +00:00
register enum statetp state;
1988-10-13 15:43:23 +00:00
register int base = 8;
register char *np = &buf[0];
1986-03-20 14:52:03 +00:00
/* allow a '-' to be added */
*np++ = ch;
1986-09-25 19:39:06 +00:00
state = is_oct(ch) ? Oct : Dec;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
1986-09-25 19:39:06 +00:00
for (;;) {
switch(state) {
case Oct:
while (is_oct(ch)) {
if (np < &buf[NUMSIZE]) *np++ = ch;
LoadChar(ch);
}
if (ch == 'B' || ch == 'C') {
state = OctEndOrHex;
break;
}
/* Fall Through */
case Dec:
base = 10;
while (is_dig(ch)) {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
}
if (ch == 'D') state = OptHex;
else if (is_hex(ch)) state = Hex;
1986-09-25 19:39:06 +00:00
else if (ch == '.') state = OptReal;
else {
state = End;
if (ch == 'H') base = 16;
else PushBack();
1986-03-20 14:52:03 +00:00
}
1986-09-25 19:39:06 +00:00
break;
case OptHex:
LoadChar(ch);
if (is_hex(ch)) {
if (np < &buf[NUMSIZE]) *np++ = 'D';
state = Hex;
}
1988-01-26 13:54:24 +00:00
else {
state = End;
1988-01-27 15:10:16 +00:00
ch = 'D';
1988-01-26 13:54:24 +00:00
PushBack();
}
break;
1986-09-25 19:39:06 +00:00
case Hex:
while (is_hex(ch)) {
if (np < &buf[NUMSIZE]) *np++ = ch;
LoadChar(ch);
}
base = 16;
state = End;
if (ch != 'H') {
lexerror("H expected after hex number");
PushBack();
1986-09-25 19:39:06 +00:00
}
break;
case OctEndOrHex:
if (np < &buf[NUMSIZE]) *np++ = ch;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
1986-09-25 19:39:06 +00:00
if (ch == 'H') {
base = 16;
state = End;
break;
}
if (is_hex(ch)) {
state = Hex;
break;
}
PushBack();
1986-09-25 19:39:06 +00:00
ch = *--np;
*np++ = '\0';
/* Fall through */
1987-11-26 14:15:24 +00:00
case End: {
int ovfl = 0;
1987-11-26 14:15:24 +00:00
1986-12-01 10:06:53 +00:00
*np = '\0';
if (np >= &buf[NUMSIZE]) {
tk->TOK_INT = 1;
lexerror("constant too long");
}
else {
/* The upperbound will be the same as
when computed with something like
max(unsigned long) / base (when base
is even). The problem is that
unsigned long or unsigned arith is
not accepted by all compilers
*/
arith ubound = max_int[sizeof(arith)]
/ (base >> 1);
np = &buf[0];
1986-12-01 10:06:53 +00:00
while (*np == '0') np++;
1987-11-26 14:15:24 +00:00
tk->TOK_INT = 0;
while (*np) {
int c;
if (is_dig(*np)) {
c = *np++ - '0';
}
else {
assert(is_hex(*np));
c = *np++ - 'A' + 10;
}
if (tk->TOK_INT < 0 ||
tk->TOK_INT > ubound) {
ovfl++;
}
tk->TOK_INT = tk->TOK_INT*base;
if (tk->TOK_INT < 0 &&
tk->TOK_INT + c >= 0) {
ovfl++;
}
tk->TOK_INT += c;
1986-12-01 10:06:53 +00:00
}
}
1987-11-26 14:15:24 +00:00
toktype = card_type;
if (ch == 'C' && base == 8) {
1986-09-25 19:39:06 +00:00
toktype = char_type;
if (ovfl != 0 || tk->TOK_INT>255 ||
tk->TOK_INT < 0) {
1986-11-05 14:33:00 +00:00
lexwarning(W_ORDINARY, "character constant out of range");
1986-09-25 19:39:06 +00:00
}
1989-12-07 16:28:05 +00:00
CheckForLet();
return tk->tk_symb = INTEGER;
1986-04-25 10:14:08 +00:00
}
if (ch == 'D' && base == 10) {
if (ovfl != 0 ||
tk->TOK_INT > max_int[(int)long_size] ||
tk->TOK_INT < 0) {
ovfl = 1;
1987-11-26 14:15:24 +00:00
}
toktype = longint_type;
}
else if (ovfl == 0 && tk->TOK_INT >= 0 &&
tk->TOK_INT<=max_int[(int)int_size]) {
1986-09-25 19:39:06 +00:00
toktype = intorcard_type;
1986-03-20 14:52:03 +00:00
}
else if (! chk_bounds(tk->TOK_INT,
full_mask[(int)int_size],
T_CARDINAL)) {
ovfl = 1;
}
if (ovfl)
lexwarning(W_ORDINARY, "overflow in constant");
1989-12-07 16:28:05 +00:00
CheckForLet();
1986-09-25 19:39:06 +00:00
return tk->tk_symb = INTEGER;
1987-11-26 14:15:24 +00:00
}
1986-09-25 19:39:06 +00:00
case OptReal:
/* The '.' could be the first of the '..'
token. At this point, we need a
look-ahead of two characters.
*/
1986-03-20 14:52:03 +00:00
LoadChar(ch);
1986-09-25 19:39:06 +00:00
if (ch == '.') {
/* Indeed the '..' token
*/
1986-12-09 17:41:06 +00:00
PushBack();
PushBack();
1986-09-25 19:39:06 +00:00
state = End;
base = 10;
break;
}
state = Real;
break;
1986-03-20 14:52:03 +00:00
}
1986-09-25 19:39:06 +00:00
if (state == Real) break;
}
1986-03-20 14:52:03 +00:00
1986-09-25 19:39:06 +00:00
/* a real real constant */
if (np < &buf[NUMSIZE]) *np++ = '.';
toktype = real_type;
1986-09-25 19:39:06 +00:00
while (is_dig(ch)) {
/* Fractional part
1986-03-20 14:52:03 +00:00
*/
1986-09-25 19:39:06 +00:00
if (np < &buf[NUMSIZE]) *np++ = ch;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
1986-09-25 19:39:06 +00:00
}
1986-03-20 14:52:03 +00:00
1989-04-19 15:00:07 +00:00
if (ch == 'D') {
toktype = longreal_type;
LoadChar(ch);
if (ch == '+' || ch == '-' || is_dig(ch)) {
ch = 'E';
PushBack();
}
1989-04-19 15:00:07 +00:00
}
if (ch == 'E') {
/* Scale factor
*/
if (np < &buf[NUMSIZE]) *np++ = ch;
1986-09-25 19:39:06 +00:00
LoadChar(ch);
if (ch == '+' || ch == '-') {
/* Signed scalefactor
*/
if (np < &buf[NUMSIZE]) *np++ = ch;
LoadChar(ch);
1986-03-20 14:52:03 +00:00
}
if (is_dig(ch)) {
do {
1986-09-25 19:39:06 +00:00
if (np < &buf[NUMSIZE]) *np++ = ch;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
} while (is_dig(ch));
}
1986-09-25 19:39:06 +00:00
else {
lexerror("bad scale factor");
1986-03-20 14:52:03 +00:00
}
1986-09-25 19:39:06 +00:00
}
1986-03-20 14:52:03 +00:00
1986-10-21 14:45:43 +00:00
*np++ = '\0';
PushBack();
1986-03-20 14:52:03 +00:00
tk->tk_data.tk_real = new_real();
1986-09-25 19:39:06 +00:00
if (np >= &buf[NUMSIZE]) {
tk->TOK_REAL = Salloc("0.0", 4);
1987-11-03 12:44:09 +00:00
lexerror("real constant too long");
1986-03-20 14:52:03 +00:00
}
else tk->TOK_REAL = Salloc(buf, (unsigned) (np - buf));
1989-12-07 16:28:05 +00:00
CheckForLet();
flt_str2flt(tk->TOK_REAL, &(tk->TOK_RVAL));
if (flt_status == FLT_OVFL) {
lexwarning(W_ORDINARY, "overflow in floating point constant");
}
1986-09-25 19:39:06 +00:00
return tk->tk_symb = REAL;
1986-03-20 14:52:03 +00:00
/*NOTREACHED*/
}
case STEOI:
1986-03-24 17:29:57 +00:00
return tk->tk_symb = -1;
1986-03-20 14:52:03 +00:00
case STCHAR:
default:
1986-06-06 02:22:09 +00:00
crash("(LLlex) Impossible character class");
1986-10-06 20:36:30 +00:00
/*NOTREACHED*/
1986-03-20 14:52:03 +00:00
}
1986-03-24 17:29:57 +00:00
/*NOTREACHED*/
1986-03-20 14:52:03 +00:00
}