ack/lang/m2/m2mm/LLlex.c

446 lines
7.5 KiB
C
Raw Normal View History

1987-09-24 13:01:27 +00:00
/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*
* Author: Ceriel J.H. Jacobs
*/
/* L E X I C A L A N A L Y S E R F O R M O D U L A - 2 */
/* $Header$ */
#include <alloc.h>
#include "idfsize.h"
#include "idf.h"
#include "LLlex.h"
#include "input.h"
#include "f_info.h"
#include "Lpars.h"
#include "class.h"
struct token dot,
aside;
int idfsize = IDFSIZE;
int ForeignFlag;
static int eofseen;
extern char options[];
STATIC
SkipComment()
{
/* Skip Modula-2 comments (* ... *).
Note that comments may be nested (par. 3.5).
*/
register int ch;
register int CommentLevel = 0;
LoadChar(ch);
if (ch == '$') {
LoadChar(ch);
switch(ch) {
case 'F':
/* Foreign; This definition module has an
implementation in another language.
In this case, check that the object file is present
and don't generate a rule for it.
*/
ForeignFlag = 1;
break;
default:
PushBack();
break;
}
}
for (;;) {
if (class(ch) == STNL) {
LineNumber++;
}
else if (ch == '(') {
LoadChar(ch);
if (ch == '*') CommentLevel++;
else continue;
}
else if (ch == '*') {
LoadChar(ch);
if (ch == ')') {
CommentLevel--;
if (CommentLevel < 0) break;
}
else continue;
}
else if (ch == EOI) {
lexerror("unterminated comment");
break;
}
LoadChar(ch);
}
}
STATIC
GetString(upto)
{
/* Read a Modula-2 string, delimited by the character "upto".
*/
register int ch;
register char *p;
while (LoadChar(ch), ch != upto) {
if (class(ch) == STNL) {
lexerror("newline in string");
LineNumber++;
break;
}
if (ch == EOI) {
lexerror("end-of-file in string");
break;
}
}
}
static char *s_error = "illegal line directive";
STATIC int
getch()
{
register int ch;
for (;;) {
LoadChar(ch);
if ((ch & 0200) && ch != EOI) {
error("non-ascii '\\%03o' read", ch & 0377);
continue;
}
break;
}
if (ch == EOI) {
eofseen = 1;
return '\n';
}
return ch;
}
CheckForLineDirective()
{
register int ch = getch();
register int i = 0;
char buf[IDFSIZE + 2];
register char *c = buf;
if (ch != '#') {
PushBack();
return;
}
do { /*
* Skip to next digit
* Do not skip newlines
*/
ch = getch();
if (class(ch) == STNL) {
LineNumber++;
error(s_error);
return;
}
} while (class(ch) != STNUM);
while (class(ch) == STNUM) {
i = i*10 + (ch - '0');
ch = getch();
}
while (ch != '"' && class(ch) != STNL) ch = getch();
if (ch == '"') {
c = buf;
do {
*c++ = ch = getch();
if (class(ch) == STNL) {
LineNumber++;
error(s_error);
return;
}
} while (ch != '"');
*--c = '\0';
do {
ch = getch();
} while (class(ch) != STNL);
/*
* Remember the file name
*/
if (!eofseen && strcmp(FileName,buf)) {
FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
}
}
if (eofseen) {
error(s_error);
return;
}
LineNumber = i;
}
char idfbuf[IDFSIZE + 2];
int
LLlex()
{
/* LLlex() is the Lexical Analyzer.
The putting aside of tokens is taken into account.
*/
register struct token *tk = &dot;
register int ch, nch;
if (ASIDE) { /* a token is put aside */
*tk = aside;
ASIDE = 0;
return tk->tk_symb;
}
again1:
if (eofseen) {
eofseen = 0;
ch = EOI;
}
else {
again:
LoadChar(ch);
if ((ch & 0200) && ch != EOI) {
error("non-ascii '\\%03o' read", ch & 0377);
goto again;
}
}
tk->tk_lineno = LineNumber;
switch (class(ch)) {
case STNL:
LineNumber++;
CheckForLineDirective();
goto again1;
case STSKIP:
goto again;
case STGARB:
if ((unsigned) ch - 040 < 0137) {
lexerror("garbage char %c", ch);
}
else lexerror("garbage char \\%03o", ch);
goto again;
case STSIMP:
if (ch == '(') {
LoadChar(nch);
if (nch == '*') {
SkipComment();
goto again;
}
else if (nch == EOI) eofseen = 1;
else PushBack();
}
if (ch == '&') return tk->tk_symb = AND;
if (ch == '~') return tk->tk_symb = NOT;
return tk->tk_symb = ch;
case STCOMP:
LoadChar(nch);
switch (ch) {
case '.':
if (nch == '.') {
return tk->tk_symb = UPTO;
}
break;
case ':':
if (nch == '=') {
return tk->tk_symb = BECOMES;
}
break;
case '<':
if (nch == '=') {
return tk->tk_symb = LESSEQUAL;
}
if (nch == '>') {
return tk->tk_symb = '#';
}
break;
case '>':
if (nch == '=') {
return tk->tk_symb = GREATEREQUAL;
}
break;
default :
crash("(LLlex, STCOMP)");
}
if (nch == EOI) eofseen = 1;
else PushBack();
return tk->tk_symb = ch;
case STIDF:
{
register char *tag = &idfbuf[0];
register struct idf *id;
do {
if (tag - idfbuf < idfsize) *tag++ = ch;
LoadChar(ch);
} while(in_idf(ch));
if (ch == EOI) eofseen = 1;
else PushBack();
*tag++ = '\0';
tk->TOK_IDF = id = findidf(idfbuf);
return tk->tk_symb = id && id->id_reserved ? id->id_reserved : IDENT;
}
case STSTR:
GetString(ch);
return tk->tk_symb = STRING;
case STNUM:
{
/* The problem arising with the "parsing" of a number
is that we don't know the base in advance so we
have to read the number with the help of a rather
complex finite automaton.
*/
enum statetp {Oct,OptHex,Hex,Dec,OctEndOrHex,End,OptReal,Real};
register enum statetp state;
state = is_oct(ch) ? Oct : Dec;
LoadChar(ch);
for (;;) {
switch(state) {
case Oct:
while (is_oct(ch)) {
LoadChar(ch);
}
if (ch == 'B' || ch == 'C') {
state = OctEndOrHex;
break;
}
/* Fall Through */
case Dec:
while (is_dig(ch)) {
LoadChar(ch);
}
if (ch == 'D') state = OptHex;
else if (is_hex(ch)) state = Hex;
else if (ch == '.') state = OptReal;
else {
state = End;
if (ch == 'H') ;
else if (ch == EOI) eofseen = 1;
else PushBack();
}
break;
case OptHex:
LoadChar(ch);
if (is_hex(ch)) {
state = Hex;
}
else state = End;
break;
case Hex:
while (is_hex(ch)) {
LoadChar(ch);
}
state = End;
if (ch != 'H') {
lexerror("H expected after hex number");
if (ch == EOI) eofseen = 1;
else PushBack();
}
break;
case OctEndOrHex:
LoadChar(ch);
if (ch == 'H') {
state = End;
break;
}
if (is_hex(ch)) {
state = Hex;
break;
}
if (ch == EOI) eofseen = 1;
else PushBack();
/* Fall through */
case End:
return tk->tk_symb = INTEGER;
case OptReal:
/* The '.' could be the first of the '..'
token. At this point, we need a
look-ahead of two characters.
*/
LoadChar(ch);
if (ch == '.') {
/* Indeed the '..' token
*/
PushBack();
PushBack();
state = End;
break;
}
state = Real;
break;
}
if (state == Real) break;
}
while (is_dig(ch)) {
/* Fractional part
*/
LoadChar(ch);
}
if (ch == 'E' || ch == 'D') {
/* Scale factor
*/
if (ch == 'D') {
LoadChar(ch);
if (!(ch == '+' || ch == '-' || is_dig(ch)))
goto noscale;
}
LoadChar(ch);
if (ch == '+' || ch == '-') {
/* Signed scalefactor
*/
LoadChar(ch);
}
if (is_dig(ch)) {
do {
LoadChar(ch);
} while (is_dig(ch));
}
else {
lexerror("bad scale factor");
}
}
noscale:
if (ch == EOI) eofseen = 1;
else PushBack();
return tk->tk_symb = REAL;
/*NOTREACHED*/
}
case STEOI:
return tk->tk_symb = -1;
case STCHAR:
default:
crash("(LLlex) Impossible character class");
/*NOTREACHED*/
}
/*NOTREACHED*/
}