ack/lang/m2/comp/LLlex.c

401 lines
6.6 KiB
C
Raw Normal View History

1986-03-26 15:11:02 +00:00
/* L E X I C A L A N A L Y S E R F O R M O D U L A - 2 */
1986-03-20 14:52:03 +00:00
1986-03-26 15:11:02 +00:00
static char *RcsId = "$Header$";
#include <alloc.h>
#include <em_arith.h>
1986-04-15 17:51:53 +00:00
#include <em_label.h>
1986-04-02 17:34:21 +00:00
#include <assert.h>
1986-04-17 09:28:09 +00:00
#include "idfsize.h"
#include "numsize.h"
#include "strsize.h"
1986-03-26 15:11:02 +00:00
#include "input.h"
#include "f_info.h"
#include "Lpars.h"
#include "class.h"
#include "idf.h"
1986-04-15 17:51:53 +00:00
#include "type.h"
1986-03-26 15:11:02 +00:00
#include "LLlex.h"
1986-04-15 17:51:53 +00:00
#include "const.h"
1986-03-26 15:11:02 +00:00
1986-03-20 14:52:03 +00:00
long str2long();
struct token dot, aside;
1986-04-15 17:51:53 +00:00
struct type *numtype;
1986-04-04 13:47:04 +00:00
struct string string;
1986-04-17 09:28:09 +00:00
int idfsize = IDFSIZE;
1986-03-20 14:52:03 +00:00
1986-03-24 17:29:57 +00:00
static
SkipComment()
1986-03-20 14:52:03 +00:00
{
1986-04-03 17:41:26 +00:00
/* Skip Modula-2 comments (* ... *).
Note that comments may be nested (par. 3.5).
*/
1986-03-24 17:29:57 +00:00
register int ch;
register int NestLevel = 0;
LoadChar(ch);
for (;;) {
if (class(ch) == STNL) {
LineNumber++;
}
else
if (ch == '(') {
LoadChar(ch);
if (ch == '*') {
++NestLevel;
}
else {
continue;
}
}
else
if (ch == '*') {
LoadChar(ch);
if (ch == ')') {
if (NestLevel-- == 0) {
return;
}
}
else {
continue;
}
}
LoadChar(ch);
1986-03-20 14:52:03 +00:00
}
1986-03-24 17:29:57 +00:00
}
1986-03-20 14:52:03 +00:00
1986-04-04 13:47:04 +00:00
static
1986-03-24 17:29:57 +00:00
GetString(upto)
{
1986-04-03 17:41:26 +00:00
/* Read a Modula-2 string, delimited by the character "upto".
*/
1986-03-24 17:29:57 +00:00
register int ch;
1986-04-04 13:47:04 +00:00
register struct string *str = &string;
register char *p;
1986-03-24 17:29:57 +00:00
1986-04-18 17:53:47 +00:00
str->s_str = p = Malloc(str->s_length = ISTRSIZE);
1986-03-24 17:29:57 +00:00
LoadChar(ch);
while (ch != upto) {
if (class(ch) == STNL) {
lexerror("newline in string");
LineNumber++;
break;
}
if (ch == EOI) {
lexerror("end-of-file in string");
break;
}
1986-04-04 13:47:04 +00:00
*p++ = ch;
if (p - str->s_str == str->s_length) {
1986-04-17 09:28:09 +00:00
str->s_str = Srealloc(str->s_str,
str->s_length + RSTRSIZE);
p = str->s_str + str->s_length;
str->s_length += RSTRSIZE;
1986-03-24 17:29:57 +00:00
}
LoadChar(ch);
}
1986-04-04 13:47:04 +00:00
*p = '\0';
str->s_length = p - str->s_str;
1986-03-20 14:52:03 +00:00
}
int
1986-03-24 17:29:57 +00:00
LLlex()
1986-03-20 14:52:03 +00:00
{
1986-04-17 09:28:09 +00:00
/* LLlex() is the Lexical Analyzer.
1986-04-03 17:41:26 +00:00
The putting aside of tokens is taken into account.
*/
1986-03-24 17:29:57 +00:00
register struct token *tk = &dot;
1986-03-20 14:52:03 +00:00
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
register int ch, nch;
1986-04-15 17:51:53 +00:00
numtype = error_type;
1986-03-24 17:29:57 +00:00
if (ASIDE) { /* a token is put aside */
*tk = aside;
ASIDE = 0;
return tk->tk_symb;
}
tk->tk_lineno = LineNumber;
1986-04-04 13:47:04 +00:00
tk->tk_filename = FileName;
1986-03-24 17:29:57 +00:00
1986-03-20 14:52:03 +00:00
again:
LoadChar(ch);
if ((ch & 0200) && ch != EOI) {
fatal("non-ascii '\\%03o' read", ch & 0377);
}
1986-04-04 13:47:04 +00:00
1986-03-20 14:52:03 +00:00
switch (class(ch)) {
case STSKIP:
goto again;
case STNL:
LineNumber++;
1986-03-24 17:29:57 +00:00
tk->tk_lineno++;
1986-03-20 14:52:03 +00:00
goto again;
case STGARB:
if (040 < ch && ch < 0177) {
lexerror("garbage char %c", ch);
}
else {
lexerror("garbage char \\%03o", ch);
}
goto again;
case STSIMP:
if (ch == '(') {
LoadChar(nch);
if (nch == '*') {
SkipComment();
goto again;
}
else {
PushBack(nch);
}
}
return tk->tk_symb = ch;
case STCOMP:
LoadChar(nch);
switch (ch) {
case '.':
if (nch == '.') {
return tk->tk_symb = UPTO;
}
PushBack(nch);
return tk->tk_symb = ch;
case ':':
if (nch == '=') {
return tk->tk_symb = BECOMES;
}
PushBack(nch);
return tk->tk_symb = ch;
case '<':
if (nch == '=') {
return tk->tk_symb = LESSEQUAL;
}
else
if (nch == '>') {
return tk->tk_symb = UNEQUAL;
}
PushBack(nch);
return tk->tk_symb = ch;
case '>':
if (nch == '=') {
return tk->tk_symb = GREATEREQUAL;
}
PushBack(nch);
return tk->tk_symb = ch;
default :
1986-04-02 17:34:21 +00:00
assert(0);
1986-03-20 14:52:03 +00:00
}
case STIDF:
{
register char *tg = &buf[0];
register struct idf *id;
do {
1986-04-17 09:28:09 +00:00
if (tg - buf < idfsize) *tg++ = ch;
1986-03-20 14:52:03 +00:00
LoadChar(ch);
} while(in_idf(ch));
if (ch != EOI)
PushBack(ch);
*tg++ = '\0';
1986-03-26 22:46:48 +00:00
tk->TOK_IDF = id = str2idf(buf, 1);
1986-03-20 14:52:03 +00:00
if (!id) fatal("Out of memory");
return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
}
case STSTR:
1986-04-04 13:47:04 +00:00
GetString(ch);
tk->tk_data.tk_str = string;
1986-03-20 14:52:03 +00:00
return tk->tk_symb = STRING;
case STNUM:
{
/* The problem arising with the "parsing" of a number
is that we don't know the base in advance so we
have to read the number with the help of a rather
complex finite automaton.
Excuses for the very ugly code!
*/
register char *np = &buf[1];
/* allow a '-' to be added */
1986-04-08 18:15:46 +00:00
buf[0] = '-';
1986-03-20 14:52:03 +00:00
*np++ = ch;
LoadChar(ch);
while (is_oct(ch)) {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
}
switch (ch) {
case 'H':
Shex: *np++ = '\0';
1986-04-15 17:51:53 +00:00
numtype = card_type;
1986-03-20 14:52:03 +00:00
tk->TOK_INT = str2long(&buf[1], 16);
return tk->tk_symb = INTEGER;
case '8':
case '9':
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
if (is_hex(ch))
goto S2;
if (ch == 'H')
goto Shex;
if (ch == '.')
goto Sreal;
PushBack(ch);
goto Sdec;
case 'B':
case 'C':
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
if (ch == 'H')
goto Shex;
if (is_hex(ch))
goto S2;
PushBack(ch);
ch = *--np;
*np++ = '\0';
1986-04-15 17:51:53 +00:00
if (ch == 'C') {
numtype = char_type;
}
else numtype = card_type;
1986-03-20 14:52:03 +00:00
tk->TOK_INT = str2long(&buf[1], 8);
return tk->tk_symb = INTEGER;
case 'A':
case 'D':
case 'E':
case 'F':
S2:
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_hex(ch));
if (ch != 'H') {
lexerror("H expected after hex number");
PushBack(ch);
}
goto Shex;
case '.':
Sreal:
/* This '.' could be the first of the '..'
token. At this point, we need a look-ahead
of two characters.
*/
LoadChar(ch);
if (ch == '.') {
/* Indeed the '..' token
*/
PushBack(ch);
PushBack(ch);
goto Sdec;
}
/* a real constant */
if (np < &buf[NUMSIZE]) {
*np++ = '.';
}
if (is_dig(ch)) {
/* Fractional part
*/
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
}
if (ch == 'E') {
/* Scale factor
*/
if (np < &buf[NUMSIZE]) {
*np++ = 'E';
}
LoadChar(ch);
if (ch == '+' || ch == '-') {
/* Signed scalefactor
*/
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
}
if (is_dig(ch)) {
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
}
else {
lexerror("bad scale factor");
}
}
PushBack(ch);
if (np == &buf[NUMSIZE + 1]) {
lexerror("floating constant too long");
tk->TOK_REL = Salloc("0.0", 5);
}
else {
tk->TOK_REL = Salloc(buf, np - buf) + 1;
}
return tk->tk_symb = REAL;
default:
PushBack(ch);
Sdec:
*np++ = '\0';
tk->TOK_INT = str2long(&buf[1], 10);
1986-04-15 17:51:53 +00:00
if (tk->TOK_INT < 0 || tk->TOK_INT > max_int) {
numtype = card_type;
}
else numtype = intorcard_type;
1986-03-20 14:52:03 +00:00
return tk->tk_symb = INTEGER;
}
/*NOTREACHED*/
}
case STEOI:
1986-03-24 17:29:57 +00:00
return tk->tk_symb = -1;
1986-03-20 14:52:03 +00:00
case STCHAR:
default:
1986-04-02 17:34:21 +00:00
assert(0);
1986-03-20 14:52:03 +00:00
}
1986-03-24 17:29:57 +00:00
/*NOTREACHED*/
1986-03-20 14:52:03 +00:00
}