1986-03-26 15:11:02 +00:00
|
|
|
/* L E X I C A L   A N A L Y S E R   F O R   M O D U L A - 2 */
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-05-01 19:06:53 +00:00
|
|
|
#include "debug.h"
|
|
|
|
#include "idfsize.h"
|
|
|
|
#include "numsize.h"
|
|
|
|
#include "strsize.h"
|
1986-03-26 15:11:02 +00:00
|
|
|
|
|
|
|
#include <alloc.h>
|
|
|
|
#include <em_arith.h>
|
1986-04-15 17:51:53 +00:00
|
|
|
#include <em_label.h>
|
1986-04-02 17:34:21 +00:00
|
|
|
#include <assert.h>
|
1986-04-17 09:28:09 +00:00
|
|
|
|
1986-03-26 15:11:02 +00:00
|
|
|
#include "input.h"
|
|
|
|
#include "f_info.h"
|
|
|
|
#include "Lpars.h"
|
|
|
|
#include "class.h"
|
|
|
|
#include "idf.h"
|
1986-04-15 17:51:53 +00:00
|
|
|
#include "type.h"
|
1986-03-26 15:11:02 +00:00
|
|
|
#include "LLlex.h"
|
1986-04-15 17:51:53 +00:00
|
|
|
#include "const.h"
|
1986-03-26 15:11:02 +00:00
|
|
|
|
1986-03-20 14:52:03 +00:00
|
|
|
long str2long();
|
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
struct token dot,
|
|
|
|
aside;
|
|
|
|
struct type *toktype;
|
|
|
|
int idfsize = IDFSIZE;
|
1986-06-06 09:35:11 +00:00
|
|
|
#ifdef DEBUG
|
|
|
|
extern int cntlines;
|
|
|
|
#endif
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-06-17 12:04:05 +00:00
|
|
|
STATIC
|
1986-03-24 17:29:57 +00:00
|
|
|
SkipComment()
|
1986-03-20 14:52:03 +00:00
|
|
|
{
|
1986-04-03 17:41:26 +00:00
|
|
|
/* Skip Modula-2 comments (* ... *).
|
|
|
|
Note that comments may be nested (par. 3.5).
|
|
|
|
*/
|
1986-03-24 17:29:57 +00:00
|
|
|
register int ch;
|
1986-10-06 20:36:30 +00:00
|
|
|
register int CommentLevel = 0;
|
1986-03-24 17:29:57 +00:00
|
|
|
|
1986-10-06 20:36:30 +00:00
|
|
|
LoadChar(ch);
|
1986-03-24 17:29:57 +00:00
|
|
|
for (;;) {
|
|
|
|
if (class(ch) == STNL) {
|
|
|
|
LineNumber++;
|
1986-06-06 09:35:11 +00:00
|
|
|
#ifdef DEBUG
|
|
|
|
cntlines++;
|
|
|
|
#endif
|
1986-03-24 17:29:57 +00:00
|
|
|
}
|
1986-06-17 12:04:05 +00:00
|
|
|
else if (ch == '(') {
|
1986-03-24 17:29:57 +00:00
|
|
|
LoadChar(ch);
|
1986-10-06 20:36:30 +00:00
|
|
|
if (ch == '*') CommentLevel++;
|
|
|
|
else continue;
|
1986-03-24 17:29:57 +00:00
|
|
|
}
|
1986-06-17 12:04:05 +00:00
|
|
|
else if (ch == '*') {
|
1986-03-24 17:29:57 +00:00
|
|
|
LoadChar(ch);
|
1986-10-06 20:36:30 +00:00
|
|
|
if (ch == ')') {
|
|
|
|
CommentLevel--;
|
|
|
|
if (CommentLevel < 0) break;
|
|
|
|
}
|
|
|
|
else continue;
|
1986-03-24 17:29:57 +00:00
|
|
|
}
|
1986-10-06 20:36:30 +00:00
|
|
|
else if (ch == EOI) {
|
|
|
|
lexerror("unterminated comment");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
LoadChar(ch);
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-03-24 17:29:57 +00:00
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
STATIC struct string *
|
1986-03-24 17:29:57 +00:00
|
|
|
GetString(upto)
|
|
|
|
{
|
1986-04-03 17:41:26 +00:00
|
|
|
/* Read a Modula-2 string, delimited by the character "upto".
|
|
|
|
*/
|
1986-03-24 17:29:57 +00:00
|
|
|
register int ch;
|
1986-09-25 19:39:06 +00:00
|
|
|
register struct string *str = (struct string *) Malloc(sizeof(struct string));
|
1986-04-04 13:47:04 +00:00
|
|
|
register char *p;
|
1986-03-24 17:29:57 +00:00
|
|
|
|
1986-10-06 20:36:30 +00:00
|
|
|
str->s_length = ISTRSIZE;
|
|
|
|
str->s_str = p = Malloc((unsigned int) ISTRSIZE);
|
1986-09-25 19:39:06 +00:00
|
|
|
while (LoadChar(ch), ch != upto) {
|
1986-03-24 17:29:57 +00:00
|
|
|
if (class(ch) == STNL) {
|
|
|
|
lexerror("newline in string");
|
|
|
|
LineNumber++;
|
1986-06-06 09:35:11 +00:00
|
|
|
#ifdef DEBUG
|
|
|
|
cntlines++;
|
|
|
|
#endif
|
1986-03-24 17:29:57 +00:00
|
|
|
break;
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
if (ch == EOI) {
|
1986-03-24 17:29:57 +00:00
|
|
|
lexerror("end-of-file in string");
|
|
|
|
break;
|
|
|
|
}
|
1986-04-04 13:47:04 +00:00
|
|
|
*p++ = ch;
|
|
|
|
if (p - str->s_str == str->s_length) {
|
1986-04-17 09:28:09 +00:00
|
|
|
str->s_str = Srealloc(str->s_str,
|
1986-04-28 18:06:58 +00:00
|
|
|
(unsigned int) str->s_length + RSTRSIZE);
|
1986-04-17 09:28:09 +00:00
|
|
|
p = str->s_str + str->s_length;
|
|
|
|
str->s_length += RSTRSIZE;
|
1986-03-24 17:29:57 +00:00
|
|
|
}
|
|
|
|
}
|
1986-04-04 13:47:04 +00:00
|
|
|
*p = '\0';
|
|
|
|
str->s_length = p - str->s_str;
|
1986-09-25 19:39:06 +00:00
|
|
|
return str;
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
1986-03-24 17:29:57 +00:00
|
|
|
LLlex()
|
1986-03-20 14:52:03 +00:00
|
|
|
{
|
1986-04-17 09:28:09 +00:00
|
|
|
/* LLlex() is the Lexical Analyzer.
|
1986-04-03 17:41:26 +00:00
|
|
|
The putting aside of tokens is taken into account.
|
|
|
|
*/
|
1986-03-24 17:29:57 +00:00
|
|
|
register struct token *tk = ˙
|
1986-06-04 09:01:48 +00:00
|
|
|
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 2];
|
1986-03-20 14:52:03 +00:00
|
|
|
register int ch, nch;
|
1986-10-22 15:38:24 +00:00
|
|
|
static int eofseen;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-06-04 09:01:48 +00:00
|
|
|
toktype = error_type;
|
1986-06-17 12:04:05 +00:00
|
|
|
|
1986-03-24 17:29:57 +00:00
|
|
|
if (ASIDE) { /* a token is put aside */
|
|
|
|
*tk = aside;
|
|
|
|
ASIDE = 0;
|
|
|
|
return tk->tk_symb;
|
|
|
|
}
|
1986-06-17 12:04:05 +00:00
|
|
|
|
1986-03-24 17:29:57 +00:00
|
|
|
tk->tk_lineno = LineNumber;
|
|
|
|
|
1986-10-22 15:38:24 +00:00
|
|
|
if (eofseen) {
|
|
|
|
eofseen = 0;
|
|
|
|
ch = EOI;
|
|
|
|
}
|
|
|
|
else {
|
1986-03-20 14:52:03 +00:00
|
|
|
again:
|
1986-10-22 15:38:24 +00:00
|
|
|
LoadChar(ch);
|
|
|
|
if ((ch & 0200) && ch != EOI) {
|
|
|
|
fatal("non-ascii '\\%03o' read", ch & 0377);
|
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-04-04 13:47:04 +00:00
|
|
|
|
1986-03-20 14:52:03 +00:00
|
|
|
switch (class(ch)) {
|
|
|
|
|
|
|
|
case STNL:
|
|
|
|
LineNumber++;
|
1986-06-06 09:35:11 +00:00
|
|
|
#ifdef DEBUG
|
|
|
|
cntlines++;
|
|
|
|
#endif
|
1986-03-24 17:29:57 +00:00
|
|
|
tk->tk_lineno++;
|
1986-09-25 19:39:06 +00:00
|
|
|
/* Fall Through */
|
|
|
|
|
|
|
|
case STSKIP:
|
1986-03-20 14:52:03 +00:00
|
|
|
goto again;
|
|
|
|
|
|
|
|
case STGARB:
|
|
|
|
if (040 < ch && ch < 0177) {
|
|
|
|
lexerror("garbage char %c", ch);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
lexerror("garbage char \\%03o", ch);
|
|
|
|
}
|
|
|
|
goto again;
|
|
|
|
|
|
|
|
case STSIMP:
|
|
|
|
if (ch == '(') {
|
|
|
|
LoadChar(nch);
|
|
|
|
if (nch == '*') {
|
|
|
|
SkipComment();
|
|
|
|
goto again;
|
|
|
|
}
|
1986-10-22 15:38:24 +00:00
|
|
|
else if (nch == EOI) eofseen = 1;
|
|
|
|
else PushBack(nch);
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
|
|
|
return tk->tk_symb = ch;
|
|
|
|
|
|
|
|
case STCOMP:
|
|
|
|
LoadChar(nch);
|
|
|
|
switch (ch) {
|
|
|
|
|
|
|
|
case '.':
|
|
|
|
if (nch == '.') {
|
|
|
|
return tk->tk_symb = UPTO;
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
break;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case ':':
|
|
|
|
if (nch == '=') {
|
|
|
|
return tk->tk_symb = BECOMES;
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
break;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case '<':
|
|
|
|
if (nch == '=') {
|
|
|
|
return tk->tk_symb = LESSEQUAL;
|
|
|
|
}
|
1986-05-30 18:48:00 +00:00
|
|
|
if (nch == '>') {
|
|
|
|
lexwarning("'<>' is old-fashioned; use '#'");
|
|
|
|
return tk->tk_symb = '#';
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
break;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case '>':
|
|
|
|
if (nch == '=') {
|
|
|
|
return tk->tk_symb = GREATEREQUAL;
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
break;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
default :
|
1986-06-06 02:22:09 +00:00
|
|
|
crash("(LLlex, STCOMP)");
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-10-22 15:38:24 +00:00
|
|
|
if (nch == EOI) eofseen = 1;
|
|
|
|
else PushBack(nch);
|
1986-09-25 19:39:06 +00:00
|
|
|
return tk->tk_symb = ch;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case STIDF:
|
|
|
|
{
|
1986-09-25 19:39:06 +00:00
|
|
|
register char *tag = &buf[0];
|
1986-03-20 14:52:03 +00:00
|
|
|
register struct idf *id;
|
|
|
|
|
|
|
|
do {
|
1986-09-25 19:39:06 +00:00
|
|
|
if (tag - buf < idfsize) *tag++ = ch;
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
|
|
|
} while(in_idf(ch));
|
|
|
|
|
1986-10-22 15:38:24 +00:00
|
|
|
if (ch == EOI) eofseen = 1;
|
|
|
|
else PushBack(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
*tag++ = '\0';
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-03-26 22:46:48 +00:00
|
|
|
tk->TOK_IDF = id = str2idf(buf, 1);
|
1986-03-20 14:52:03 +00:00
|
|
|
return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
|
|
|
|
}
|
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
case STSTR: {
|
|
|
|
register struct string *str = GetString(ch);
|
|
|
|
|
|
|
|
if (str->s_length == 1) {
|
|
|
|
tk->TOK_INT = *(str->s_str) & 0377;
|
1986-06-04 09:01:48 +00:00
|
|
|
toktype = char_type;
|
1986-09-25 19:39:06 +00:00
|
|
|
free(str->s_str);
|
|
|
|
free((char *) str);
|
1986-06-04 09:01:48 +00:00
|
|
|
}
|
|
|
|
else {
|
1986-09-25 19:39:06 +00:00
|
|
|
tk->tk_data.tk_str = str;
|
|
|
|
toktype = standard_type(T_STRING, 1, str->s_length);
|
1986-06-04 09:01:48 +00:00
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
return tk->tk_symb = STRING;
|
1986-09-25 19:39:06 +00:00
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case STNUM:
|
|
|
|
{
|
|
|
|
/* The problem arising with the "parsing" of a number
|
|
|
|
is that we don't know the base in advance so we
|
|
|
|
have to read the number with the help of a rather
|
|
|
|
complex finite automaton.
|
|
|
|
*/
|
1986-09-25 19:39:06 +00:00
|
|
|
enum statetp {Oct,Hex,Dec,OctEndOrHex,End,OptReal,Real};
|
|
|
|
register enum statetp state;
|
|
|
|
register int base;
|
1986-03-20 14:52:03 +00:00
|
|
|
register char *np = &buf[1];
|
|
|
|
/* allow a '-' to be added */
|
|
|
|
|
1986-04-08 18:15:46 +00:00
|
|
|
buf[0] = '-';
|
1986-03-20 14:52:03 +00:00
|
|
|
*np++ = ch;
|
1986-09-25 19:39:06 +00:00
|
|
|
state = is_oct(ch) ? Oct : Dec;
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
for (;;) {
|
|
|
|
switch(state) {
|
|
|
|
case Oct:
|
|
|
|
while (is_oct(ch)) {
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
|
|
|
LoadChar(ch);
|
|
|
|
}
|
|
|
|
if (ch == 'B' || ch == 'C') {
|
|
|
|
base = 8;
|
|
|
|
state = OctEndOrHex;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Fall Through */
|
|
|
|
case Dec:
|
|
|
|
base = 10;
|
|
|
|
while (is_dig(ch)) {
|
|
|
|
if (np < &buf[NUMSIZE]) {
|
|
|
|
*np++ = ch;
|
|
|
|
}
|
|
|
|
LoadChar(ch);
|
|
|
|
}
|
|
|
|
if (is_hex(ch)) state = Hex;
|
|
|
|
else if (ch == '.') state = OptReal;
|
|
|
|
else {
|
|
|
|
state = End;
|
|
|
|
if (ch == 'H') base = 16;
|
1986-10-22 15:38:24 +00:00
|
|
|
else if (ch == EOI) eofseen = 1;
|
1986-09-25 19:39:06 +00:00
|
|
|
else PushBack(ch);
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case Hex:
|
|
|
|
while (is_hex(ch)) {
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
|
|
|
LoadChar(ch);
|
|
|
|
}
|
|
|
|
base = 16;
|
|
|
|
state = End;
|
|
|
|
if (ch != 'H') {
|
|
|
|
lexerror("H expected after hex number");
|
1986-10-22 15:38:24 +00:00
|
|
|
if (ch == EOI) eofseen = 1;
|
|
|
|
else PushBack(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OctEndOrHex:
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
if (ch == 'H') {
|
|
|
|
base = 16;
|
|
|
|
state = End;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (is_hex(ch)) {
|
|
|
|
state = Hex;
|
|
|
|
break;
|
|
|
|
}
|
1986-10-22 15:38:24 +00:00
|
|
|
if (ch == EOI) eofseen = 1;
|
|
|
|
else PushBack(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
ch = *--np;
|
|
|
|
*np++ = '\0';
|
|
|
|
base = 8;
|
|
|
|
/* Fall through */
|
|
|
|
|
|
|
|
case End:
|
|
|
|
*np++ = '\0';
|
|
|
|
tk->TOK_INT = str2long(&buf[1], base);
|
|
|
|
if (ch == 'C' && base == 8) {
|
|
|
|
toktype = char_type;
|
|
|
|
if (tk->TOK_INT<0 || tk->TOK_INT>255) {
|
1986-04-25 10:14:08 +00:00
|
|
|
lexwarning("Character constant out of range");
|
1986-09-25 19:39:06 +00:00
|
|
|
}
|
1986-04-25 10:14:08 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
else if (tk->TOK_INT>=0 &&
|
|
|
|
tk->TOK_INT<=max_int) {
|
|
|
|
toktype = intorcard_type;
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
else toktype = card_type;
|
|
|
|
return tk->tk_symb = INTEGER;
|
|
|
|
|
|
|
|
case OptReal:
|
|
|
|
/* The '.' could be the first of the '..'
|
|
|
|
token. At this point, we need a
|
|
|
|
look-ahead of two characters.
|
|
|
|
*/
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
if (ch == '.') {
|
|
|
|
/* Indeed the '..' token
|
|
|
|
*/
|
|
|
|
PushBack(ch);
|
|
|
|
PushBack(ch);
|
|
|
|
state = End;
|
|
|
|
base = 10;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
state = Real;
|
|
|
|
break;
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
if (state == Real) break;
|
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
/* a real real constant */
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = '.';
|
|
|
|
|
|
|
|
while (is_dig(ch)) {
|
|
|
|
/* Fractional part
|
1986-03-20 14:52:03 +00:00
|
|
|
*/
|
1986-09-25 19:39:06 +00:00
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
1986-09-25 19:39:06 +00:00
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
if (ch == 'E') {
|
|
|
|
/* Scale factor
|
|
|
|
*/
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = 'E';
|
|
|
|
LoadChar(ch);
|
|
|
|
if (ch == '+' || ch == '-') {
|
|
|
|
/* Signed scalefactor
|
|
|
|
*/
|
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
|
|
|
LoadChar(ch);
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
|
|
|
if (is_dig(ch)) {
|
|
|
|
do {
|
1986-09-25 19:39:06 +00:00
|
|
|
if (np < &buf[NUMSIZE]) *np++ = ch;
|
1986-03-20 14:52:03 +00:00
|
|
|
LoadChar(ch);
|
|
|
|
} while (is_dig(ch));
|
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
else {
|
|
|
|
lexerror("bad scale factor");
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
}
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-10-21 14:45:43 +00:00
|
|
|
*np++ = '\0';
|
1986-10-22 15:38:24 +00:00
|
|
|
if (ch == EOI) eofseen = 1;
|
|
|
|
else PushBack(ch);
|
1986-03-20 14:52:03 +00:00
|
|
|
|
1986-09-25 19:39:06 +00:00
|
|
|
if (np >= &buf[NUMSIZE]) {
|
|
|
|
tk->TOK_REL = Salloc("0.0", 5);
|
|
|
|
lexerror("floating constant too long");
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-09-25 19:39:06 +00:00
|
|
|
else tk->TOK_REL = Salloc(buf, np - buf) + 1;
|
|
|
|
toktype = real_type;
|
|
|
|
return tk->tk_symb = REAL;
|
|
|
|
|
1986-03-20 14:52:03 +00:00
|
|
|
/*NOTREACHED*/
|
|
|
|
}
|
|
|
|
|
|
|
|
case STEOI:
|
1986-03-24 17:29:57 +00:00
|
|
|
return tk->tk_symb = -1;
|
1986-03-20 14:52:03 +00:00
|
|
|
|
|
|
|
case STCHAR:
|
|
|
|
default:
|
1986-06-06 02:22:09 +00:00
|
|
|
crash("(LLlex) Impossible character class");
|
1986-10-06 20:36:30 +00:00
|
|
|
/*NOTREACHED*/
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|
1986-03-24 17:29:57 +00:00
|
|
|
/*NOTREACHED*/
|
1986-03-20 14:52:03 +00:00
|
|
|
}
|