diff --git a/lang/m2/comp/LLlex.c b/lang/m2/comp/LLlex.c new file mode 100644 index 000000000..7380c3fed --- /dev/null +++ b/lang/m2/comp/LLlex.c @@ -0,0 +1,383 @@ +/* LEXICAL ANALYSER FOR MODULA-2 */ + +#include "input.h" +#include +#include "f_info.h" +#include "Lpars.h" +#include "class.h" +#include "param.h" +#include "idf.h" +#include "LLlex.h" + +long str2long(); +char *GetString(); + +struct token dot, aside; + +static char *RcsId = "$Header$"; + +int +LLlex() +{ + /* LLlex() plays the role of Lexical Analyzer for the parser. + The putting aside of tokens is taken into account. + */ + if (ASIDE) { /* a token is put aside */ + dot = aside; + ASIDE = 0; + } + else { + GetToken(&dot); + if (DOT == EOI) DOT = -1; + } + + return DOT; +} + +int +GetToken(tk) + register struct token *tk; +{ + char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1]; + register int ch, nch; + +again: + LoadChar(ch); + if ((ch & 0200) && ch != EOI) { + fatal("non-ascii '\\%03o' read", ch & 0377); + } + + switch (class(ch)) { + + case STSKIP: + goto again; + + case STNL: + LineNumber++; + goto again; + + case STGARB: + if (040 < ch && ch < 0177) { + lexerror("garbage char %c", ch); + } + else { + lexerror("garbage char \\%03o", ch); + } + goto again; + + case STSIMP: + if (ch == '(') { + LoadChar(nch); + if (nch == '*') { + SkipComment(); + goto again; + } + else { + PushBack(nch); + } + } + return tk->tk_symb = ch; + + case STCOMP: + LoadChar(nch); + switch (ch) { + + case '.': + if (nch == '.') { + return tk->tk_symb = UPTO; + } + PushBack(nch); + return tk->tk_symb = ch; + + case ':': + if (nch == '=') { + return tk->tk_symb = BECOMES; + } + PushBack(nch); + return tk->tk_symb = ch; + + case '<': + if (nch == '=') { + return tk->tk_symb = LESSEQUAL; + } + else + if (nch == '>') { + return tk->tk_symb = UNEQUAL; + } + PushBack(nch); + return tk->tk_symb = ch; + + case '>': + if (nch == '=') { + return tk->tk_symb = GREATEREQUAL; + } + PushBack(nch); + return tk->tk_symb = ch; + + default : + crash("bad STCOMP"); + } + + case STIDF: + { + register char *tg = &buf[0]; + register struct idf *id; + + do { + if (tg - buf < IDFSIZE) *tg++ = ch; + LoadChar(ch); + } while(in_idf(ch)); + + if (ch != EOI) + PushBack(ch); + *tg++ = '\0'; + + id = tk->TOK_IDF = str2idf(buf, 1); + if (!id) fatal("Out of memory"); + return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT; + } + + case STSTR: + tk->TOK_STR = GetString(ch); + return tk->tk_symb = STRING; + + case STNUM: + { + /* The problem arising with the "parsing" of a number + is that we don't know the base in advance so we + have to read the number with the help of a rather + complex finite automaton. + Excuses for the very ugly code! + */ + register char *np = &buf[1]; + /* allow a '-' to be added */ + + *np++ = ch; + + LoadChar(ch); + while (is_oct(ch)) { + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } + switch (ch) { + case 'H': +Shex: *np++ = '\0'; + /* Type is integer */ + tk->TOK_INT = str2long(&buf[1], 16); + return tk->tk_symb = INTEGER; + + case '8': + case '9': + do { + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } while (is_dig(ch)); + + if (is_hex(ch)) + goto S2; + if (ch == 'H') + goto Shex; + if (ch == '.') + goto Sreal; + PushBack(ch); + goto Sdec; + + case 'B': + case 'C': + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + if (ch == 'H') + goto Shex; + if (is_hex(ch)) + goto S2; + PushBack(ch); + ch = *--np; + *np++ = '\0'; + /* + * If (ch == 'C') type is a CHAR + * else type is an INTEGER + */ + tk->TOK_INT = str2long(&buf[1], 8); + return tk->tk_symb = INTEGER; + + case 'A': + case 'D': + case 'E': + case 'F': +S2: + do { + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } while (is_hex(ch)); + if (ch != 'H') { + lexerror("H expected after hex number"); + PushBack(ch); + } + goto Shex; + + case '.': +Sreal: + /* This '.' could be the first of the '..' + token. At this point, we need a look-ahead + of two characters. + */ + LoadChar(ch); + if (ch == '.') { + /* Indeed the '..' token + */ + PushBack(ch); + PushBack(ch); + goto Sdec; + } + + /* a real constant */ + if (np < &buf[NUMSIZE]) { + *np++ = '.'; + } + + if (is_dig(ch)) { + /* Fractional part + */ + do { + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } while (is_dig(ch)); + } + + if (ch == 'E') { + /* Scale factor + */ + if (np < &buf[NUMSIZE]) { + *np++ = 'E'; + } + LoadChar(ch); + if (ch == '+' || ch == '-') { + /* Signed scalefactor + */ + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } + if (is_dig(ch)) { + do { + if (np < &buf[NUMSIZE]) { + *np++ = ch; + } + LoadChar(ch); + } while (is_dig(ch)); + } + else { + lexerror("bad scale factor"); + } + } + + PushBack(ch); + + if (np == &buf[NUMSIZE + 1]) { + lexerror("floating constant too long"); + tk->TOK_REL = Salloc("0.0", 5); + } + else { + tk->TOK_REL = Salloc(buf, np - buf) + 1; + } + return tk->tk_symb = REAL; + + default: + PushBack(ch); +Sdec: + *np++ = '\0'; + /* Type is an integer */ + tk->TOK_INT = str2long(&buf[1], 10); + return tk->tk_symb = INTEGER; + } + /*NOTREACHED*/ + } + + case STEOI: + return tk->tk_symb = EOI; + + case STCHAR: + default: + crash("bad character class %d", class(ch)); + } +} + +char * +GetString(upto) +{ + register int ch; + int str_size; + char *str = Malloc(str_size = 32); + register int pos = 0; + + LoadChar(ch); + while (ch != upto) { + if (class(ch) == STNL) { + lexerror("newline in string"); + LineNumber++; + break; + } + if (ch == EOI) { + lexerror("end-of-file in string"); + break; + } + str[pos++] = ch; + if (pos == str_size) { + str = Srealloc(str, str_size += 8); + } + LoadChar(ch); + } + str[pos] = '\0'; + return str; +} + +SkipComment() +{ + /* Skip Modula-2 like comment (* ... *). + Note that comment may be nested. + */ + + register int ch; + register int NestLevel = 0; + + LoadChar(ch); + for (;;) { + if (class(ch) == STNL) { + LineNumber++; + } + else + if (ch == '(') { + LoadChar(ch); + if (ch == '*') { + ++NestLevel; + } + else { + continue; + } + } + else + if (ch == '*') { + LoadChar(ch); + if (ch == ')') { + if (NestLevel-- == 0) { + return; + } + } + else { + continue; + } + } + LoadChar(ch); + } +} diff --git a/lang/m2/comp/LLlex.h b/lang/m2/comp/LLlex.h new file mode 100644 index 000000000..e6a2dd810 --- /dev/null +++ b/lang/m2/comp/LLlex.h @@ -0,0 +1,27 @@ +/* Token Descriptor Definition */ + +/* $Header$ */ + +struct token { + int tk_symb; /* token itself */ + union { + struct idf *tk_idf; /* IDENT */ + char *tk_str; /* STRING */ + struct { /* INTEGER */ + int tk_type; /* type */ + long tk_value; /* value */ + } tk_int; + char *tk_real; /* REAL */ + } tk_data; +}; + +#define TOK_IDF tk_data.tk_idf +#define TOK_STR tk_data.tk_str +#define TOK_ITP tk_data.tk_int.tk_type +#define TOK_INT tk_data.tk_int.tk_value +#define TOK_REL tk_data.tk_real + +extern struct token dot, aside; + +#define DOT dot.tk_symb +#define ASIDE aside.tk_symb diff --git a/lang/m2/comp/LLmessage.c b/lang/m2/comp/LLmessage.c new file mode 100644 index 000000000..fe10602be --- /dev/null +++ b/lang/m2/comp/LLmessage.c @@ -0,0 +1,69 @@ +#include +#include "f_info.h" +#include "idf.h" +#include "LLlex.h" +#include "Lpars.h" + +static char *RcsId = "$Header$"; + +extern char *symbol2str(); +int err_occurred = 0; + +LLmessage(tk) + int tk; +{ + ++err_occurred; + if (tk) { + error("%s missing", symbol2str(tk)); + insert_token(tk); + } + else + error("%s deleted", symbol2str(dot.tk_symb)); +} + +struct idf * +gen_anon_idf() +{ + /* A new idf is created out of nowhere, to serve as an + anonymous name. + */ + static int name_cnt; + char buff[100]; + char *sprintf(); + + sprintf(buff, "#%d in %s, line %u", + ++name_cnt, FileName, LineNumber); + return str2idf(buff, 1); +} + +int +is_anon_idf(idf) + struct idf *idf; +{ + return idf->id_text[0] == '#'; +} + +insert_token(tk) + int tk; +{ + aside = dot; + + dot.tk_symb = tk; + + switch (tk) { + /* The operands need some body */ + case IDENT: + dot.TOK_IDF = gen_anon_idf(); + break; + case STRING: + dot.TOK_STR = Salloc("", 1); + break; + case INTEGER: +/* dot.TOK_ITP = INT; */ + dot.TOK_INT = 1; + break; + case REAL: + dot.TOK_REL = Salloc("0.0", 4); + break; + } +} diff --git a/lang/m2/comp/Makefile b/lang/m2/comp/Makefile new file mode 100644 index 000000000..c367db296 --- /dev/null +++ b/lang/m2/comp/Makefile @@ -0,0 +1,78 @@ +# make modula-2 "compiler" +# $Header$ + +HDIR = ../../em/h +PKGDIR = ../../em/pkg +LIBDIR = ../../em/lib +INCLUDES = -I$(HDIR) -I$(PKGDIR) -I/user1/erikb/h +LSRC = tokenfile.g program.g declar.g expression.g statement.g +CC = cc +GEN = LLgen +GENOPTIONS = +CFLAGS = -DDEBUG -O $(INCLUDES) +LOBJ = tokenfile.o program.o declar.o expression.o statement.o +COBJ = LLlex.o LLmessage.o char.o error.o main.o \ + symbol2str.o tokenname.o idf.o input.o idlist.o +OBJ = $(COBJ) $(LOBJ) Lpars.o +GENFILES= tokenfile.c \ + program.c declar.c expression.c statement.c \ + tokenfile.g symbol2str.c char.c Lpars.c Lpars.h + +all: + make LLfiles + make main + +LLfiles: $(LSRC) + $(GEN) $(GENOPTIONS) $(LSRC) + @touch LLfiles + +main: $(OBJ) Makefile + $(CC) $(LFLAGS) $(OBJ) $(LIBDIR)/libcomp.a /user1/erikb/em/lib/libstr.a /user1/erikb/lib/libsystem.a -o main + size main + +clean: + rm -f $(OBJ) $(GENFILES) LLfiles + +tokenfile.g: tokenname.c make.tokfile + make.tokfile tokenfile.g + +symbol2str.c: tokenname.c make.tokcase + make.tokcase symbol2str.c + +idlist.h: idlist.H make.allocd + +char.c: char.tab tab + ./tab -fchar.tab >char.c + +tab: + $(CC) tab.c -o tab + +depend: + sed '/^#AUTOAUTO/,$$d' Makefile > Makefile.new + echo '#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO' >> Makefile.new + /user1/erikb/bin/mkdep `sources $(OBJ)` |\ + sed 's/\.c:/\.o:/' >> Makefile.new + mv Makefile Makefile.old + mv Makefile.new Makefile + +.SUFFIXES: .H .h .C +.H.h .C.c : + make.allocd < $< > $@ + +#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO +LLlex.o: LLlex.h Lpars.h class.h f_info.h idf.h param.h +LLmessage.o: LLlex.h Lpars.h f_info.h idf.h +char.o: class.h +error.o: LLlex.h f_info.h +main.o: LLlex.h Lpars.h f_info.h idf.h +symbol2str.o: Lpars.h +tokenname.o: Lpars.h idf.h tokenname.h +idf.o: idf.h +input.o: f_info.h input.h +idlist.o: idf.h idlist.h +tokenfile.o: Lpars.h +program.o: Lpars.h idf.h idlist.h +declar.o: LLlex.h Lpars.h idf.h idlist.h +expression.o: Lpars.h +statement.o: Lpars.h +Lpars.o: Lpars.h diff --git a/lang/m2/comp/char.tab b/lang/m2/comp/char.tab new file mode 100644 index 000000000..53b2d69d6 --- /dev/null +++ b/lang/m2/comp/char.tab @@ -0,0 +1,54 @@ +% character tables for mod2 compiler +% $Header$ +%S129 +%F %s, +% +% CHARACTER CLASSES +% +%C +STGARB:\000-\200 +STSKIP: \r\t +STNL:\012\013\014 +STSIMP:#&()*+,-/;=[]^{|}~ +STCOMP:.:<> +STIDF:a-zA-Z +STSTR:"' +STNUM:0-9 +STEOI:\200 +%T#include "class.h" +%Tchar tkclass[] = { +%p +%T}; +% +% INIDF +% +%C +1:a-zA-Z_0-9 +%Tchar inidf[] = { +%F %s, +%p +%T}; +% +% ISDIG +% +%C +1:0-9 +%Tchar isdig[] = { +%p +%T}; +% +% ISHEX +% +%C +1:a-fA-F +%Tchar ishex[] = { +%p +%T}; +% +% ISOCT +% +%C +1:0-7 +%Tchar isoct[] = { +%p +%T}; diff --git a/lang/m2/comp/class.h b/lang/m2/comp/class.h new file mode 100644 index 000000000..322ac0509 --- /dev/null +++ b/lang/m2/comp/class.h @@ -0,0 +1,38 @@ +/* U S E O F C H A R A C T E R C L A S S E S */ + +/* $Header$ */ + +/* As a starter, chars are divided into classes, according to which + token they can be the start of. + At present such a class number is supposed to fit in 4 bits. +*/ + +#define class(ch) (tkclass[ch]) + +/* Being the start of a token is, fortunately, a mutual exclusive + property, so, as there are less than 16 classes they can be + packed in 4 bits. +*/ + +#define STSKIP 0 /* spaces and so on: skipped characters */ +#define STNL 1 /* newline character(s): update linenumber etc. */ +#define STGARB 2 /* garbage ascii character: not allowed */ +#define STSIMP 3 /* this character can occur as token */ +#define STCOMP 4 /* this one can start a compound token */ +#define STIDF 5 /* being the initial character of an identifier */ +#define STCHAR 6 /* the starter of a character constant */ +#define STSTR 7 /* the starter of a string */ +#define STNUM 8 /* the starter of a numeric constant */ +#define STEOI 9 /* End-Of-Information mark */ + +/* But occurring inside a token is not, so we need 1 bit for each + class. This is implemented as a collection of tables to speed up + the decision whether a character has a special meaning. +*/ +#define in_idf(ch) (inidf[ch]) +#define is_oct(ch) (isoct[ch]) +#define is_dig(ch) (isdig[ch]) +#define is_hex(ch) (ishex[ch]) + +extern char tkclass[]; +extern char inidf[], isoct[], isdig[], ishex[]; diff --git a/lang/m2/comp/declar.g b/lang/m2/comp/declar.g new file mode 100644 index 000000000..f1f77a05d --- /dev/null +++ b/lang/m2/comp/declar.g @@ -0,0 +1,181 @@ +{ +#include "idf.h" +#include "idlist.h" +#include "LLlex.h" + +static char *RcsId = "$Header$"; +} + +ProcedureDeclaration: + ProcedureHeading ';' block IDENT +; + +ProcedureHeading: + PROCEDURE IDENT FormalParameters? +; + +block: + declaration* [ BEGIN StatementSequence ]? END +; + +declaration: + CONST [ ConstantDeclaration ';' ]* +| + TYPE [ TypeDeclaration ';' ]* +| + VAR [ VariableDeclaration ';' ]* +| + ProcedureDeclaration ';' +| + ModuleDeclaration ';' +; + +FormalParameters: + '(' [ FPSection [ ';' FPSection ]* ]? ')' + [ ':' qualident ]? +; + +FPSection +{ + struct id_list *FPList; +} : + VAR? IdentList(&FPList) ':' FormalType +; + +FormalType: + [ ARRAY OF ]? qualident +; + +TypeDeclaration: + IDENT '=' type +; + +type: + SimpleType +| + ArrayType +| + RecordType +| + SetType +| + PointerType +| + ProcedureType +; + +SimpleType: + qualident + [ + + | + SubrangeType + /* + * The subrange type is given a base type by the + * qualident (this is new modula-2). + */ + ] +| + enumeration +| + SubrangeType +; + +enumeration +{ + struct id_list *EnumList; +} : + '(' IdentList(&EnumList) ')' +; + +IdentList(struct id_list **p;) +{ + register struct id_list *q = new_id_list(); +} : + IDENT { q->id_ptr = dot.TOK_IDF; } + [ + ',' IDENT { q->next = new_id_list(); + q = q->next; + q->id_ptr = dot.TOK_IDF; + } + ]* + { q->next = 0; + *p = q; + } +; + +SubrangeType: + /* + This is not exactly the rule in the new report, but see + the rule for "SimpleType". + */ + '[' ConstExpression UPTO ConstExpression ']' +; + +ArrayType: + ARRAY SimpleType [ ',' SimpleType ]* OF type +; + +RecordType: + RECORD FieldListSequence END +; + +FieldListSequence: + FieldList [ ';' FieldList ]* +; + +FieldList +{ + struct id_list *FldList; +} : +[ + IdentList(&FldList) ':' type +| + CASE IDENT? /* Changed rule in new modula-2 */ + ':' qualident + OF variant [ '|' variant ]* + [ ELSE FieldListSequence ]? + END +]? +; + +variant: + [ CaseLabelList ':' FieldListSequence ]? + /* Changed rule in new modula-2 */ +; + +CaseLabelList: + CaseLabels [ ',' CaseLabels ]* +; + +CaseLabels: + ConstExpression [ UPTO ConstExpression ]? +; + +SetType: + SET OF SimpleType +; + +PointerType: + POINTER TO type +; + +ProcedureType: + PROCEDURE FormalTypeList? +; + +FormalTypeList: + '(' [ VAR? FormalType [ ',' VAR? FormalType ]* ]? ')' + [ ':' qualident ]? +; + +ConstantDeclaration: + IDENT '=' ConstExpression +; + +VariableDeclaration +{ + struct id_list *VarList; +} : + IdentList(&VarList) ':' type +; diff --git a/lang/m2/comp/error.c b/lang/m2/comp/error.c new file mode 100644 index 000000000..3e04da2e0 --- /dev/null +++ b/lang/m2/comp/error.c @@ -0,0 +1,170 @@ +/* E R R O R A N D D I A G N O S T I C R O U T I N E S */ + +/* This file contains the (non-portable) error-message and diagnostic + giving functions. Be aware that they are called with a variable + number of arguments! +*/ + +#include +#include "input.h" +#include "f_info.h" +#include "LLlex.h" + +static char *RcsId = "$Header$"; + +#define ERROUT stderr + +#define ERROR 1 +#define WARNING 2 +#define LEXERROR 3 +#define LEXWARNING 4 +#define CRASH 5 +#define FATAL 6 +#define NONFATAL 7 +#ifdef DEBUG +#define VDEBUG 8 +#endif DEBUG + +int err_occurred; +/* + extern int ofd; /* compact.c * / + #define compiling (ofd >= 0) +*/ + +extern char options[]; + +/* There are two general error message giving functions: + error() : syntactic and semantic error messages + lexerror() : lexical and pre-processor error messages + The difference lies in the fact that the first function deals with + tokens already read in by the lexical analyzer so the name of the + file it comes from and the linenumber must be retrieved from the + token instead of looking at the global variables LineNumber and + FileName. +*/ + +/*VARARGS1*/ +error(fmt, args) + char *fmt; +{ + /* + if (compiling) + C_ms_err(); + */ + ++err_occurred; + _error(ERROR, fmt, &args); +} + +#ifdef DEBUG +debug(fmt, args) + char *fmt; +{ + if (options['D']) + _error(VDEBUG, fmt, &args); +} +#endif DEBUG + +/*VARARGS1*/ +lexerror(fmt, args) + char *fmt; +{ + /* + if (compiling) + C_ms_err(); + */ + ++err_occurred; + _error(LEXERROR, fmt, &args); +} + +/*VARARGS1*/ +lexwarning(fmt, args) char *fmt; { + if (options['w']) return; + _error(LEXWARNING, fmt, &args); +} + +/*VARARGS1*/ +crash(fmt, args) + char *fmt; + int args; +{ + /* + if (compiling) + C_ms_err(); + */ + _error(CRASH, fmt, &args); + fflush(ERROUT); + fflush(stderr); + fflush(stdout); + /* + cclose(); + */ + abort(); /* produce core by "Illegal Instruction" */ + /* this should be changed into exit(1) */ +} + +/*VARARGS1*/ +fatal(fmt, args) + char *fmt; + int args; +{ + /* + if (compiling) + C_ms_err(); + */ + _error(FATAL, fmt, &args); + exit(-1); +} + +/*VARARGS1*/ +nonfatal(fmt, args) + char *fmt; + int args; +{ + _error(NONFATAL, fmt, &args); +} + +/*VARARGS1*/ +warning(fmt, args) + char *fmt; +{ + if (options['w']) return; + _error(WARNING, fmt, &args); +} + +_error(class, fmt, argv) + int class; + char *fmt; + int argv[]; +{ + + switch (class) { + + case ERROR: + case LEXERROR: + fprintf(ERROUT, "%s, line %ld: ", FileName, LineNumber); + break; + case WARNING: + case LEXWARNING: + fprintf(ERROUT, "%s, line %ld: (warning) ", + FileName, LineNumber); + break; + case CRASH: + fprintf(ERROUT, "CRASH\007 %s, line %ld: \n", + FileName, LineNumber); + break; + case FATAL: + fprintf(ERROUT, "%s, line %ld: fatal error -- ", + FileName, LineNumber); + break; + case NONFATAL: + fprintf(ERROUT, "warning: "); /* no line number ??? */ + break; +#ifdef DEBUG + case VDEBUG: + fprintf(ERROUT, "-D "); + break; +#endif DEBUG + } + _doprnt(fmt, argv, ERROUT); + fprintf(ERROUT, "\n"); +} diff --git a/lang/m2/comp/expression.g b/lang/m2/comp/expression.g new file mode 100644 index 000000000..c56441fff --- /dev/null +++ b/lang/m2/comp/expression.g @@ -0,0 +1,97 @@ +{ +static char *RcsId = "$Header$"; +} + +number: + INTEGER +| + REAL +; + +qualident: + IDENT selector* +; + +selector: + '.' /* field */ IDENT +; + +ExpList: + expression [ ',' expression ]* +; + +ConstExpression: + expression + /* + * Changed rule in new Modula-2. + * Check that the expression is a constant expression and evaluate! + */ +; + +expression: + SimpleExpression [ relation SimpleExpression ]? +; + +relation: + '=' | '#' | UNEQUAL | '<' | LESSEQUAL | '>' | GREATEREQUAL | IN +; + +SimpleExpression: + [ '+' | '-' ]? term [ AddOperator term ]* +; + +AddOperator: + '+' | '-' | OR +; + +term: + factor [ MulOperator factor ]* +; + +MulOperator: + '*' | '/' | DIV | MOD | AND | '&' +; + +factor: + qualident + [ + designator_tail? ActualParameters? + | + bare_set + ] +| + bare_set +| %default + number +| + STRING +| + '(' expression ')' +| + NOT factor +; + +bare_set: + '{' [ element [ ',' element ]* ]? '}' +; + +ActualParameters: + '(' ExpList? ')' +; + +element: + expression [ UPTO expression ]? +; + +designator: + qualident designator_tail? +; + +designator_tail: + visible_designator_tail + [ selector | visible_designator_tail ]* +; + +visible_designator_tail: + '[' ExpList ']' | '^' +; diff --git a/lang/m2/comp/f_info.h b/lang/m2/comp/f_info.h new file mode 100644 index 000000000..c04496ad9 --- /dev/null +++ b/lang/m2/comp/f_info.h @@ -0,0 +1,11 @@ +/* $Header$ */ + +struct f_info { + unsigned int f_lineno; + char *f_filename; + char *f_workingdir; +}; + +extern struct f_info file_info; +#define LineNumber file_info.f_lineno +#define FileName file_info.f_filename diff --git a/lang/m2/comp/idf.c b/lang/m2/comp/idf.c new file mode 100644 index 000000000..d1b0380a2 --- /dev/null +++ b/lang/m2/comp/idf.c @@ -0,0 +1,4 @@ +/* $Header$ */ + +#include "idf.h" +#include diff --git a/lang/m2/comp/idf.h b/lang/m2/comp/idf.h new file mode 100644 index 000000000..46f7af0e4 --- /dev/null +++ b/lang/m2/comp/idf.h @@ -0,0 +1,5 @@ +/* $Header$ */ + +#define IDF_TYPE int +#define id_reserved id_user +#include diff --git a/lang/m2/comp/idlist.H b/lang/m2/comp/idlist.H new file mode 100644 index 000000000..9d320bb99 --- /dev/null +++ b/lang/m2/comp/idlist.H @@ -0,0 +1,12 @@ +/* $Header$ */ + +#include + +/* Structure to link idf structures together +*/ +struct id_list { + struct id_list *next; + struct idf *id_ptr; +}; + +/* ALLOCDEF "id_list" */ diff --git a/lang/m2/comp/idlist.c b/lang/m2/comp/idlist.c new file mode 100644 index 000000000..3b067af2b --- /dev/null +++ b/lang/m2/comp/idlist.c @@ -0,0 +1,20 @@ +static char *RcsId = "$Header$"; + +#include "idf.h" +#include "idlist.h" + +struct id_list *h_id_list; /* Header of free list */ + +/* FreeIdList: take a list of id_list structures and put them + on the free list of id_list structures +*/ +FreeIdList(p) + struct id_list *p; +{ + register struct id_list *q; + + while (q = p) { + p = p->next; + free_id_list(q); + } +} diff --git a/lang/m2/comp/input.c b/lang/m2/comp/input.c new file mode 100644 index 000000000..a55c4fd58 --- /dev/null +++ b/lang/m2/comp/input.c @@ -0,0 +1,6 @@ +/* $Header$ */ + +#include "f_info.h" +struct f_info file_info; +#include "input.h" +#include diff --git a/lang/m2/comp/input.h b/lang/m2/comp/input.h new file mode 100644 index 000000000..3fcb7b8b8 --- /dev/null +++ b/lang/m2/comp/input.h @@ -0,0 +1,7 @@ +/* $Header$ */ + +#define INP_NPUSHBACK 2 +#define INP_TYPE struct f_info +#define INP_VAR file_info +#define INP_READ_IN_ONE +#include diff --git a/lang/m2/comp/main.c b/lang/m2/comp/main.c new file mode 100644 index 000000000..ba0b0b970 --- /dev/null +++ b/lang/m2/comp/main.c @@ -0,0 +1,121 @@ +/* mod2 -- compiler , althans: een aanzet daartoe */ + +#include +#undef BUFSIZ /* Really neccesary??? */ +#include +#include "input.h" +#include "f_info.h" +#include "idf.h" +#include "LLlex.h" +#include "Lpars.h" + +static char *RcsId = "$Header:"; + +char options[128]; +char *ProgName; +extern int err_occurred; + +main(argc, argv) + char *argv[]; +{ + register Nargc = 1; + register char **Nargv = &argv[0]; + + ProgName = *argv++; + +# ifdef DEBUG + setbuf(stdout, (char *) 0); +# endif + while (--argc > 0) { + if (**argv == '-') + Option(*argv++); + else + Nargv[Nargc++] = *argv++; + } + Nargv[Nargc] = 0; /* terminate the arg vector */ + if (Nargc != 2) { + fprintf(stderr, "%s: Use one file argument\n", ProgName); + return 1; + } +#ifdef DEBUG + printf("Mod2 compiler -- Debug version\n"); + debug("-D: Debugging on"); +#endif DEBUG + return !Compile(Nargv[1]); +} + +Compile(src) + char *src; +{ + extern struct tokenname tkidf[]; + +#ifdef DEBUG + printf("%s\n", src); +#endif DEBUG + if (! InsertFile(src, (char **) 0)) { + fprintf(stderr,"%s: cannot open %s\n", ProgName, src); + return 0; + } + LineNumber = 1; + FileName = src; + init_idf(); + reserve(tkidf); +#ifdef DEBUG + if (options['L']) + LexScan(); + else if (options['T']) + TimeScan(); + else +#endif DEBUG + CompUnit(); +#ifdef DEBUG + if (options['h']) hash_stat(); +#endif DEBUG + if (err_occurred) return 0; + return 1; +} + +#ifdef DEBUG +LexScan() +{ + register int symb; + + while ((symb = LLlex()) != EOF) { + printf(">>> %s ", symbol2str(symb)); + switch(symb) { + + case IDENT: + printf("%s\n", dot.TOK_IDF->id_text); + break; + + case INTEGER: + printf("%ld\n", dot.TOK_INT); + break; + + case REAL: + printf("%s\n", dot.TOK_REL); + break; + + case STRING: + printf("\"%s\"\n", dot.TOK_STR); + break; + + default: + putchar('\n'); + } + } +} + +TimeScan() { + while (LLlex() != EOF) /* nothing */; +} +#endif + +Option(str) + char *str; +{ +#ifdef DEBUG + debug("option %c", str[1]); +#endif DEBUG + options[str[1]]++; /* switch option on */ +} diff --git a/lang/m2/comp/make.allocd b/lang/m2/comp/make.allocd new file mode 100755 index 000000000..450584aa3 --- /dev/null +++ b/lang/m2/comp/make.allocd @@ -0,0 +1,17 @@ +sed -e ' +s:^.*[ ]ALLOCDEF[ ].*"\(.*\)".*$:\ +/* allocation definitions of struct \1 */\ +extern char *st_alloc();\ +extern struct \1 *h_\1;\ +#define new_\1() ((struct \1 *) \\\ + st_alloc((char **)\&h_\1, sizeof(struct \1)))\ +#define free_\1(p) st_free(p, h_\1, sizeof(struct \1))\ +:' -e ' +s:^.*[ ]STATICALLOCDEF[ ].*"\(.*\)".*$:\ +/* allocation definitions of struct \1 */\ +extern char *st_alloc();\ +static struct \1 *h_\1;\ +#define new_\1() ((struct \1 *) \\\ + st_alloc((char **)\&h_\1, sizeof(struct \1)))\ +#define free_\1(p) st_free(p, h_\1, sizeof(struct \1))\ +:' diff --git a/lang/m2/comp/make.tokcase b/lang/m2/comp/make.tokcase new file mode 100755 index 000000000..ef32292f9 --- /dev/null +++ b/lang/m2/comp/make.tokcase @@ -0,0 +1,34 @@ +cat <<'--EOT--' +#include "Lpars.h" + +char * +symbol2str(tok) + int tok; +{ + static char buf[2] = { '\0', '\0' }; + + if (040 <= tok && tok < 0177) { + buf[0] = tok; + buf[1] = '\0'; + return buf; + } + switch (tok) { +--EOT-- +sed ' +/{[A-Z]/!d +s/.*{\(.*\),.*\(".*"\).*$/ case \1 :\ + return \2;/ +' +cat <<'--EOT--' + case '\n': + case '\f': + case '\v': + case '\r': + case '\t': + buf[0] = tok; + return buf; + default: + return "bad token"; + } +} +--EOT-- diff --git a/lang/m2/comp/make.tokfile b/lang/m2/comp/make.tokfile new file mode 100755 index 000000000..494b7e3cc --- /dev/null +++ b/lang/m2/comp/make.tokfile @@ -0,0 +1,6 @@ +sed ' +/{[A-Z]/!d +s/.*{// +s/,.*// +s/.*/%token &;/ +' diff --git a/lang/m2/comp/param.h b/lang/m2/comp/param.h new file mode 100644 index 000000000..cd4d2cba4 --- /dev/null +++ b/lang/m2/comp/param.h @@ -0,0 +1,4 @@ +/* $Header$ */ + +#define IDFSIZE 256 +#define NUMSIZE 256 diff --git a/lang/m2/comp/program.g b/lang/m2/comp/program.g new file mode 100644 index 000000000..89eef00a7 --- /dev/null +++ b/lang/m2/comp/program.g @@ -0,0 +1,116 @@ +/* + Program: Modula-2 grammar in LL(1) form + Version: Mon Feb 24 14:29:39 MET 1986 +*/ + +/* + The grammar as given by Wirth is already almost LL(1); the + main problem is that the full form of a qualified designator + may be: + [ module_ident '.' ]* IDENT [ '.' field_ident ]* + which is quite confusing to an LL(1) parser. Rather than + resorting to context-sensitive techniques, I have decided + to render this as: + IDENT [ '.' IDENT ]* + on the grounds that it is quite natural to consider the first + IDENT to be the name of the object and regard the others as + field identifiers. +*/ + +{ +#include "idf.h" +#include "idlist.h" + +static char *RcsId = "$Header$"; +} + +%lexical LLlex; + +%start CompUnit, CompilationUnit; + +ModuleDeclaration: + MODULE IDENT priority? ';' import* export? block IDENT +; + +priority: + '[' ConstExpression ']' +; + +export +{ + struct id_list *ExportList; +} : + EXPORT QUALIFIED? IdentList(&ExportList) ';' +; + +import +{ + struct id_list *ImportList; +} : + [ FROM + IDENT + ]? + IMPORT IdentList(&ImportList) ';' + /* + When parsing a global module, this is the place where we must + read already compiled definition modules. + If the FROM clause is present, the identifier in it is a module + name, otherwise the names in the import list are module names. + */ +; + +DefinitionModule: + DEFINITION + { +#ifdef DEBUG + debug("Definition module"); +#endif DEBUG + } + MODULE IDENT ';' import* + /* export? + + New Modula-2 does not have export lists in definition modules. + */ + definition* END IDENT '.' +; + +definition: + CONST [ ConstantDeclaration ';' ]* +| + TYPE + [ IDENT + [ '=' type + | /* empty */ + /* + Here, the exported type has a hidden implementation. + The export is said to be opaque. + It is restricted to pointer types. + */ + ] + ';' + ]* +| + VAR [ VariableDeclaration ';' ]* +| + ProcedureHeading ';' +; + +ProgramModule: + MODULE + { +#ifdef DEBUG + debug("Program module"); +#endif DEBUG + } + IDENT priority? ';' import* block IDENT '.' +; + +Module: + DefinitionModule +| + IMPLEMENTATION? ProgramModule +; + +CompilationUnit: + Module +; diff --git a/lang/m2/comp/statement.g b/lang/m2/comp/statement.g new file mode 100644 index 000000000..3e736a5ae --- /dev/null +++ b/lang/m2/comp/statement.g @@ -0,0 +1,98 @@ +{ +static char *RcsId = "$Header$"; +} + +statement: +[ + /* + * This part is not in the reference grammar. The reference grammar + * states : assignment | ProcedureCall | ... + * but this gives LL(1) conflicts + */ + designator + [ + ActualParameters? + | + BECOMES expression + ] + /* + * end of changed part + */ +| + IfStatement +| + CaseStatement +| + WhileStatement +| + RepeatStatement +| + LoopStatement +| + ForStatement +| + WithStatement +| + EXIT +| + RETURN expression? +]? +; + +/* + * The next two rules in-line in "Statement", because of an LL(1) conflict + +assignment: + designator BECOMES expression +; + +ProcedureCall: + designator ActualParameters? +; +*/ + +StatementSequence: + statement [ ';' statement ]* +; + +IfStatement: + IF expression THEN StatementSequence + [ ELSIF expression THEN StatementSequence ]* + [ ELSE StatementSequence ]? + END +; + +CaseStatement: + CASE expression OF case [ '|' case ]* + [ ELSE StatementSequence ]? + END +; + +case: + [ CaseLabelList ':' StatementSequence ]? + /* This rule is changed in new modula-2 */ +; + +WhileStatement: + WHILE expression DO StatementSequence END +; + +RepeatStatement: + REPEAT StatementSequence UNTIL expression +; + +ForStatement: + FOR IDENT + BECOMES expression + TO expression + [ BY ConstExpression ]? + DO StatementSequence END +; + +LoopStatement: + LOOP StatementSequence END +; + +WithStatement: + WITH designator DO StatementSequence END +; diff --git a/lang/m2/comp/tab.c b/lang/m2/comp/tab.c new file mode 100644 index 000000000..17065cf9b --- /dev/null +++ b/lang/m2/comp/tab.c @@ -0,0 +1,295 @@ +/* @cc tab.c -o $INSTALLDIR/tab@ + tab - table generator + + Author: Erik Baalbergen (..tjalk!erikb) +*/ + +#include + +static char *RcsId = "$Header$"; + +#define MAXTAB 10000 +#define MAXBUF 10000 +#define COMCOM '-' +#define FILECOM '%' + +int InputForm = 'c'; +char OutputForm[MAXBUF] = "%s,\n"; +int TabSize = 257; +char *Table[MAXTAB]; +char *Name; +char *ProgCall; + +main(argc, argv) + char *argv[]; +{ + ProgCall = *argv++; + argc--; + while (argc-- > 0) { + if (**argv == COMCOM) { + option(*argv++); + } + else { + process(*argv++, InputForm); + } + } +} + +char * +Salloc(s) + char *s; +{ + char *malloc(); + char *ns = malloc(strlen(s) + 1); + + if (ns) { + strcpy(ns, s); + } + return ns; +} + +option(str) + char *str; +{ + /* note that *str indicates the source of the option: + either COMCOM (from command line) or FILECOM (from a file). + */ + switch (*++str) { + + case ' ': /* command */ + case '\t': + case '\0': + break; + case 'I': + InputForm = *++str; + break; + case 'f': + if (*++str == '\0') { + fprintf(stderr, "%s: -f: name expected\n", ProgCall); + exit(1); + } + DoFile(str); + break; + case 'F': + sprintf(OutputForm, "%s\n", ++str); + break; + case 'T': + printf("%s\n", ++str); + break; + case 'p': + PrintTable(); + break; + case 'C': + ClearTable(); + break; + case 'S': + { + register i = stoi(++str); + + if (i <= 0 || i > MAXTAB) { + fprintf(stderr, "%s: size would exceed maximum\n", + ProgCall); + } + else { + TabSize = i; + } + break; + } + default: + fprintf(stderr, "%s: bad option -%s\n", ProgCall, str); + } +} + +ClearTable() +{ + register i; + + for (i = 0; i < MAXTAB; i++) { + Table[i] = 0; + } +} + +PrintTable() +{ + register i; + + for (i = 0; i < TabSize; i++) { + if (Table[i]) { + printf(OutputForm, Table[i]); + } + else { + printf(OutputForm, "0"); + } + } +} + +process(str, format) + char *str; +{ + char *cstr = str; + char *Name = cstr; /* overwrite original string! */ + + /* strip of the entry name + */ + while (*str && *str != ':') { + if (*str == '\\') { + ++str; + } + *cstr++ = *str++; + } + + if (*str != ':') { + fprintf(stderr, "%s: bad specification: \"%s\", ignored\n", + ProgCall, Name); + return 0; + } + *cstr = '\0'; + str++; + + switch (format) { + + case 'c': + return c_proc(str, Name); + default: + fprintf(stderr, "%s: bad input format\n", ProgCall); + } + return 0; +} + +c_proc(str, Name) + char *str; + char *Name; +{ + int ch, ch2; + int quoted(); + + while (*str) { + if (*str == '\\') { + ch = quoted(&str); + } + else { + ch = *str++; + } + if (*str == '-') { + if (*++str == '\\') { + ch2 = quoted(&str); + } + else { + if (ch2 = *str++); + else str--; + } + if (ch > ch2) { + fprintf(stderr, "%s: bad range\n", ProgCall); + return 0; + } + if (ch >= 0 && ch2 <= 255) + while (ch <= ch2) + Table[ch++] = Salloc(Name); + } + else { + if (ch >= 0 && ch <= 255) + Table[ch] = Salloc(Name); + } + } + return 1; +} + +int +quoted(pstr) + char **pstr; +{ + register int ch; + register int i; + register char *str = *pstr; + + if ((*++str >= '0') && (*str <= '9')) { + ch = 0; + for (i = 0; i < 3; i++) { + ch = 8 * ch + *str - '0'; + if (*++str < '0' || *str > '9') + break; + } + } + else { + switch (*str++) { + + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case 'b': + ch = '\b'; + break; + case 'r': + ch = '\r'; + break; + case 'f': + ch = '\f'; + break; + default : + ch = *str; + } + } + *pstr = str; + return ch & 0377; +} + +int +stoi(str) + char *str; +{ + register i = 0; + + while (*str >= '0' && *str <= '9') { + i = i * 10 + *str++ - '0'; + } + return i; +} + +char * +getline(s, n, fp) + char *s; + FILE *fp; +{ + register c = getc(fp); + char *str = s; + + while (n--) { + if (c == EOF) { + return NULL; + } + else + if (c == '\n') { + *str++ = '\0'; + return s; + } + *str++ = c; + c = getc(fp); + } + s[n - 1] = '\0'; + return s; +} + +#define BUFSIZE 1024 + +DoFile(name) + char *name; +{ + char text[BUFSIZE]; + FILE *fp; + + if ((fp = fopen(name, "r")) == NULL) { + fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name); + exit(1); + } + while (getline(text, BUFSIZE, fp) != NULL) { + if (text[0] == FILECOM) { + option(text); + } + else { + process(text, InputForm); + } + } +} diff --git a/lang/m2/comp/tokenname.c b/lang/m2/comp/tokenname.c new file mode 100644 index 000000000..32e658a1f --- /dev/null +++ b/lang/m2/comp/tokenname.c @@ -0,0 +1,99 @@ +#include "tokenname.h" +#include "Lpars.h" +#include "idf.h" + +/* To centralize the declaration of %tokens, their presence in this + file is taken as their declaration. The Makefile will produce + a grammar file (tokenfile.g) from this file. This scheme ensures + that all tokens have a printable name. + Also, the "token2str.c" file is produced from this file. +*/ + +static char *RcsId = "$Header$"; + +struct tokenname tkspec[] = { /* the names of the special tokens */ + {IDENT, "identifier"}, + {STRING, "string"}, + {INTEGER, "integer"}, + {REAL, "real"}, + {0, ""} +}; + +struct tokenname tkcomp[] = { /* names of the composite tokens */ + {UNEQUAL, "<>"}, + {LESSEQUAL, "<="}, + {GREATEREQUAL, ">="}, + {UPTO, ".."}, + {BECOMES, ":="}, + {0, ""} +}; + +struct tokenname tkidf[] = { /* names of the identifier tokens */ + {AND, "AND"}, + {ARRAY, "ARRAY"}, + {BEGIN, "BEGIN"}, + {BY, "BY"}, + {CASE, "CASE"}, + {CONST, "CONST"}, + {DEFINITION, "DEFINITION"}, + {DIV, "DIV"}, + {DO, "DO"}, + {ELSE, "ELSE"}, + {ELSIF, "ELSIF"}, + {END, "END"}, + {EXIT, "EXIT"}, + {EXPORT, "EXPORT"}, + {FOR, "FOR"}, + {FROM, "FROM"}, + {IF, "IF"}, + {IMPLEMENTATION, "IMPLEMENTATION"}, + {IMPORT, "IMPORT"}, + {IN, "IN"}, + {LOOP, "LOOP"}, + {MOD, "MOD"}, + {MODULE, "MODULE"}, + {NOT, "NOT"}, + {OF, "OF"}, + {OR, "OR"}, + {POINTER, "POINTER"}, + {PROCEDURE, "PROCEDURE"}, + {QUALIFIED, "QUALIFIED"}, + {RECORD, "RECORD"}, + {REPEAT, "REPEAT"}, + {RETURN, "RETURN"}, + {SET, "SET"}, + {THEN, "THEN"}, + {TO, "TO"}, + {TYPE, "TYPE"}, + {UNTIL, "UNTIL"}, + {VAR, "VAR"}, + {WHILE, "WHILE"}, + {WITH, "WITH"}, + {0, ""} +}; + +struct tokenname tkinternal[] = { /* internal keywords */ + {0, "0"} +}; + +struct tokenname tkstandard[] = { /* standard identifiers */ + {0, ""} +}; + +/* Some routines to handle tokennames */ + +reserve(resv) + register struct tokenname *resv; +{ + /* The names of the tokens described in resv are entered + as reserved words. + */ + register struct idf *p; + + while (resv->tn_symbol) { + p = str2idf(resv->tn_name, 0); + if (!p) fatal("Out of Memory"); + p->id_reserved = resv->tn_symbol; + resv++; + } +} diff --git a/lang/m2/comp/tokenname.h b/lang/m2/comp/tokenname.h new file mode 100644 index 000000000..2b545da45 --- /dev/null +++ b/lang/m2/comp/tokenname.h @@ -0,0 +1,7 @@ +/* $Header$ */ +struct tokenname { /* Used for defining the name of a + token as identified by its symbol + */ + int tn_symbol; + char *tn_name; +};