Initial version

This commit is contained in:
ceriel 1986-03-20 14:52:03 +00:00
parent 9f8f8c1a9c
commit b683a21217
26 changed files with 1959 additions and 0 deletions

383
lang/m2/comp/LLlex.c Normal file
View file

@ -0,0 +1,383 @@
/* LEXICAL ANALYSER FOR MODULA-2 */
#include "input.h"
#include <alloc.h>
#include "f_info.h"
#include "Lpars.h"
#include "class.h"
#include "param.h"
#include "idf.h"
#include "LLlex.h"
long str2long();
char *GetString();
struct token dot, aside;
static char *RcsId = "$Header$";
int
LLlex()
{
/* LLlex() plays the role of Lexical Analyzer for the parser.
The putting aside of tokens is taken into account.
*/
if (ASIDE) { /* a token is put aside */
dot = aside;
ASIDE = 0;
}
else {
GetToken(&dot);
if (DOT == EOI) DOT = -1;
}
return DOT;
}
int
GetToken(tk)
register struct token *tk;
{
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
register int ch, nch;
again:
LoadChar(ch);
if ((ch & 0200) && ch != EOI) {
fatal("non-ascii '\\%03o' read", ch & 0377);
}
switch (class(ch)) {
case STSKIP:
goto again;
case STNL:
LineNumber++;
goto again;
case STGARB:
if (040 < ch && ch < 0177) {
lexerror("garbage char %c", ch);
}
else {
lexerror("garbage char \\%03o", ch);
}
goto again;
case STSIMP:
if (ch == '(') {
LoadChar(nch);
if (nch == '*') {
SkipComment();
goto again;
}
else {
PushBack(nch);
}
}
return tk->tk_symb = ch;
case STCOMP:
LoadChar(nch);
switch (ch) {
case '.':
if (nch == '.') {
return tk->tk_symb = UPTO;
}
PushBack(nch);
return tk->tk_symb = ch;
case ':':
if (nch == '=') {
return tk->tk_symb = BECOMES;
}
PushBack(nch);
return tk->tk_symb = ch;
case '<':
if (nch == '=') {
return tk->tk_symb = LESSEQUAL;
}
else
if (nch == '>') {
return tk->tk_symb = UNEQUAL;
}
PushBack(nch);
return tk->tk_symb = ch;
case '>':
if (nch == '=') {
return tk->tk_symb = GREATEREQUAL;
}
PushBack(nch);
return tk->tk_symb = ch;
default :
crash("bad STCOMP");
}
case STIDF:
{
register char *tg = &buf[0];
register struct idf *id;
do {
if (tg - buf < IDFSIZE) *tg++ = ch;
LoadChar(ch);
} while(in_idf(ch));
if (ch != EOI)
PushBack(ch);
*tg++ = '\0';
id = tk->TOK_IDF = str2idf(buf, 1);
if (!id) fatal("Out of memory");
return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
}
case STSTR:
tk->TOK_STR = GetString(ch);
return tk->tk_symb = STRING;
case STNUM:
{
/* The problem arising with the "parsing" of a number
is that we don't know the base in advance so we
have to read the number with the help of a rather
complex finite automaton.
Excuses for the very ugly code!
*/
register char *np = &buf[1];
/* allow a '-' to be added */
*np++ = ch;
LoadChar(ch);
while (is_oct(ch)) {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
}
switch (ch) {
case 'H':
Shex: *np++ = '\0';
/* Type is integer */
tk->TOK_INT = str2long(&buf[1], 16);
return tk->tk_symb = INTEGER;
case '8':
case '9':
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
if (is_hex(ch))
goto S2;
if (ch == 'H')
goto Shex;
if (ch == '.')
goto Sreal;
PushBack(ch);
goto Sdec;
case 'B':
case 'C':
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
if (ch == 'H')
goto Shex;
if (is_hex(ch))
goto S2;
PushBack(ch);
ch = *--np;
*np++ = '\0';
/*
* If (ch == 'C') type is a CHAR
* else type is an INTEGER
*/
tk->TOK_INT = str2long(&buf[1], 8);
return tk->tk_symb = INTEGER;
case 'A':
case 'D':
case 'E':
case 'F':
S2:
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_hex(ch));
if (ch != 'H') {
lexerror("H expected after hex number");
PushBack(ch);
}
goto Shex;
case '.':
Sreal:
/* This '.' could be the first of the '..'
token. At this point, we need a look-ahead
of two characters.
*/
LoadChar(ch);
if (ch == '.') {
/* Indeed the '..' token
*/
PushBack(ch);
PushBack(ch);
goto Sdec;
}
/* a real constant */
if (np < &buf[NUMSIZE]) {
*np++ = '.';
}
if (is_dig(ch)) {
/* Fractional part
*/
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
}
if (ch == 'E') {
/* Scale factor
*/
if (np < &buf[NUMSIZE]) {
*np++ = 'E';
}
LoadChar(ch);
if (ch == '+' || ch == '-') {
/* Signed scalefactor
*/
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
}
if (is_dig(ch)) {
do {
if (np < &buf[NUMSIZE]) {
*np++ = ch;
}
LoadChar(ch);
} while (is_dig(ch));
}
else {
lexerror("bad scale factor");
}
}
PushBack(ch);
if (np == &buf[NUMSIZE + 1]) {
lexerror("floating constant too long");
tk->TOK_REL = Salloc("0.0", 5);
}
else {
tk->TOK_REL = Salloc(buf, np - buf) + 1;
}
return tk->tk_symb = REAL;
default:
PushBack(ch);
Sdec:
*np++ = '\0';
/* Type is an integer */
tk->TOK_INT = str2long(&buf[1], 10);
return tk->tk_symb = INTEGER;
}
/*NOTREACHED*/
}
case STEOI:
return tk->tk_symb = EOI;
case STCHAR:
default:
crash("bad character class %d", class(ch));
}
}
char *
GetString(upto)
{
register int ch;
int str_size;
char *str = Malloc(str_size = 32);
register int pos = 0;
LoadChar(ch);
while (ch != upto) {
if (class(ch) == STNL) {
lexerror("newline in string");
LineNumber++;
break;
}
if (ch == EOI) {
lexerror("end-of-file in string");
break;
}
str[pos++] = ch;
if (pos == str_size) {
str = Srealloc(str, str_size += 8);
}
LoadChar(ch);
}
str[pos] = '\0';
return str;
}
SkipComment()
{
/* Skip Modula-2 like comment (* ... *).
Note that comment may be nested.
*/
register int ch;
register int NestLevel = 0;
LoadChar(ch);
for (;;) {
if (class(ch) == STNL) {
LineNumber++;
}
else
if (ch == '(') {
LoadChar(ch);
if (ch == '*') {
++NestLevel;
}
else {
continue;
}
}
else
if (ch == '*') {
LoadChar(ch);
if (ch == ')') {
if (NestLevel-- == 0) {
return;
}
}
else {
continue;
}
}
LoadChar(ch);
}
}

27
lang/m2/comp/LLlex.h Normal file
View file

@ -0,0 +1,27 @@
/* Token Descriptor Definition */
/* $Header$ */
struct token {
int tk_symb; /* token itself */
union {
struct idf *tk_idf; /* IDENT */
char *tk_str; /* STRING */
struct { /* INTEGER */
int tk_type; /* type */
long tk_value; /* value */
} tk_int;
char *tk_real; /* REAL */
} tk_data;
};
#define TOK_IDF tk_data.tk_idf
#define TOK_STR tk_data.tk_str
#define TOK_ITP tk_data.tk_int.tk_type
#define TOK_INT tk_data.tk_int.tk_value
#define TOK_REL tk_data.tk_real
extern struct token dot, aside;
#define DOT dot.tk_symb
#define ASIDE aside.tk_symb

69
lang/m2/comp/LLmessage.c Normal file
View file

@ -0,0 +1,69 @@
#include <alloc.h>
#include "f_info.h"
#include "idf.h"
#include "LLlex.h"
#include "Lpars.h"
static char *RcsId = "$Header$";
extern char *symbol2str();
int err_occurred = 0;
LLmessage(tk)
int tk;
{
++err_occurred;
if (tk) {
error("%s missing", symbol2str(tk));
insert_token(tk);
}
else
error("%s deleted", symbol2str(dot.tk_symb));
}
struct idf *
gen_anon_idf()
{
/* A new idf is created out of nowhere, to serve as an
anonymous name.
*/
static int name_cnt;
char buff[100];
char *sprintf();
sprintf(buff, "#%d in %s, line %u",
++name_cnt, FileName, LineNumber);
return str2idf(buff, 1);
}
int
is_anon_idf(idf)
struct idf *idf;
{
return idf->id_text[0] == '#';
}
insert_token(tk)
int tk;
{
aside = dot;
dot.tk_symb = tk;
switch (tk) {
/* The operands need some body */
case IDENT:
dot.TOK_IDF = gen_anon_idf();
break;
case STRING:
dot.TOK_STR = Salloc("", 1);
break;
case INTEGER:
/* dot.TOK_ITP = INT; */
dot.TOK_INT = 1;
break;
case REAL:
dot.TOK_REL = Salloc("0.0", 4);
break;
}
}

78
lang/m2/comp/Makefile Normal file
View file

@ -0,0 +1,78 @@
# make modula-2 "compiler"
# $Header$
HDIR = ../../em/h
PKGDIR = ../../em/pkg
LIBDIR = ../../em/lib
INCLUDES = -I$(HDIR) -I$(PKGDIR) -I/user1/erikb/h
LSRC = tokenfile.g program.g declar.g expression.g statement.g
CC = cc
GEN = LLgen
GENOPTIONS =
CFLAGS = -DDEBUG -O $(INCLUDES)
LOBJ = tokenfile.o program.o declar.o expression.o statement.o
COBJ = LLlex.o LLmessage.o char.o error.o main.o \
symbol2str.o tokenname.o idf.o input.o idlist.o
OBJ = $(COBJ) $(LOBJ) Lpars.o
GENFILES= tokenfile.c \
program.c declar.c expression.c statement.c \
tokenfile.g symbol2str.c char.c Lpars.c Lpars.h
all:
make LLfiles
make main
LLfiles: $(LSRC)
$(GEN) $(GENOPTIONS) $(LSRC)
@touch LLfiles
main: $(OBJ) Makefile
$(CC) $(LFLAGS) $(OBJ) $(LIBDIR)/libcomp.a /user1/erikb/em/lib/libstr.a /user1/erikb/lib/libsystem.a -o main
size main
clean:
rm -f $(OBJ) $(GENFILES) LLfiles
tokenfile.g: tokenname.c make.tokfile
make.tokfile <tokenname.c >tokenfile.g
symbol2str.c: tokenname.c make.tokcase
make.tokcase <tokenname.c >symbol2str.c
idlist.h: idlist.H make.allocd
char.c: char.tab tab
./tab -fchar.tab >char.c
tab:
$(CC) tab.c -o tab
depend:
sed '/^#AUTOAUTO/,$$d' Makefile > Makefile.new
echo '#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO' >> Makefile.new
/user1/erikb/bin/mkdep `sources $(OBJ)` |\
sed 's/\.c:/\.o:/' >> Makefile.new
mv Makefile Makefile.old
mv Makefile.new Makefile
.SUFFIXES: .H .h .C
.H.h .C.c :
make.allocd < $< > $@
#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO
LLlex.o: LLlex.h Lpars.h class.h f_info.h idf.h param.h
LLmessage.o: LLlex.h Lpars.h f_info.h idf.h
char.o: class.h
error.o: LLlex.h f_info.h
main.o: LLlex.h Lpars.h f_info.h idf.h
symbol2str.o: Lpars.h
tokenname.o: Lpars.h idf.h tokenname.h
idf.o: idf.h
input.o: f_info.h input.h
idlist.o: idf.h idlist.h
tokenfile.o: Lpars.h
program.o: Lpars.h idf.h idlist.h
declar.o: LLlex.h Lpars.h idf.h idlist.h
expression.o: Lpars.h
statement.o: Lpars.h
Lpars.o: Lpars.h

54
lang/m2/comp/char.tab Normal file
View file

@ -0,0 +1,54 @@
% character tables for mod2 compiler
% $Header$
%S129
%F %s,
%
% CHARACTER CLASSES
%
%C
STGARB:\000-\200
STSKIP: \r\t
STNL:\012\013\014
STSIMP:#&()*+,-/;=[]^{|}~
STCOMP:.:<>
STIDF:a-zA-Z
STSTR:"'
STNUM:0-9
STEOI:\200
%T#include "class.h"
%Tchar tkclass[] = {
%p
%T};
%
% INIDF
%
%C
1:a-zA-Z_0-9
%Tchar inidf[] = {
%F %s,
%p
%T};
%
% ISDIG
%
%C
1:0-9
%Tchar isdig[] = {
%p
%T};
%
% ISHEX
%
%C
1:a-fA-F
%Tchar ishex[] = {
%p
%T};
%
% ISOCT
%
%C
1:0-7
%Tchar isoct[] = {
%p
%T};

38
lang/m2/comp/class.h Normal file
View file

@ -0,0 +1,38 @@
/* U S E O F C H A R A C T E R C L A S S E S */
/* $Header$ */
/* As a starter, chars are divided into classes, according to which
token they can be the start of.
At present such a class number is supposed to fit in 4 bits.
*/
#define class(ch) (tkclass[ch])
/* Being the start of a token is, fortunately, a mutual exclusive
property, so, as there are less than 16 classes they can be
packed in 4 bits.
*/
#define STSKIP 0 /* spaces and so on: skipped characters */
#define STNL 1 /* newline character(s): update linenumber etc. */
#define STGARB 2 /* garbage ascii character: not allowed */
#define STSIMP 3 /* this character can occur as token */
#define STCOMP 4 /* this one can start a compound token */
#define STIDF 5 /* being the initial character of an identifier */
#define STCHAR 6 /* the starter of a character constant */
#define STSTR 7 /* the starter of a string */
#define STNUM 8 /* the starter of a numeric constant */
#define STEOI 9 /* End-Of-Information mark */
/* But occurring inside a token is not, so we need 1 bit for each
class. This is implemented as a collection of tables to speed up
the decision whether a character has a special meaning.
*/
#define in_idf(ch) (inidf[ch])
#define is_oct(ch) (isoct[ch])
#define is_dig(ch) (isdig[ch])
#define is_hex(ch) (ishex[ch])
extern char tkclass[];
extern char inidf[], isoct[], isdig[], ishex[];

181
lang/m2/comp/declar.g Normal file
View file

@ -0,0 +1,181 @@
{
#include "idf.h"
#include "idlist.h"
#include "LLlex.h"
static char *RcsId = "$Header$";
}
ProcedureDeclaration:
ProcedureHeading ';' block IDENT
;
ProcedureHeading:
PROCEDURE IDENT FormalParameters?
;
block:
declaration* [ BEGIN StatementSequence ]? END
;
declaration:
CONST [ ConstantDeclaration ';' ]*
|
TYPE [ TypeDeclaration ';' ]*
|
VAR [ VariableDeclaration ';' ]*
|
ProcedureDeclaration ';'
|
ModuleDeclaration ';'
;
FormalParameters:
'(' [ FPSection [ ';' FPSection ]* ]? ')'
[ ':' qualident ]?
;
FPSection
{
struct id_list *FPList;
} :
VAR? IdentList(&FPList) ':' FormalType
;
FormalType:
[ ARRAY OF ]? qualident
;
TypeDeclaration:
IDENT '=' type
;
type:
SimpleType
|
ArrayType
|
RecordType
|
SetType
|
PointerType
|
ProcedureType
;
SimpleType:
qualident
[
|
SubrangeType
/*
* The subrange type is given a base type by the
* qualident (this is new modula-2).
*/
]
|
enumeration
|
SubrangeType
;
enumeration
{
struct id_list *EnumList;
} :
'(' IdentList(&EnumList) ')'
;
IdentList(struct id_list **p;)
{
register struct id_list *q = new_id_list();
} :
IDENT { q->id_ptr = dot.TOK_IDF; }
[
',' IDENT { q->next = new_id_list();
q = q->next;
q->id_ptr = dot.TOK_IDF;
}
]*
{ q->next = 0;
*p = q;
}
;
SubrangeType:
/*
This is not exactly the rule in the new report, but see
the rule for "SimpleType".
*/
'[' ConstExpression UPTO ConstExpression ']'
;
ArrayType:
ARRAY SimpleType [ ',' SimpleType ]* OF type
;
RecordType:
RECORD FieldListSequence END
;
FieldListSequence:
FieldList [ ';' FieldList ]*
;
FieldList
{
struct id_list *FldList;
} :
[
IdentList(&FldList) ':' type
|
CASE IDENT? /* Changed rule in new modula-2 */
':' qualident
OF variant [ '|' variant ]*
[ ELSE FieldListSequence ]?
END
]?
;
variant:
[ CaseLabelList ':' FieldListSequence ]?
/* Changed rule in new modula-2 */
;
CaseLabelList:
CaseLabels [ ',' CaseLabels ]*
;
CaseLabels:
ConstExpression [ UPTO ConstExpression ]?
;
SetType:
SET OF SimpleType
;
PointerType:
POINTER TO type
;
ProcedureType:
PROCEDURE FormalTypeList?
;
FormalTypeList:
'(' [ VAR? FormalType [ ',' VAR? FormalType ]* ]? ')'
[ ':' qualident ]?
;
ConstantDeclaration:
IDENT '=' ConstExpression
;
VariableDeclaration
{
struct id_list *VarList;
} :
IdentList(&VarList) ':' type
;

170
lang/m2/comp/error.c Normal file
View file

@ -0,0 +1,170 @@
/* E R R O R A N D D I A G N O S T I C R O U T I N E S */
/* This file contains the (non-portable) error-message and diagnostic
giving functions. Be aware that they are called with a variable
number of arguments!
*/
#include <stdio.h>
#include "input.h"
#include "f_info.h"
#include "LLlex.h"
static char *RcsId = "$Header$";
#define ERROUT stderr
#define ERROR 1
#define WARNING 2
#define LEXERROR 3
#define LEXWARNING 4
#define CRASH 5
#define FATAL 6
#define NONFATAL 7
#ifdef DEBUG
#define VDEBUG 8
#endif DEBUG
int err_occurred;
/*
extern int ofd; /* compact.c * /
#define compiling (ofd >= 0)
*/
extern char options[];
/* There are two general error message giving functions:
error() : syntactic and semantic error messages
lexerror() : lexical and pre-processor error messages
The difference lies in the fact that the first function deals with
tokens already read in by the lexical analyzer so the name of the
file it comes from and the linenumber must be retrieved from the
token instead of looking at the global variables LineNumber and
FileName.
*/
/*VARARGS1*/
error(fmt, args)
char *fmt;
{
/*
if (compiling)
C_ms_err();
*/
++err_occurred;
_error(ERROR, fmt, &args);
}
#ifdef DEBUG
debug(fmt, args)
char *fmt;
{
if (options['D'])
_error(VDEBUG, fmt, &args);
}
#endif DEBUG
/*VARARGS1*/
lexerror(fmt, args)
char *fmt;
{
/*
if (compiling)
C_ms_err();
*/
++err_occurred;
_error(LEXERROR, fmt, &args);
}
/*VARARGS1*/
lexwarning(fmt, args) char *fmt; {
if (options['w']) return;
_error(LEXWARNING, fmt, &args);
}
/*VARARGS1*/
crash(fmt, args)
char *fmt;
int args;
{
/*
if (compiling)
C_ms_err();
*/
_error(CRASH, fmt, &args);
fflush(ERROUT);
fflush(stderr);
fflush(stdout);
/*
cclose();
*/
abort(); /* produce core by "Illegal Instruction" */
/* this should be changed into exit(1) */
}
/*VARARGS1*/
fatal(fmt, args)
char *fmt;
int args;
{
/*
if (compiling)
C_ms_err();
*/
_error(FATAL, fmt, &args);
exit(-1);
}
/*VARARGS1*/
nonfatal(fmt, args)
char *fmt;
int args;
{
_error(NONFATAL, fmt, &args);
}
/*VARARGS1*/
warning(fmt, args)
char *fmt;
{
if (options['w']) return;
_error(WARNING, fmt, &args);
}
_error(class, fmt, argv)
int class;
char *fmt;
int argv[];
{
switch (class) {
case ERROR:
case LEXERROR:
fprintf(ERROUT, "%s, line %ld: ", FileName, LineNumber);
break;
case WARNING:
case LEXWARNING:
fprintf(ERROUT, "%s, line %ld: (warning) ",
FileName, LineNumber);
break;
case CRASH:
fprintf(ERROUT, "CRASH\007 %s, line %ld: \n",
FileName, LineNumber);
break;
case FATAL:
fprintf(ERROUT, "%s, line %ld: fatal error -- ",
FileName, LineNumber);
break;
case NONFATAL:
fprintf(ERROUT, "warning: "); /* no line number ??? */
break;
#ifdef DEBUG
case VDEBUG:
fprintf(ERROUT, "-D ");
break;
#endif DEBUG
}
_doprnt(fmt, argv, ERROUT);
fprintf(ERROUT, "\n");
}

97
lang/m2/comp/expression.g Normal file
View file

@ -0,0 +1,97 @@
{
static char *RcsId = "$Header$";
}
number:
INTEGER
|
REAL
;
qualident:
IDENT selector*
;
selector:
'.' /* field */ IDENT
;
ExpList:
expression [ ',' expression ]*
;
ConstExpression:
expression
/*
* Changed rule in new Modula-2.
* Check that the expression is a constant expression and evaluate!
*/
;
expression:
SimpleExpression [ relation SimpleExpression ]?
;
relation:
'=' | '#' | UNEQUAL | '<' | LESSEQUAL | '>' | GREATEREQUAL | IN
;
SimpleExpression:
[ '+' | '-' ]? term [ AddOperator term ]*
;
AddOperator:
'+' | '-' | OR
;
term:
factor [ MulOperator factor ]*
;
MulOperator:
'*' | '/' | DIV | MOD | AND | '&'
;
factor:
qualident
[
designator_tail? ActualParameters?
|
bare_set
]
|
bare_set
| %default
number
|
STRING
|
'(' expression ')'
|
NOT factor
;
bare_set:
'{' [ element [ ',' element ]* ]? '}'
;
ActualParameters:
'(' ExpList? ')'
;
element:
expression [ UPTO expression ]?
;
designator:
qualident designator_tail?
;
designator_tail:
visible_designator_tail
[ selector | visible_designator_tail ]*
;
visible_designator_tail:
'[' ExpList ']' | '^'
;

11
lang/m2/comp/f_info.h Normal file
View file

@ -0,0 +1,11 @@
/* $Header$ */
struct f_info {
unsigned int f_lineno;
char *f_filename;
char *f_workingdir;
};
extern struct f_info file_info;
#define LineNumber file_info.f_lineno
#define FileName file_info.f_filename

4
lang/m2/comp/idf.c Normal file
View file

@ -0,0 +1,4 @@
/* $Header$ */
#include "idf.h"
#include <idf_pkg.body>

5
lang/m2/comp/idf.h Normal file
View file

@ -0,0 +1,5 @@
/* $Header$ */
#define IDF_TYPE int
#define id_reserved id_user
#include <idf_pkg.spec>

12
lang/m2/comp/idlist.H Normal file
View file

@ -0,0 +1,12 @@
/* $Header$ */
#include <alloc.h>
/* Structure to link idf structures together
*/
struct id_list {
struct id_list *next;
struct idf *id_ptr;
};
/* ALLOCDEF "id_list" */

20
lang/m2/comp/idlist.c Normal file
View file

@ -0,0 +1,20 @@
static char *RcsId = "$Header$";
#include "idf.h"
#include "idlist.h"
struct id_list *h_id_list; /* Header of free list */
/* FreeIdList: take a list of id_list structures and put them
on the free list of id_list structures
*/
FreeIdList(p)
struct id_list *p;
{
register struct id_list *q;
while (q = p) {
p = p->next;
free_id_list(q);
}
}

6
lang/m2/comp/input.c Normal file
View file

@ -0,0 +1,6 @@
/* $Header$ */
#include "f_info.h"
struct f_info file_info;
#include "input.h"
#include <inp_pkg.body>

7
lang/m2/comp/input.h Normal file
View file

@ -0,0 +1,7 @@
/* $Header$ */
#define INP_NPUSHBACK 2
#define INP_TYPE struct f_info
#define INP_VAR file_info
#define INP_READ_IN_ONE
#include <inp_pkg.spec>

121
lang/m2/comp/main.c Normal file
View file

@ -0,0 +1,121 @@
/* mod2 -- compiler , althans: een aanzet daartoe */
#include <stdio.h>
#undef BUFSIZ /* Really neccesary??? */
#include <system.h>
#include "input.h"
#include "f_info.h"
#include "idf.h"
#include "LLlex.h"
#include "Lpars.h"
static char *RcsId = "$Header:";
char options[128];
char *ProgName;
extern int err_occurred;
main(argc, argv)
char *argv[];
{
register Nargc = 1;
register char **Nargv = &argv[0];
ProgName = *argv++;
# ifdef DEBUG
setbuf(stdout, (char *) 0);
# endif
while (--argc > 0) {
if (**argv == '-')
Option(*argv++);
else
Nargv[Nargc++] = *argv++;
}
Nargv[Nargc] = 0; /* terminate the arg vector */
if (Nargc != 2) {
fprintf(stderr, "%s: Use one file argument\n", ProgName);
return 1;
}
#ifdef DEBUG
printf("Mod2 compiler -- Debug version\n");
debug("-D: Debugging on");
#endif DEBUG
return !Compile(Nargv[1]);
}
Compile(src)
char *src;
{
extern struct tokenname tkidf[];
#ifdef DEBUG
printf("%s\n", src);
#endif DEBUG
if (! InsertFile(src, (char **) 0)) {
fprintf(stderr,"%s: cannot open %s\n", ProgName, src);
return 0;
}
LineNumber = 1;
FileName = src;
init_idf();
reserve(tkidf);
#ifdef DEBUG
if (options['L'])
LexScan();
else if (options['T'])
TimeScan();
else
#endif DEBUG
CompUnit();
#ifdef DEBUG
if (options['h']) hash_stat();
#endif DEBUG
if (err_occurred) return 0;
return 1;
}
#ifdef DEBUG
LexScan()
{
register int symb;
while ((symb = LLlex()) != EOF) {
printf(">>> %s ", symbol2str(symb));
switch(symb) {
case IDENT:
printf("%s\n", dot.TOK_IDF->id_text);
break;
case INTEGER:
printf("%ld\n", dot.TOK_INT);
break;
case REAL:
printf("%s\n", dot.TOK_REL);
break;
case STRING:
printf("\"%s\"\n", dot.TOK_STR);
break;
default:
putchar('\n');
}
}
}
TimeScan() {
while (LLlex() != EOF) /* nothing */;
}
#endif
Option(str)
char *str;
{
#ifdef DEBUG
debug("option %c", str[1]);
#endif DEBUG
options[str[1]]++; /* switch option on */
}

17
lang/m2/comp/make.allocd Executable file
View file

@ -0,0 +1,17 @@
sed -e '
s:^.*[ ]ALLOCDEF[ ].*"\(.*\)".*$:\
/* allocation definitions of struct \1 */\
extern char *st_alloc();\
extern struct \1 *h_\1;\
#define new_\1() ((struct \1 *) \\\
st_alloc((char **)\&h_\1, sizeof(struct \1)))\
#define free_\1(p) st_free(p, h_\1, sizeof(struct \1))\
:' -e '
s:^.*[ ]STATICALLOCDEF[ ].*"\(.*\)".*$:\
/* allocation definitions of struct \1 */\
extern char *st_alloc();\
static struct \1 *h_\1;\
#define new_\1() ((struct \1 *) \\\
st_alloc((char **)\&h_\1, sizeof(struct \1)))\
#define free_\1(p) st_free(p, h_\1, sizeof(struct \1))\
:'

34
lang/m2/comp/make.tokcase Executable file
View file

@ -0,0 +1,34 @@
cat <<'--EOT--'
#include "Lpars.h"
char *
symbol2str(tok)
int tok;
{
static char buf[2] = { '\0', '\0' };
if (040 <= tok && tok < 0177) {
buf[0] = tok;
buf[1] = '\0';
return buf;
}
switch (tok) {
--EOT--
sed '
/{[A-Z]/!d
s/.*{\(.*\),.*\(".*"\).*$/ case \1 :\
return \2;/
'
cat <<'--EOT--'
case '\n':
case '\f':
case '\v':
case '\r':
case '\t':
buf[0] = tok;
return buf;
default:
return "bad token";
}
}
--EOT--

6
lang/m2/comp/make.tokfile Executable file
View file

@ -0,0 +1,6 @@
sed '
/{[A-Z]/!d
s/.*{//
s/,.*//
s/.*/%token &;/
'

4
lang/m2/comp/param.h Normal file
View file

@ -0,0 +1,4 @@
/* $Header$ */
#define IDFSIZE 256
#define NUMSIZE 256

116
lang/m2/comp/program.g Normal file
View file

@ -0,0 +1,116 @@
/*
Program: Modula-2 grammar in LL(1) form
Version: Mon Feb 24 14:29:39 MET 1986
*/
/*
The grammar as given by Wirth is already almost LL(1); the
main problem is that the full form of a qualified designator
may be:
[ module_ident '.' ]* IDENT [ '.' field_ident ]*
which is quite confusing to an LL(1) parser. Rather than
resorting to context-sensitive techniques, I have decided
to render this as:
IDENT [ '.' IDENT ]*
on the grounds that it is quite natural to consider the first
IDENT to be the name of the object and regard the others as
field identifiers.
*/
{
#include "idf.h"
#include "idlist.h"
static char *RcsId = "$Header$";
}
%lexical LLlex;
%start CompUnit, CompilationUnit;
ModuleDeclaration:
MODULE IDENT priority? ';' import* export? block IDENT
;
priority:
'[' ConstExpression ']'
;
export
{
struct id_list *ExportList;
} :
EXPORT QUALIFIED? IdentList(&ExportList) ';'
;
import
{
struct id_list *ImportList;
} :
[ FROM
IDENT
]?
IMPORT IdentList(&ImportList) ';'
/*
When parsing a global module, this is the place where we must
read already compiled definition modules.
If the FROM clause is present, the identifier in it is a module
name, otherwise the names in the import list are module names.
*/
;
DefinitionModule:
DEFINITION
{
#ifdef DEBUG
debug("Definition module");
#endif DEBUG
}
MODULE IDENT ';' import*
/* export?
New Modula-2 does not have export lists in definition modules.
*/
definition* END IDENT '.'
;
definition:
CONST [ ConstantDeclaration ';' ]*
|
TYPE
[ IDENT
[ '=' type
| /* empty */
/*
Here, the exported type has a hidden implementation.
The export is said to be opaque.
It is restricted to pointer types.
*/
]
';'
]*
|
VAR [ VariableDeclaration ';' ]*
|
ProcedureHeading ';'
;
ProgramModule:
MODULE
{
#ifdef DEBUG
debug("Program module");
#endif DEBUG
}
IDENT priority? ';' import* block IDENT '.'
;
Module:
DefinitionModule
|
IMPLEMENTATION? ProgramModule
;
CompilationUnit:
Module
;

98
lang/m2/comp/statement.g Normal file
View file

@ -0,0 +1,98 @@
{
static char *RcsId = "$Header$";
}
statement:
[
/*
* This part is not in the reference grammar. The reference grammar
* states : assignment | ProcedureCall | ...
* but this gives LL(1) conflicts
*/
designator
[
ActualParameters?
|
BECOMES expression
]
/*
* end of changed part
*/
|
IfStatement
|
CaseStatement
|
WhileStatement
|
RepeatStatement
|
LoopStatement
|
ForStatement
|
WithStatement
|
EXIT
|
RETURN expression?
]?
;
/*
* The next two rules in-line in "Statement", because of an LL(1) conflict
assignment:
designator BECOMES expression
;
ProcedureCall:
designator ActualParameters?
;
*/
StatementSequence:
statement [ ';' statement ]*
;
IfStatement:
IF expression THEN StatementSequence
[ ELSIF expression THEN StatementSequence ]*
[ ELSE StatementSequence ]?
END
;
CaseStatement:
CASE expression OF case [ '|' case ]*
[ ELSE StatementSequence ]?
END
;
case:
[ CaseLabelList ':' StatementSequence ]?
/* This rule is changed in new modula-2 */
;
WhileStatement:
WHILE expression DO StatementSequence END
;
RepeatStatement:
REPEAT StatementSequence UNTIL expression
;
ForStatement:
FOR IDENT
BECOMES expression
TO expression
[ BY ConstExpression ]?
DO StatementSequence END
;
LoopStatement:
LOOP StatementSequence END
;
WithStatement:
WITH designator DO StatementSequence END
;

295
lang/m2/comp/tab.c Normal file
View file

@ -0,0 +1,295 @@
/* @cc tab.c -o $INSTALLDIR/tab@
tab - table generator
Author: Erik Baalbergen (..tjalk!erikb)
*/
#include <stdio.h>
static char *RcsId = "$Header$";
#define MAXTAB 10000
#define MAXBUF 10000
#define COMCOM '-'
#define FILECOM '%'
int InputForm = 'c';
char OutputForm[MAXBUF] = "%s,\n";
int TabSize = 257;
char *Table[MAXTAB];
char *Name;
char *ProgCall;
main(argc, argv)
char *argv[];
{
ProgCall = *argv++;
argc--;
while (argc-- > 0) {
if (**argv == COMCOM) {
option(*argv++);
}
else {
process(*argv++, InputForm);
}
}
}
char *
Salloc(s)
char *s;
{
char *malloc();
char *ns = malloc(strlen(s) + 1);
if (ns) {
strcpy(ns, s);
}
return ns;
}
option(str)
char *str;
{
/* note that *str indicates the source of the option:
either COMCOM (from command line) or FILECOM (from a file).
*/
switch (*++str) {
case ' ': /* command */
case '\t':
case '\0':
break;
case 'I':
InputForm = *++str;
break;
case 'f':
if (*++str == '\0') {
fprintf(stderr, "%s: -f: name expected\n", ProgCall);
exit(1);
}
DoFile(str);
break;
case 'F':
sprintf(OutputForm, "%s\n", ++str);
break;
case 'T':
printf("%s\n", ++str);
break;
case 'p':
PrintTable();
break;
case 'C':
ClearTable();
break;
case 'S':
{
register i = stoi(++str);
if (i <= 0 || i > MAXTAB) {
fprintf(stderr, "%s: size would exceed maximum\n",
ProgCall);
}
else {
TabSize = i;
}
break;
}
default:
fprintf(stderr, "%s: bad option -%s\n", ProgCall, str);
}
}
ClearTable()
{
register i;
for (i = 0; i < MAXTAB; i++) {
Table[i] = 0;
}
}
PrintTable()
{
register i;
for (i = 0; i < TabSize; i++) {
if (Table[i]) {
printf(OutputForm, Table[i]);
}
else {
printf(OutputForm, "0");
}
}
}
process(str, format)
char *str;
{
char *cstr = str;
char *Name = cstr; /* overwrite original string! */
/* strip of the entry name
*/
while (*str && *str != ':') {
if (*str == '\\') {
++str;
}
*cstr++ = *str++;
}
if (*str != ':') {
fprintf(stderr, "%s: bad specification: \"%s\", ignored\n",
ProgCall, Name);
return 0;
}
*cstr = '\0';
str++;
switch (format) {
case 'c':
return c_proc(str, Name);
default:
fprintf(stderr, "%s: bad input format\n", ProgCall);
}
return 0;
}
c_proc(str, Name)
char *str;
char *Name;
{
int ch, ch2;
int quoted();
while (*str) {
if (*str == '\\') {
ch = quoted(&str);
}
else {
ch = *str++;
}
if (*str == '-') {
if (*++str == '\\') {
ch2 = quoted(&str);
}
else {
if (ch2 = *str++);
else str--;
}
if (ch > ch2) {
fprintf(stderr, "%s: bad range\n", ProgCall);
return 0;
}
if (ch >= 0 && ch2 <= 255)
while (ch <= ch2)
Table[ch++] = Salloc(Name);
}
else {
if (ch >= 0 && ch <= 255)
Table[ch] = Salloc(Name);
}
}
return 1;
}
int
quoted(pstr)
char **pstr;
{
register int ch;
register int i;
register char *str = *pstr;
if ((*++str >= '0') && (*str <= '9')) {
ch = 0;
for (i = 0; i < 3; i++) {
ch = 8 * ch + *str - '0';
if (*++str < '0' || *str > '9')
break;
}
}
else {
switch (*str++) {
case 'n':
ch = '\n';
break;
case 't':
ch = '\t';
break;
case 'b':
ch = '\b';
break;
case 'r':
ch = '\r';
break;
case 'f':
ch = '\f';
break;
default :
ch = *str;
}
}
*pstr = str;
return ch & 0377;
}
int
stoi(str)
char *str;
{
register i = 0;
while (*str >= '0' && *str <= '9') {
i = i * 10 + *str++ - '0';
}
return i;
}
char *
getline(s, n, fp)
char *s;
FILE *fp;
{
register c = getc(fp);
char *str = s;
while (n--) {
if (c == EOF) {
return NULL;
}
else
if (c == '\n') {
*str++ = '\0';
return s;
}
*str++ = c;
c = getc(fp);
}
s[n - 1] = '\0';
return s;
}
#define BUFSIZE 1024
DoFile(name)
char *name;
{
char text[BUFSIZE];
FILE *fp;
if ((fp = fopen(name, "r")) == NULL) {
fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name);
exit(1);
}
while (getline(text, BUFSIZE, fp) != NULL) {
if (text[0] == FILECOM) {
option(text);
}
else {
process(text, InputForm);
}
}
}

99
lang/m2/comp/tokenname.c Normal file
View file

@ -0,0 +1,99 @@
#include "tokenname.h"
#include "Lpars.h"
#include "idf.h"
/* To centralize the declaration of %tokens, their presence in this
file is taken as their declaration. The Makefile will produce
a grammar file (tokenfile.g) from this file. This scheme ensures
that all tokens have a printable name.
Also, the "token2str.c" file is produced from this file.
*/
static char *RcsId = "$Header$";
struct tokenname tkspec[] = { /* the names of the special tokens */
{IDENT, "identifier"},
{STRING, "string"},
{INTEGER, "integer"},
{REAL, "real"},
{0, ""}
};
struct tokenname tkcomp[] = { /* names of the composite tokens */
{UNEQUAL, "<>"},
{LESSEQUAL, "<="},
{GREATEREQUAL, ">="},
{UPTO, ".."},
{BECOMES, ":="},
{0, ""}
};
struct tokenname tkidf[] = { /* names of the identifier tokens */
{AND, "AND"},
{ARRAY, "ARRAY"},
{BEGIN, "BEGIN"},
{BY, "BY"},
{CASE, "CASE"},
{CONST, "CONST"},
{DEFINITION, "DEFINITION"},
{DIV, "DIV"},
{DO, "DO"},
{ELSE, "ELSE"},
{ELSIF, "ELSIF"},
{END, "END"},
{EXIT, "EXIT"},
{EXPORT, "EXPORT"},
{FOR, "FOR"},
{FROM, "FROM"},
{IF, "IF"},
{IMPLEMENTATION, "IMPLEMENTATION"},
{IMPORT, "IMPORT"},
{IN, "IN"},
{LOOP, "LOOP"},
{MOD, "MOD"},
{MODULE, "MODULE"},
{NOT, "NOT"},
{OF, "OF"},
{OR, "OR"},
{POINTER, "POINTER"},
{PROCEDURE, "PROCEDURE"},
{QUALIFIED, "QUALIFIED"},
{RECORD, "RECORD"},
{REPEAT, "REPEAT"},
{RETURN, "RETURN"},
{SET, "SET"},
{THEN, "THEN"},
{TO, "TO"},
{TYPE, "TYPE"},
{UNTIL, "UNTIL"},
{VAR, "VAR"},
{WHILE, "WHILE"},
{WITH, "WITH"},
{0, ""}
};
struct tokenname tkinternal[] = { /* internal keywords */
{0, "0"}
};
struct tokenname tkstandard[] = { /* standard identifiers */
{0, ""}
};
/* Some routines to handle tokennames */
reserve(resv)
register struct tokenname *resv;
{
/* The names of the tokens described in resv are entered
as reserved words.
*/
register struct idf *p;
while (resv->tn_symbol) {
p = str2idf(resv->tn_name, 0);
if (!p) fatal("Out of Memory");
p->id_reserved = resv->tn_symbol;
resv++;
}
}

7
lang/m2/comp/tokenname.h Normal file
View file

@ -0,0 +1,7 @@
/* $Header$ */
struct tokenname { /* Used for defining the name of a
token as identified by its symbol
*/
int tn_symbol;
char *tn_name;
};