Initial revision

This commit is contained in:
ceriel 1985-10-03 10:35:55 +00:00
parent 4ed4dab397
commit ca4cc6fe80
2 changed files with 1024 additions and 0 deletions

548
util/LLgen/src/LLgen.g Normal file
View file

@ -0,0 +1,548 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* LLgen.g
* Defines the grammar of LLgen.
* Some routines that are included build the internal structure
*/
{
# include "types.h"
# include "io.h"
# include "tunable.h"
# include "extern.h"
# include "assert.h"
# ifndef NORCSID
static string rcsidb = "$Header$";
# endif
p_mem alloc();
string store();
t_gram search();
static int nparams; /* parameter count for nonterminals */
static t_gram elem; /* temporary space */
static int acount; /* count #of global actions */
/* Here are defined : */
STATIC copyact();
STATIC unsigned get();
STATIC t_gram mkalt();
STATIC t_gram mkterm();
STATIC p_gram copyrule();
}
%start LLparse, spec;
spec : { acount = 0; }
[ %persistent def ]*
{ /*
* Put an endmarker in temporary file
*/
putc('\0',fact); putc('\0',fact);
}
;
def { register string p; }
: rule
/*
* A grammar rule
*/
| C_TOKEN listel [ ',' listel ]* ';'
/*
* A token declaration
*/
| C_START C_IDENT
{ p = store(lextoken.t_string); }
',' C_IDENT ';'
/*
* A start symbol declaration
*/
{ /*
* Put the declaration in the list
* of start symbols
*/
t_gram temp;
register p_start ff;
temp = search(NONTERM,lextoken.t_string,BOTH);
ff = (p_start) alloc(sizeof(t_start));
ff->ff_nont = &nonterms[g_getnont(&temp)];
ff->ff_name = p;
ff->ff_next = start;
start = ff;
}
| C_LEXICAL C_IDENT
{ if (!lexical) {
lexical = store(lextoken.t_string);
} else error(linecount,"Duplicate %%lexical");
}
';'
/*
* Declaration of a name for the lexical analyser.
* May appear only once
*/
| action(0) { acount++; }
/*
* A global C-declaration
*/
| firsts
/*
* declarations for macros
*/
;
listel : C_IDENT { search(TERMINAL,lextoken.t_string,ENTERING); }
;
rule { register p_nont p;}
: /*
* grammar for a production rule
*/
C_IDENT { t_gram temp;
temp = search(NONTERM,lextoken.t_string,BOTH);
p = &nonterms[g_getnont(&temp)];
if (p->n_rule) {
error(linecount,
"nonterminal already defined");
}
/*
* Remember the order in which the nonterminals
* were defined. Code must be generated in that
* order to keep track with the actions on the
* temporary file
*/
*maxorder++ = p - nonterms;
p->n_count = acount;
acount = 0;
p->n_lineno = linecount;
}
[ params(2) { p->n_flags |= PARAMS;
if (nparams > 7) {
error(linecount,"Too many parameters");
} else p->n_flags |= (nparams << 3);
}
]?
[ action(0) { p->n_flags |= LOCALS; }
]?
':' productions(&(p->n_rule)) ';'
;
action(int n;)
/*
* The parameter n is non-zero when the opening and closing
* bracket must be copied along with the action
*/
: '{' { copyact('{','}',n,0); }
'}'
;
productions(p_gram *p;)
/*
* One or more alternatives
*/
{ p_gram prod;
int conflres = 0;
int t = 0;
int haddefault = 0;
t_gram alts[100];
register p_gram p_alts = alts;
int o_lc, n_lc;
} :
{ n_lc = o_lc = linecount; }
simpleproduction(p,&conflres)
[ '|' { n_lc = linecount; }
[ C_DEFAULT { if (haddefault) {
error(linecount,
"multiple %%default");
}
haddefault = 1;
t |= DEF;
}
]?
simpleproduction(&prod,&t)
{ if (p_alts - alts >= 97) {
error(n_lc,"Too many alternatives");
p_alts = alts;
}
*p_alts++ = mkalt(*p,conflres,o_lc);
o_lc = n_lc;
conflres = t;
t = 0;
*p = prod;
}
]* { if (conflres & ~DEF) {
error(n_lc,
"Resolver on last alternative");
}
if (p_alts > alts) {
*p_alts++ = mkalt(*p,conflres,n_lc);
g_settype(p_alts,EORULE);
*p = copyrule(alts,p_alts+1-alts);
if (!haddefault) {
((p_link) pentry[g_getcont(*p)])
->l_flag |= DEF;
}
}
}
;
{
STATIC t_gram
mkalt(prod,condition,lc) p_gram prod; {
/*
* Create an alternation, initialise it and return
* a grammar element containing it
*/
register unsigned hulp;
register p_link l;
t_gram r;
g_init(&r);
hulp = get(sizeof(*l));
l = (p_link) pentry[hulp];
l->l_rule = prod;
l->l_flag = condition;
g_setcont(&r,hulp);
g_settype(&r,ALTERNATION);
r.g_lineno = lc;
return r;
}
}
simpleproduction(p_gram *p; register int *conflres;)
{ t_gram rule[100];
register p_gram p_rule = rule;
t_reps reps;
} :
{ r_init(&reps); }
[
/*
* Optional conflict reslover
*/
C_IF expr { *conflres |= COND; }
| C_PREFER { *conflres |= PREFERING; }
| C_AVOID { *conflres |= AVOIDING; }
]?
[ %persistent elem
{ if (p_rule - rule >= 98) {
error(linecount,"Production too long");
p_rule = rule;
}
r_setkind(&reps,FIXED);
r_setnum(&reps,0);
}
[ repeats(&reps)
{ if (g_gettype(&elem) != TERM) {
*p_rule = elem;
g_settype(p_rule+1,EORULE);
elem = mkterm(copyrule(p_rule,2),
0,
&reps,
p_rule->g_lineno);
}
}
]? { if (g_gettype(&elem) == TERM) {
register p_term q;
q = (p_term) pentry[g_getcont(&elem)];
q->t_reps = reps;
if (q->t_flags & RESOLVER &&
(r_getkind(&reps) == PLUS ||
r_getkind(&reps) == FIXED)) {
error(linecount,
"illegal %%while");
}
if (q->t_flags & PERSISTENT &&
r_getkind(&reps) == FIXED) {
error(linecount,
"illegal %%persistent");
}
}
*p_rule++ = elem;
}
]* { register p_term q;
g_settype(p_rule,EORULE);
if (g_gettype(&rule[0]) == TERM &&
p_rule-rule == 1) {
q=(p_term)pentry[g_getcont(&rule[0])];
if (r_getkind(&(q->t_reps))==FIXED &&
r_getnum(&(q->t_reps)) == 0) {
*p = q->t_rule;
}
else *p = copyrule(rule,2);
}
else *p = copyrule(rule,p_rule-rule+1);
}
;
{
STATIC t_gram
mkterm(prod,flags,reps,lc) p_gram prod; register p_reps reps; {
/*
* Create a term, initialise it and return
* a grammar element contianing it
*/
register p_term q;
register unsigned hulp;
t_gram r;
g_init(&r);
hulp = get(sizeof(*q));
q = (p_term) pentry[hulp];
q->t_rule = prod;
q->t_contains = 0;
/* "*1" = "?" */
if (r_getnum(reps) == 1 && r_getkind(reps) == STAR) {
r_setkind(reps,OPT);
}
q->t_reps = *reps;
q->t_flags = flags;
g_settype(&r,TERM);
g_setcont(&r,hulp);
r.g_lineno = lc;
return r;
}
}
elem { register short t = 0;
p_gram p1;
t_reps reps;
int ln;
} :
'[' { ln = linecount; }
[ C_WHILE expr { t |= RESOLVER; }
]?
[ C_PERSISTENT { t |= PERSISTENT; }
]?
productions(&p1)
']' { r_init(&reps);
elem = mkterm(p1,t,&reps,ln);
}
| %default
C_IDENT { elem = search(UNKNOWN,lextoken.t_string,BOTH); }
[ params(0) { if (nparams > 6) {
error(linecount,"Too many parameters");
} else g_setnpar(&elem,nparams+1);
if (g_gettype(&elem) == TERMINAL) {
error(linecount,
"Terminal with parameters");
}
}
]?
| C_LITERAL { elem = search(LITERAL,lextoken.t_string,BOTH); }
| { g_settype(&elem,ACTION);
elem.g_lineno = linecount;
}
action(1)
;
params(int n;)
: '(' { copyact('(',')',n,0); }
')'
;
expr : '(' { copyact('(',')',1,0); }
')'
;
repeats(t_reps *t;) { int t1 = 0; } :
[
'?' { r_setkind(t,OPT); }
| [ '*' { r_setkind(t,STAR); }
| '+' { r_setkind(t,PLUS); }
]
number(&t1)?
{ if (t1 == 1 && r_getkind(t) == PLUS) {
error(linecount,
"Illegal repetition specifier");
}
}
| number(&t1)
] { r_setnum(t,t1); }
;
number(int *t;)
: C_NUMBER
{ *t = lextoken.t_num;
if (*t <= 0 || *t >= 8192) {
error(linecount,"Illegal number");
}
}
;
firsts { register string p; }
: C_FIRST C_IDENT
{ p = store(lextoken.t_string); }
',' C_IDENT ';'
{ /*
* Store this %first in the list belonging
* to this input file
*/
t_gram temp;
register p_first ff;
temp = search(NONTERM,lextoken.t_string,BOTH);
ff = (p_first) alloc(sizeof(t_first));
ff->ff_nont = &nonterms[g_getnont(&temp)];
ff->ff_name = p;
ff->ff_next = pfile->f_firsts;
pfile->f_firsts = ff;
}
;
{
STATIC
copyact(ch1,ch2,flag,level) char ch1,ch2; {
/*
* Copy an action to file f. Opening bracket is ch1, closing bracket
* is ch2.
* If flag = 1, copy opening and closing parameters too.
* If flag = 2, the copy is a parameter declaration copy.
* Give an error message if not ending on a ';'
*/
register FILE *f;
register ch; /* Current char */
register match; /* used to read strings */
int saved; /* save linecount */
int semicolon = 0;
f = fact;
if (!level) {
saved = linecount;
nparams = 0; /* count comma's */
putc('\0',f);
fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
if (flag == 1) putc(ch1,f);
}
else putc(ch1,f);
for (;;) {
ch = input();
if (ch == ch2) {
if (!level) unput(ch);
if (level || flag == 1) putc(ch,f);
if ((!level) && flag == 2 && !semicolon) {
error(linecount,"missing ';'");
}
return;
}
if (! isspace(ch)) semicolon = 0;
switch(ch) {
case ')':
case '}':
case ']':
error(linecount,"Parentheses mismatch");
break;
case '(':
copyact('(',')',flag,level+1);
continue;
case '{':
copyact('{','}',flag,level+1);
continue;
case '[':
copyact('[',']',flag,level+1);
continue;
case '/':
ch = input();
unput(ch);
if (ch == '*') {
skipcomment(1);
continue;
}
break;
case ';':
semicolon = 1;
/* Fall through */
case ',':
if (!level) { /*
* Only ','s and ';'s on the
* outer level are counted
*/
nparams++;
}
break;
case '\'':
case '"' :
/*
* watch out for brackets in strings, they do not
* count !
*/
match = ch;
putc(ch,f);
while((ch = input())) {
if (ch == match) break;
if (ch == '\\') {
putc(ch,f);
ch = input();
}
if (ch == '\n') {
error(linecount,"newline in string");
unput(match);
}
putc(ch,f);
}
if (ch == match) break;
/* Fall through */
case EOF :
if (!level) error(saved,"Action does not terminate");
return;
}
putc(ch,f);
}
}
static int ecount; /* Index in "pentry" array */
STATIC unsigned
get(size) unsigned size; {
/*
* Get some core, save a pointer to it in "pentry",
* return index in pentry
*/
if (ecount >= ENTSIZ) fatal(linecount,"Entry table overflow");
pentry[ecount] = alloc(size);
return ecount++;
}
STATIC p_gram
copyrule(p,length) register p_gram p; register length; {
/*
* Returns a pointer to a grammar rule that was created in
* p. The space pointed to by p can now be reused
*/
register p_gram t;
p_gram rule;
t = (p_gram) alloc((unsigned) length * sizeof(t_gram));
rule = t;
while (length--) {
*t++ = *p++;
}
return rule;
}
}

476
util/LLgen/src/check.c Normal file
View file

@ -0,0 +1,476 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* check.c
* Several routines to perform checks and printouts
*/
# include "types.h"
# include "extern.h"
# include "tunable.h"
# include "io.h"
# include "sets.h"
# include "assert.h"
# ifndef NORCSID
static string rcsid1 = "$Header$";
# endif
# define PTERM(p) fprintf(fout,(p)->h_num < 0400 ? "'%s' " : "%s ",(p)->h_name);
static string c_first = "> firstset ";
static string c_contains = "> containset ";
static string c_follow = "> followset ";
p_set setalloc();
static string ntname; /* nonterminal that is currently checked */
static p_nont nt; /* pointer to its struct */
static int level;
/* In this file are defined : */
extern conflchecks();
STATIC newline();
STATIC printset();
STATIC check();
STATIC moreverbose();
STATIC prrule();
STATIC cfcheck();
STATIC resolve();
STATIC propagate();
STATIC spaces();
conflchecks() {
/*
* Check for conflicts, that is,
* in a repeating term, the FIRST and FOLLOW must be disjunct,
* unless there is a disambiguating condition.
* in an alternation, the sets that determine the direction to take,
* must be disjunct.
*/
register p_nont p;
register FILE *f;
register short *s;
p_file x = files;
f_input = x->f_name;
if (verbose >= 3) {
for (p = nonterms; p < maxnt; p++) p->n_flags |= VERBOSE;
}
if (verbose) {
if ((f = fopen(f_out,"w")) == NULL) fatal(1,e_noopen,f_out);
fout = f;
}
/*
* Check the rules in the order in which they are declared,
* and input file by input file, to give proper error messages
*/
for (; x->f_end < maxorder; x++) {
f_input = x->f_name;
# ifndef NDEBUG
if (debug) fprintf(stderr, "CHECK: file %s\n", f_input);
# endif
for (s = x->f_start; s <= x->f_end; s++) {
nt = p = &nonterms[*s];
ntname = (min_nt_ent + *s)->h_name;
# ifndef NDEBUG
if (debug) fprintf(stderr,"CHECK: nonterminal %s\n", ntname);
# endif
if (p->n_flags & RECURSIVE) {
error(p->n_lineno,
"Recursion in default for nonterminal %s",
ntname);
}
check(p->n_rule);
/*
* If a printout is needed for this rule in
* LL.output, just do it
*/
if (verbose && (p->n_flags & VERBOSE)) {
fprintf(f,"\n%s :\n",ntname);
printset(p->n_first,c_first);
printset(p->n_contains,c_contains);
printset(p->n_follow,c_follow);
fputs("> rule\n\t",f);
level = 8;
prrule(p->n_rule);
level = 0;
newline();
}
/*
* Now, the conflicts may be resolved
*/
resolve(p->n_rule);
}
}
if (verbose) fclose(f);
}
STATIC
newline() {
/*
* Newline and "level" spaces indentation
*/
if (level > 0) fprintf(fout,"\n%*c",level,' ');
else putc('\n',fout);
}
STATIC
printset(p,s) register p_set p; string s; {
/*
* Print the elements of a set
*/
register FILE *f;
register i;
register j;
int k;
int hulp;
k = strlen(s)+1;
/*
* k contains relative level of indentation
*/
f = fout;
fprintf(f,"%s{ ",s);
j = level+1+k;
/*
* j will gather the total length of the line
*/
if (p == (p_set) 0) fputs(">non existent (yet?)< ",f);
else {
for (i = 0; i < nterminals; i++) {
if (IN(p,i)) {
hulp = strlen(h_entry[i].h_name)+1;
if (h_entry[i].h_num < 0400) hulp += 2;
if ((j += hulp) >= 78) {
/*
* Line becoming too long
*/
j = level+k+1+hulp;
newline();
fprintf(f,">%*c",k,' ');
}
PTERM(&h_entry[i]);
}
}
if (ntprint) for (i = 0; i < nnonterms; i++) {
/*
* Nonterminals in the set must also be printed
*/
if (NTIN(p,i)) {
hulp = strlen((min_nt_ent+i)->h_name)+3;
if ((j += hulp) >= 78) {
j = level + k + 1 + hulp;
newline();
fprintf(f,">%*c",k,' ');
}
fprintf(f,"<%s> ",(min_nt_ent+i)->h_name);
}
}
}
putc('}',f);
newline();
}
STATIC
check(p) register p_gram p; {
/*
* Search for conflicts in a grammar rule.
*/
p_set temp;
for (;;) {
switch (g_gettype(p)) {
case EORULE :
return;
case NONTERM :
if (g_getnpar(p)!=((nonterms[g_getnont(p)].n_flags&077) >> 3)){
error(p->g_lineno,
"Call of %s : parameter count mismatch",
(min_nt_ent+g_getnont(p))->h_name);
}
break;
case TERM : {
register p_term q;
q = (p_term) pentry[g_getcont(p)];
check(q->t_rule);
if (r_getkind(&(q->t_reps)) == FIXED) break;
if (setempty(q->t_first)) {
q->t_flags |= EMPTYFIRST;
nt->n_flags |= VERBOSE;
error(p->g_lineno, "No symbols in term");
}
if (empty(q->t_rule)) {
q->t_flags |= EMPTYTERM;
nt->n_flags |= VERBOSE;
error(p->g_lineno, "Term produces empty");
}
temp = setalloc(setsize);
setunion(temp,q->t_first,setsize);
if (!setintersect(temp,q->t_follow,setsize)) {
/*
* q->t_first * q->t_follow != EMPTY
*/
if (!(q->t_flags & RESOLVER)) {
/*
* No conflict resolver
*/
error(p->g_lineno,
"Repitition conflict");
nt->n_flags |= VERBOSE;
if (verbose == 2) moreverbose(temp);
}
} else {
if (q->t_flags & RESOLVER) {
q->t_flags |= NOCONF;
error(p->g_lineno,
"%%while, no conflict");
nt->n_flags |= VERBOSE;
}
}
free((p_mem) temp);
break; }
case ALTERNATION : {
register p_link l;
l = (p_link) pentry[g_getcont(p)];
temp = setalloc(setsize);
setunion(temp,l->l_symbs,setsize);
if(!setintersect(temp,l->l_others,setsize)) {
/*
* temp now contains the conflicting
* symbols
*/
if (!(l->l_flag & (COND|PREFERING|AVOIDING))) {
error(p->g_lineno,
"Alternation conflict");
nt->n_flags |= VERBOSE;
if (verbose == 2) moreverbose(temp);
}
} else {
if (l->l_flag & (COND|PREFERING|AVOIDING)) {
l->l_flag |= NOCONF;
error(p->g_lineno,"No conflict");
nt->n_flags |= VERBOSE;
}
}
free( (p_mem) temp);
check(l->l_rule);
break; }
}
p++;
}
}
STATIC
moreverbose(t) register p_set t; {
/*
* t points to a set containing conflicting symbols and pssibly
* also containing nonterminals.
* Take care that a printout will be prepared for these nonterminals
*/
register i;
if (verbose) for (i = 0; i < nnonterms; i++) {
if (NTIN(t,i)) nonterms[i].n_flags |= VERBOSE;
}
}
STATIC
prrule(p) register p_gram p; {
/*
* Create a verbose printout of grammar rule p
*/
register FILE *f;
int present = 0;
f = fout;
for (;;) {
switch (g_gettype(p)) {
case EORULE :
putc('\n',f);
return;
case TERM : {
register p_term q;
register c;
q = (p_term) pentry[g_getcont(p)];
if (present) newline();
if (r_getkind(&(q->t_reps)) != FIXED ||
r_getnum(&(q->t_reps)) != 0) {
fputs("[ ",f);
level += 4;
if (q->t_flags & RESOLVER) {
fputs("%%while (..)",f);
newline();
}
if (r_getkind(&(q->t_reps)) != FIXED) {
printset(q->t_first, c_first);
printset(q->t_contains, c_contains);
printset(q->t_follow,c_follow);
if (q->t_flags & EMPTYFIRST) {
fputs(">>> empty first",f);
newline();
}
if (q->t_flags & EMPTYTERM) {
fputs(">>> term produces empty",f);
newline();
}
cfcheck(q->t_first,q->t_follow,
q->t_flags & RESOLVER);
}
prrule(q->t_rule);
level -= 4;
spaces();
c = r_getkind(&(q->t_reps));
putc(']',f);
if (c != FIXED) {
c = (c==STAR)?'*':(c==PLUS)?'+':'?';
putc(c,f);
}
if (c = r_getnum(&(q->t_reps))) {
fprintf(f,"%d",c);
}
newline();
} else {
prrule(q->t_rule);
spaces();
}
break; }
case ACTION :
fputs("{..} ",f);
break;
case ALTERNATION : {
register p_link l;
l = (p_link) pentry[g_getcont(p)];
if (l->l_flag & (COND|PREFERING|AVOIDING)) {
printset(l->l_symbs,"> alt with resolver on ");
} else printset(l->l_symbs,"> alternative on ");
cfcheck(l->l_symbs,
l->l_others,
(int)(l->l_flag&(COND|PREFERING|AVOIDING)));
fputs(" ",f);
level += 4;
prrule(l->l_rule);
level -= 4;
spaces();
if (g_gettype(p+1) == EORULE) {
fputs("> end alternatives\n",f);
return;
}
p++; continue; }
case TERMINAL :
PTERM(&h_entry[g_getcont(p)]);
break;
case NONTERM :
fprintf(f,"%s ",(g_getnont(p)+min_nt_ent)->h_name);
break;
}
p++;
present = 1;
}
}
STATIC
cfcheck(s1,s2,flag) p_set s1,s2; {
/*
* Check if s1 and s2 have elements in common.
* If so, flag must be non-zero, indicating that there is a
* conflict resolver, otherwise, flag must be zero, indicating
* that there is not.
*/
register p_set temp;
temp = setalloc(setsize);
setunion(temp,s1,setsize);
if (!setintersect(temp,s2,setsize)) {
if (! flag) {
printset(temp,">>> conflict on ");
newline();
}
} else {
if (flag) {
fputs(">>> %if/%while, no conflict",fout);
newline();
}
}
free((p_mem) temp);
}
STATIC
resolve(p) register p_gram p; {
/*
* resolve conflicts, as specified by the user
*/
for (;;) {
switch (g_gettype(p)) {
case EORULE :
return;
case TERM :
resolve(((p_term) pentry[g_getcont(p)])->t_rule);
break;
case ALTERNATION : {
register p_link l;
l = (p_link) pentry[g_getcont(p)];
if (l->l_flag & AVOIDING) {
/*
* On conflicting symbols, this rule
* is never chosen
*/
setminus(l->l_symbs,l->l_others,setsize);
}
if (setempty(l->l_symbs)) {
/*
* This may be caused by the statement above
*/
error(p->g_lineno,"Alternative never chosen");
}
resolve(l->l_rule);
if (l->l_flag & PREFERING) propagate(l->l_symbs,p+1);
break; }
}
p++;
}
}
STATIC
propagate(set,p) p_set set; register p_gram p; {
/*
* Propagate the fact that on the elements of set the grammar rule
* p will not be chosen.
*/
while (g_gettype(p) != EORULE) {
setminus(((p_link) pentry[g_getcont(p)])->l_symbs,set,setsize);
p++;
}
}
STATIC
spaces() {
if (level > 0) fprintf(fout,"%*c",level,' ');
}