ack/util/LLgen/src/compute.c

838 lines
17 KiB
C
Raw Normal View History

1985-10-02 22:20:04 +00:00
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* compute.c
* Defines routines to compute FIRST, FOLLOW etc.
* Also checks the continuation grammar from the specified grammar.
*/
# include "types.h"
# include "tunable.h"
# include "extern.h"
# include "sets.h"
# include "assert.h"
# ifndef NDEBUG
# include "io.h"
# endif
# ifndef NORCSID
static string rcsid2 = "$Header$";
# endif
/* Set allocator, defined outside this file */
p_set setalloc();
/*
 * Defined in this file :
 * (every routine is declared before its first use, so that no call
 * depends on an implicit declaration)
 */
extern createsets();
STATIC walk();
extern co_empty();
extern empty();
extern co_first();
STATIC first();
extern co_follow();
STATIC follow();
extern co_symb();
STATIC co_dirsymb();
STATIC co_others();
STATIC do_checkdefault();
STATIC checkdefault();
extern co_contains();
STATIC do_contains();
STATIC contains();
extern co_safes();
STATIC int do_safes();
/* Helpers of do_safes; they are called there before their definition */
extern t_safety();
extern t_after();
createsets() {
	/*
	 * Give every nonterminal a FIRST and a FOLLOW set, and let walk()
	 * allocate the sets needed inside its rule.
	 */
	register p_nont nt;

	nt = nonterms;
	while (nt < maxnt) {
		nt->n_first = setalloc(setsize);
		nt->n_follow = setalloc(setsize);
		walk(nt->n_rule);
		nt++;
	}
}
STATIC
walk(p) register p_gram p; {
	/*
	 * Visit the elements of grammar rule p up to its EORULE marker.
	 * Every term gets a t_first and a t_follow set, every alternative
	 * an l_symbs set; the rules nested inside them are walked as well.
	 * Elements of any other type need no sets and are skipped.
	 */
	register int kind;

	for (; (kind = g_gettype(p)) != EORULE; p++) {
		if (kind == TERM) {
			register p_term trm;

			trm = (p_term) pentry[g_getcont(p)];
			trm->t_first = setalloc(setsize);
			trm->t_follow = setalloc(setsize);
			walk(trm->t_rule);
		}
		else if (kind == ALTERNATION) {
			register p_link alt;

			alt = (p_link) pentry[g_getcont(p)];
			alt->l_symbs = setalloc(setsize);
			walk(alt->l_rule);
		}
	}
}
co_empty() {
	/*
	 * Determine which nonterminals can produce the empty string.
	 * The EMPTY flag is only ever added, and the nonterminals are
	 * re-examined until a complete pass adds no new flag.
	 */
	register int changed;
	register p_nont nt;

	do {
		changed = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			if (nt->n_flags & EMPTY) {
				/* Already known to produce empty */
				continue;
			}
			if (empty(nt->n_rule)) {
				nt->n_flags |= EMPTY;
				changed = 1;
			}
		}
	} while (changed);
}
empty(p) register p_gram p; {
	/*
	 * Can the rule starting at p produce the empty string ?
	 * Returns 1 if so, 0 otherwise. For nonterminals the answer
	 * depends on the EMPTY flags as computed so far.
	 */
	register int kind;

	for (;; p++) {
		kind = g_gettype(p);
		if (kind == EORULE) {
			/* Nothing on the way here had to produce a symbol */
			return 1;
		}
		if (kind == TERMINAL) {
			return 0;
		}
		if (kind == NONTERM) {
			if (!(nonterms[g_getnont(p)].n_flags & EMPTY)) {
				return 0;
			}
		}
		else if (kind == TERM) {
			register p_term trm;
			register int rep;

			trm = (p_term) pentry[g_getcont(p)];
			rep = r_getkind(&(trm->t_reps));
			/*
			 * A STAR or OPT term can always be skipped; any other
			 * term only if its own rule produces empty
			 */
			if (rep != STAR && rep != OPT && !empty(trm->t_rule)) {
				return 0;
			}
		}
		else if (kind == ALTERNATION) {
			/* One empty alternative suffices */
			if (empty(((p_link) pentry[g_getcont(p)])->l_rule)) {
				return 1;
			}
			/* This was the last alternative, and none was empty */
			if (g_gettype(p+1) == EORULE) {
				return 0;
			}
		}
		/* Elements of any other type are simply skipped */
	}
}
co_first() {
	/*
	 * Compute the FIRST set of every nonterminal.
	 * Passes over all nonterminals are repeated until a complete
	 * pass reports no change in any set.
	 */
	register p_nont nt;
	register int changed;

	do {
		changed = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			if (first(nt->n_first, nt->n_rule, 0)) {
				changed = 1;
			}
		}
	} while (changed);
}
STATIC
first(setp,p,flag) p_set setp; register p_gram p; {
/*
* Compute the FIRST set of the rule starting at p and add it to setp.
* If flag = 0, the first sets of the terms and alternations in
* the rule p are computed as well, and the whole rule is walked.
* If flag != 0, the nested sets are used as they are, and the walk
* stops at the first element that cannot produce empty.
* Returns nonzero if any of the sets changed (this relies on
* setunion reporting a change through its return value).
*/
register s; /* Gathers the return value */
int noenter;/* When set, no more elements are entered into
* setp. The walk nevertheless continues through
* the rest of rule p, so that the sets of the
* terms and alternations further on in the
* rule are computed too.
*/
s = 0;
noenter = 0;
for (;;) {
switch (g_gettype(p)) {
case EORULE :
return s;
case TERM : {
register p_term q;
q = (p_term) pentry[g_getcont(p)];
if (flag == 0) s |= first(q->t_first,q->t_rule,0);
if (!noenter) s |= setunion(setp,q->t_first,setsize);
p++;
/*
* If the term can be skipped, the element after it
* also contributes to the FIRST set
*/
if (r_getkind(&(q->t_reps)) == STAR
|| r_getkind(&(q->t_reps)) == OPT
|| empty(q->t_rule) ) continue;
break; }
case ALTERNATION : {
register p_link l;
l = (p_link) pentry[g_getcont(p)];
if (flag == 0) s |= first(l->l_symbs,l->l_rule,0);
if (noenter == 0) {
s |= setunion(setp,l->l_symbs,setsize);
}
/*
* The last alternative ends the rule; otherwise go on
* with the next alternative, which contributes as well
*/
if (g_gettype(p+1) == EORULE) return s;
}
/* Fall Through */
case ACTION :
p++;
continue;
case TERMINAL :
if ((noenter == 0) && !IN(setp,g_getcont(p))) {
s = 1;
PUTIN(setp,g_getcont(p));
}
p++;
break;
case NONTERM : {
register p_nont n;
n = &nonterms[g_getnont(p)];
if (noenter == 0) {
s |= setunion(setp,n->n_first,setsize);
/*
* When ntneeded is set, the nonterminal itself
* is recorded in the set too
*/
if (ntneeded && ! NTIN(setp,n-nonterms)) {
s = 1;
NTPUTIN(setp,n-nonterms);
}
}
p++;
if (n->n_flags & EMPTY) continue;
break; }
}
/*
* Only reached after an element that cannot produce empty:
* nothing further on can add to setp.
*/
if (flag == 0) {
noenter = 1;
continue;
}
return s;
}
}
co_follow() {
	/*
	 * Compute the FOLLOW set of every nonterminal.
	 */
	register p_nont nt;
	register changed;
	register firstpass;
	p_start st;

	/*
	 * Seed the computation: element 0 (EOFILE) can follow every
	 * start symbol
	 */
	st = start;
	while (st) {
		PUTIN(st->ff_nont->n_follow,0);
		st = st->ff_next;
	}
	/*
	 * Repeat until a complete pass changes nothing. Only the first
	 * pass is made with a nonzero flag.
	 */
	firstpass = 1;
	do {
		changed = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			if (follow(nt->n_follow, nt->n_rule, firstpass)) {
				changed = 1;
			}
		}
		firstpass = 0;
	} while (changed);
}
STATIC
follow(setp,p,flag) p_set setp; register p_gram p; {
/*
* setp is the follow set for the rule p.
* Compute the follow sets of the terms and nonterminals in the
* rule p from this set.
* Returns nonzero if any set changed.
* flag controls the use of "first" in the computation.
* It should be 1 the first time a rule is done, 0 otherwise.
* The flag is passed on unchanged to the nested rules.
*/
register s; /* Gathers the return value */
s = 0;
for (;;) {
switch (g_gettype(p)) {
case EORULE :
return s;
case TERM : {
register p_term q;
q = (p_term) pentry[g_getcont(p)];
if (empty(p+1)) {
/*
* If what follows the term can be empty,
* everything that can follow the whole
* rule can also follow the term
*/
s |= setunion(q->t_follow,setp,setsize);
}
/*
* Everything that can start the rest of the rule
* can follow the term
*/
if (flag) s |= first(q->t_follow,p+1,1);
if (r_getkind(&(q->t_reps)) == STAR
|| r_getkind(&(q->t_reps)) == PLUS
|| r_getnum(&(q->t_reps)) ) {
/*
* If the term involves a repetition
* of possibly more than one,
* everything that can start the term
* can also follow it.
* So the term's rule is also processed with the
* FIRST set of the term as its follow set.
*/
s |= follow(q->t_first,q->t_rule,flag);
}
/*
* Now propagate the set computed so far
*/
s |= follow(q->t_follow, q->t_rule,flag);
break; }
case ALTERNATION :
/*
* Just propagate setp: whatever follows the list of
* alternatives follows each alternative
*/
s |= follow(setp,((p_link)pentry[g_getcont(p)])->l_rule,
flag);
break;
case NONTERM : {
register p_nont n;
n = &nonterms[g_getnont(p)];
/*
* Everything that can start the rest of the rule
* can follow the nonterminal
*/
if (flag) s |= first(n->n_follow,p+1,1);
if (empty(p+1)) {
/*
* If the rest of p can produce empty,
* everything that follows p can follow
* the nonterminal
*/
s |= setunion(n->n_follow,setp,setsize);
}
break; }
}
p++;
}
}
co_symb() {
	/*
	 * Compute the sets that decide which alternative is chosen
	 * when there is a choice, and afterwards check the continuation
	 * grammar of every nonterminal.
	 */
	register p_nont nt;

	/* First the sets for the alternations, for all nonterminals */
	nt = nonterms;
	while (nt < maxnt) {
		co_dirsymb(nt->n_follow, nt->n_rule);
		nt++;
	}
	/* Only then the check of the continuation grammar */
	nt = nonterms;
	while (nt < maxnt) {
		do_checkdefault(nt);
		nt++;
	}
}
STATIC
co_dirsymb(setp,p) p_set setp; register p_gram p; {
	/*
	 * Walk rule p, whose follow set is setp, and complete the sets
	 * of the alternations found in it (also inside nested terms).
	 */
	register p_gram firstalt = 0;	/* first alternative of the list */
	register int kind;

	for (; (kind = g_gettype(p)) != EORULE; p++) {
		if (kind == TERM) {
			register p_term trm;

			/* Inside a term, the follow set of the term counts */
			trm = (p_term) pentry[g_getcont(p)];
			co_dirsymb(trm->t_follow, trm->t_rule);
		}
		else if (kind == ALTERNATION) {
			register p_link alt;

			if (firstalt == 0) {
				firstalt = p;
			}
			alt = (p_link) pentry[g_getcont(p)];
			alt->l_others = setalloc(setsize);
			co_dirsymb(setp, alt->l_rule);
			if (empty(alt->l_rule)) {
				/*
				 * An alternative that can produce empty is
				 * also selected by everything that follows it
				 */
				setunion(alt->l_symbs, setp, setsize);
			}
			if (g_gettype(p+1) == EORULE) {
				/*
				 * That was the last alternative. Each choice
				 * is made between one alternative and all the
				 * ones after it; l_others gets the starters
				 * of those later alternatives.
				 */
				co_others(firstalt);
				return;
			}
		}
	}
}
STATIC
co_others(p) register p_gram p; {
	/*
	 * Fill in the l_others sets for the list of alternatives that
	 * starts at p. The element after p is used as an alternative
	 * without a check, so the list must hold at least two of them.
	 */
	register p_link cur, nxt;

	cur = (p_link) pentry[g_getcont(p)];
	p++;
	nxt = (p_link) pentry[g_getcont(p)];
	/* The starters of the next alternative are always "others" */
	setunion(cur->l_others, nxt->l_symbs, setsize);
	if (g_gettype(p+1) == EORULE) {
		/* nxt is the last alternative; nothing comes after it */
		return;
	}
	/*
	 * More alternatives follow: complete nxt->l_others first,
	 * and then add it to cur->l_others
	 */
	co_others(p);
	setunion(cur->l_others, nxt->l_others, setsize);
}
STATIC
do_checkdefault(p) register p_nont p; {
	/*
	 * Check the continuation rule of nonterminal p, unless that
	 * check is in progress right now or has been done already.
	 */
	if (p->n_flags & BUSY) {
		/*
		 * We got back at a nonterminal that is still being
		 * checked: recursion in the continuation grammar.
		 * The exclusive or toggles both bits, so BUSY (which
		 * is set here) is dropped and RECURSIVE is flipped.
		 */
		p->n_flags ^= (RECURSIVE|BUSY);
	}
	else if (!(p->n_flags & CONTIN)) {
		/*
		 * Not done before. Mark the nonterminal as busy while
		 * its rule is checked, then toggle BUSY and CONTIN.
		 */
		p->n_flags |= BUSY;
		checkdefault(p->n_rule);
		p->n_flags ^= (CONTIN|BUSY);
	}
}
STATIC
checkdefault(p) register p_gram p; {
	/*
	 * Walk grammar rule p and check the part of it that belongs
	 * to the continuation grammar.
	 */
	register int kind;

	for (; (kind = g_gettype(p)) != EORULE; p++) {
		if (kind == NONTERM) {
			/*
			 * The nonterminal we came from is still marked as
			 * busy, so direct and indirect recursion show up
			 * in do_checkdefault
			 */
			do_checkdefault(&nonterms[g_getnont(p)]);
		}
		else if (kind == ALTERNATION) {
			register p_link alt;

			alt = (p_link) pentry[g_getcont(p)];
			if (alt->l_flag & DEF) {
				/*
				 * Only the alternative marked DEF belongs
				 * to the continuation grammar
				 */
				checkdefault(alt->l_rule);
				return;
			}
		}
		else if (kind == TERM) {
			register p_term trm;
			register int rep;

			trm = (p_term) pentry[g_getcont(p)];
			/* The rest of the rule is done first */
			checkdefault(p+1);
			/*
			 * The term itself is in the continuation grammar
			 * if it is FIXED or PLUS; an OPT or STAR term only
			 * if it is %persistent
			 */
			rep = r_getkind(&(trm->t_reps));
			if (rep == FIXED || rep == PLUS ||
			    (trm->t_flags & PERSISTENT)) {
				checkdefault(trm->t_rule);
			}
			return;
		}
	}
}
co_contains() {
	/*
	 * Compute the contains sets, in two passes over the nonterminals:
	 * first do_contains for each of them, then contains on each rule
	 * without a set to collect in.
	 */
	register p_nont nt;
	register p_set scratch;	/* allocated and freed, never used */

	nt = nonterms;
	while (nt < maxnt) {
		do_contains(nt);
		nt++;
	}
	/*
	 * NOTE(review): the allocate/free pairs below have no visible
	 * effect in this file; they are kept as they were.
	 */
	scratch = setalloc(setsize);
# ifndef NDEBUG
	if (debug) fputs("Contains 1 done\n", stderr);
# endif
	free(scratch);
	nt = nonterms;
	while (nt < maxnt) {
		contains(nt->n_rule, (p_set) 0);
		nt++;
	}
# ifndef NDEBUG
	if (debug) fputs("Contains 2 done\n", stderr);
# endif
	scratch = setalloc(setsize);
	free(scratch);
}
STATIC
do_contains(n) register p_nont n; {
	/*
	 * Compute the contains set of nonterminal n, unless it exists
	 * already. The set is allocated before the rule is walked, so a
	 * recursive reference to n ends up at the early return.
	 */
	if (n->n_contains != 0) {
		return;
	}
	n->n_contains = setalloc(setsize);
	contains(n->n_rule, n->n_contains);
	if (n->n_flags & EMPTY) {
		/*
		 * The rule can produce empty, so a symbol that can also
		 * follow the rule is not guaranteed to be eaten by it.
		 * Such symbols must not stay in the set, or else the
		 * generated parser may loop forever.
		 */
		setminus(n->n_contains, n->n_follow, setsize);
	}
	/* The symbols that start the rule, however, are always eaten */
	setunion(n->n_contains, n->n_first, setsize);
}
STATIC
contains(p,set) register p_gram p; register p_set set; {
/*
* Does the real computation of the contains-sets
*/
for (;;) {
switch (g_gettype(p)) {
case EORULE :
return;
case TERM : {
register p_term q;
q = (p_term) pentry[g_getcont(p)];
if ((q->t_flags & PERSISTENT) ||
r_getkind(&(q->t_reps)) == PLUS ||
r_getkind(&(q->t_reps)) == FIXED) {
/*
* In these cases, the term belongs to the
* continuation grammar.
* Otherwise, q->t_contains is just
* q->t_first
*/
if (!q->t_contains) {
q->t_contains = setalloc(setsize);
}
contains(q->t_rule,q->t_contains);
if (empty(q->t_rule)) {
/*
* Same trouble as mentioned in the
* routine do_contains
*/
setminus(q->t_contains,q->t_follow,
setsize);
}
setunion(q->t_contains,q->t_first,setsize);
} else {
contains(q->t_rule, (p_set) 0);
q->t_contains = q->t_first;
}
if (set) setunion(set,q->t_contains,setsize);
break; }
case NONTERM : {
register p_nont n;
n = &nonterms[g_getnont(p)];
do_contains(n);
if(set) setunion(set, n->n_contains,setsize);
break; }
case ALTERNATION : {
register p_link l;
l = (p_link) pentry[g_getcont(p)];
contains(l->l_rule,
(l->l_flag & DEF) ? set : (p_set) 0);
break; }
case TERMINAL : {
register hulp;
if (set) {
hulp = g_getcont(p);
assert(hulp < nterminals);
PUTIN(set,hulp);
}}
}
p++;
}
}
/*
* Shared between co_safes and do_safes: set to 1 whenever a safety
* value was modified, so that co_safes makes another pass.
*/
static int change;
co_safes() {
/*
* Compute the safety of each nonterminal and term.
* The safety gives an answer to the question whether a scan is done,
* and how it should be handled.
*/
register p_nont p;
register i;
register p_start st;
for (p = nonterms; p < maxnt; p++) {
/*
* Don't know anything yet
*/
setntsafe(p, NOSAFETY);
setntout(p, NOSAFETY);
1985-10-02 22:20:04 +00:00
}
for (st = start; st; st = st->ff_next) {
/*
* But start symbols are called with lookahead done
*/
p = st->ff_nont;
setntsafe(p,SCANDONE);
1985-10-02 22:20:04 +00:00
}
change = 1;
while (change) {
change = 0;
for (p = nonterms; p < maxnt; p++) {
i = getntsafe(p);
1985-10-02 22:20:04 +00:00
if (i == NOSAFETY) {
continue;
}
i = do_safes(p->n_rule, i);
if (getntout(p) != i) {
1985-10-02 22:20:04 +00:00
change = 1;
setntout(p, i);
1985-10-02 22:20:04 +00:00
}
}
}
1985-10-03 17:19:14 +00:00
# ifndef NDEBUG
if (debug) {
fputs("Safeties:\n", stderr);
for (p = nonterms; p < maxnt; p++) {
fprintf(stderr, "%s\t%d\t%d\n",
(min_nt_ent + (p - nonterms))->h_name,
getntsafe(p),
getntout(p));
}
}
# endif
1985-10-02 22:20:04 +00:00
}
STATIC int
do_safes(p,safe) register p_gram p; {
/*
* Walk the grammar rule, doing the computation described in the
* comment of the procedure above this one.
*/
register retval;
for (;;) {
switch (g_gettype(p)) {
case ACTION:
p++;
continue;
case TERMINAL:
1985-10-02 22:49:16 +00:00
safe = NOSCANDONE;
1985-10-02 22:20:04 +00:00
break;
case TERM : {
register p_term q;
int i,rep;
q = (p_term) pentry[g_getcont(p)];
i = r_getnum(&(q->t_reps));
rep = r_getkind(&(q->t_reps));
retval = do_safes(q->t_rule,
t_safety(rep,i,q->t_flags&PERSISTENT,safe));
if (retval != gettout(q)) {
settout(q, retval);
1985-10-02 22:20:04 +00:00
}
safe = t_after(rep, i, gettout(q));
1985-10-02 22:20:04 +00:00
break; }
case ALTERNATION : {
register p_link l;
int f, i;
f = 1;
while (g_gettype(p) == ALTERNATION) {
l = (p_link) pentry[g_getcont(p)];
if (safe > SAFE && (l->l_flag & DEF)) {
1985-10-02 22:20:04 +00:00
i = do_safes(l->l_rule,SAFESCANDONE);
}
else i = do_safes(l->l_rule,SAFE);
if (f) retval = i;
else if (i != retval) {
if (i == NOSCANDONE ||
retval == NOSCANDONE) {
retval = SCANDONE;
}
else if (i > retval) retval = i;
}
1985-10-02 22:20:04 +00:00
p++;
f = 0;
}
return retval; }
case NONTERM : {
register p_nont n;
int nsafe, osafe;
1985-10-02 22:20:04 +00:00
n = &nonterms[g_getnont(p)];
nsafe = getntsafe(n);
1985-10-02 22:49:16 +00:00
osafe = safe;
safe = getntout(n);
if (safe == NOSAFETY) safe = SCANDONE;
1985-10-02 22:49:16 +00:00
if (osafe == nsafe) break;
1985-10-02 22:20:04 +00:00
if (nsafe == NOSAFETY) {
change = 1;
setntsafe(n, osafe);
1985-10-02 22:20:04 +00:00
break;
}
1985-10-02 22:49:16 +00:00
if (osafe == NOSCANDONE || nsafe == NOSCANDONE) {
1985-10-02 22:20:04 +00:00
if (nsafe != SCANDONE) {
change = 1;
setntsafe(n, SCANDONE);
1985-10-02 22:20:04 +00:00
}
break;
}
1985-10-02 22:49:16 +00:00
if (osafe > nsafe) {
setntsafe(n, osafe);
1985-10-02 22:20:04 +00:00
change = 1;
}
break; }
case EORULE :
return safe;
1985-10-02 22:20:04 +00:00
}
p++;
}
}
t_safety(rep, count, persistent, safety) {
	/*
	 * Give the safety with which the rule of a term is entered.
	 * rep is the kind of repetition, count the repetition count,
	 * persistent is nonzero for a %persistent term, and safety is
	 * the safety that holds just before the term.
	 */
	int least;	/* lower bound for the result in the PLUS case */

	switch(rep) {
	default:
		assert(0);
		/* If the assertion lets us through: treat as OPT */
	case OPT:
		if (!persistent) return SAFE;
		/* At most SAFESCANDONE */
		return safety < SAFESCANDONE ? safety : SAFESCANDONE;
	case STAR:
		return persistent ? SAFESCANDONE : SAFE;
	case PLUS:
		if (safety == NOSCANDONE) safety = SCANDONE;
		least = persistent ? SAFESCANDONE : SAFE;
		return safety > least ? safety : least;
	case FIXED:
		if (count) return SCANDONE;
		return safety == NOSCANDONE ? SCANDONE : safety;
	}
	/* NOTREACHED */
}
1985-10-02 22:49:16 +00:00
t_after(rep, count, outsafety) {
1985-10-02 22:49:16 +00:00
if (count == 0 && (rep == STAR || rep == PLUS)) {
return SAFESCANDONE;
}
if (rep != FIXED) {
if (outsafety <= SAFESCANDONE) return SAFESCANDONE;
return SCANDONE;
}
return outsafety;
1985-10-02 22:49:16 +00:00
}