fd91851005
now.
1160 lines
24 KiB
C
1160 lines
24 KiB
C
/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
|
|
* For full copyright and restrictions on use see the file COPYING in the top
|
|
* level of the LLgen tree.
|
|
*/
|
|
|
|
/*
|
|
* L L G E N
|
|
*
|
|
* An Extended LL(1) Parser Generator
|
|
*
|
|
* Author : Ceriel J.H. Jacobs
|
|
*/
|
|
|
|
/*
|
|
* compute.c
|
|
* Defines routines to compute FIRST, FOLLOW etc.
|
|
* Also checks the continuation grammar from the specified grammar.
|
|
*/
|
|
|
|
# include <stdlib.h>
|
|
# include "types.h"
|
|
# include "extern.h"
|
|
# include "sets.h"
|
|
# include "assert.h"
|
|
# include "io.h"
|
|
|
|
# ifndef NORCSID
|
|
static string rcsid = "$Id$";
|
|
# endif
|
|
|
|
/* Hands out the sets used throughout this file; not defined here */
p_set get_set();

/*
 * Measure for the shortest possible terminal production of a rule.
 * Two lengths are ordered on "cnt" first; "val" is only consulted
 * when the "cnt" fields are equal (see compare()).
 */
typedef struct lngth {
	int cnt;	/* primary key */
	int val;	/* tie breaker */
} t_length, *p_length;
|
|
|
|
/* Defined in this file : */
|
|
extern do_compute();
|
|
STATIC createsets();
|
|
STATIC void walk();
|
|
STATIC co_trans();
|
|
STATIC int nempty();
|
|
extern empty();
|
|
STATIC int nfirst();
|
|
STATIC first();
|
|
STATIC int nfollow();
|
|
STATIC follow();
|
|
STATIC void co_dirsymb();
|
|
STATIC co_others();
|
|
STATIC do_lengthcomp();
|
|
STATIC void complength();
|
|
STATIC void add();
|
|
STATIC int compare();
|
|
STATIC void setdefaults();
|
|
STATIC do_contains();
|
|
STATIC void contains();
|
|
STATIC int nsafes();
|
|
STATIC int do_safes();
|
|
#ifdef NON_CORRECTING
|
|
STATIC int nc_nfirst();
|
|
STATIC nc_first();
|
|
STATIC int nc_nfollow();
|
|
STATIC nc_follow();
|
|
#endif
|
|
|
|
do_compute() {
|
|
/*
|
|
* Does all the work, by calling other routines (divide and conquer)
|
|
*/
|
|
register p_nont p;
|
|
register p_start st;
|
|
|
|
createsets();
|
|
co_trans(nempty); /* Which nonterminals produce empty? */
|
|
co_trans(nfirst); /* Computes first sets */
|
|
/*
|
|
* Compute FOLLOW sets.
|
|
* First put EOFILE in the follow set of the start nonterminals.
|
|
*/
|
|
for (st = start; st; st = st->ff_next) {
|
|
p = &nonterms[st->ff_nont];
|
|
PUTIN(p->n_follow,0);
|
|
}
|
|
co_trans(nfollow);
|
|
/*
|
|
* Compute the sets which determine which alternative to choose
|
|
* in case of a choice
|
|
*/
|
|
for (p = nonterms; p < maxnt; p++) {
|
|
co_dirsymb(p->n_follow,p->n_rule);
|
|
}
|
|
/*
|
|
* Compute the minimum length of productions of nonterminals,
|
|
* and then determine the default choices
|
|
*/
|
|
do_lengthcomp();
|
|
/*
|
|
* Compute the contains sets
|
|
*/
|
|
for (p = nonterms; p < maxnt; p++) do_contains(p);
|
|
for (p = nonterms; p < maxnt; p++) contains(p->n_rule, (p_set) 0);
|
|
/*
|
|
* Compute the safety of each nonterminal and term.
|
|
* The safety gives an answer to the question whether a scan is done,
|
|
* and how it should be handled.
|
|
*/
|
|
for (p = nonterms; p < maxnt; p++) {
|
|
/*
|
|
* Don't know anything yet
|
|
*/
|
|
setntsafe(p, NOSAFETY);
|
|
setntout(p, NOSAFETY);
|
|
}
|
|
for (st = start; st; st = st->ff_next) {
|
|
/*
|
|
* But start symbols are called with lookahead done
|
|
*/
|
|
p = &nonterms[st->ff_nont];
|
|
setntsafe(p,SCANDONE);
|
|
}
|
|
co_trans(nsafes);
|
|
|
|
#ifdef NON_CORRECTING
|
|
if (subpars_sim) {
|
|
int s;
|
|
|
|
/* compute the union of the first sets of all start symbols
|
|
Used to compute the nc-first-sets when -s option is given */
|
|
start_firsts = get_set();
|
|
for (st = start; st; st = st->ff_next) {
|
|
s = setunion(start_firsts, (&nonterms[st->ff_nont])->n_first);
|
|
}
|
|
}
|
|
|
|
if (non_corr) {
|
|
/* compute the non_corr first sets for all nonterminals and terms */
|
|
|
|
co_trans(nc_nfirst);
|
|
for (st = start; st; st = st->ff_next) {
|
|
p = &nonterms[st->ff_nont];
|
|
PUTIN(p->n_nc_follow,0);
|
|
}
|
|
co_trans(nc_nfollow);
|
|
}
|
|
#endif
|
|
|
|
# ifndef NDEBUG
|
|
if (debug) {
|
|
fputs("Safeties:\n", stderr);
|
|
for (p = nonterms; p < maxnt; p++) {
|
|
fprintf(stderr, "%s\t%d\t%d\n",
|
|
p->n_name,
|
|
getntsafe(p),
|
|
getntout(p));
|
|
}
|
|
}
|
|
# endif
|
|
}
|
|
|
|
STATIC
|
|
createsets() {
|
|
/*
|
|
* Allocate space for the sets. Also determine which files use
|
|
* which nonterminals, and determine which nonterminals can be
|
|
* made static.
|
|
*/
|
|
register p_nont p;
|
|
register p_file f;
|
|
register p_start st;
|
|
register int i;
|
|
int n = NINTS(NBYTES(nnonterms));
|
|
p_mem alloc();
|
|
|
|
for (f = files; f < maxfiles; f++) {
|
|
register p_set s;
|
|
f->f_used = s = (p_set) alloc((unsigned)n*sizeof(*(f->f_used)));
|
|
for (i = n; i; i--) *s++ = 0;
|
|
for (i = f->f_nonterminals; i != -1; i = p->n_next) {
|
|
p = &nonterms[i];
|
|
p->n_flags |= GENSTATIC;
|
|
p->n_first = get_set();
|
|
#ifdef NON_CORRECTING
|
|
p->n_nc_first = get_set();
|
|
p->n_nc_follow = get_set();
|
|
#endif
|
|
p->n_follow = get_set();
|
|
walk(f->f_used, p->n_rule);
|
|
}
|
|
}
|
|
for (f = files; f < maxfiles; f++) {
|
|
for (i = f->f_nonterminals; i != -1; i = p->n_next) {
|
|
register p_file f2;
|
|
|
|
p = &nonterms[i];
|
|
for (f2 = files; f2 < maxfiles; f2++) {
|
|
if (f2 != f && IN(f2->f_used, i)) {
|
|
p->n_flags &= ~GENSTATIC;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (st = start; st; st = st->ff_next) {
|
|
nonterms[st->ff_nont].n_flags &= ~GENSTATIC;
|
|
}
|
|
}
|
|
|
|
STATIC void
|
|
walk(u, p) p_set u; register p_gram p; {
|
|
/*
|
|
* Walk through the grammar rule p, allocating sets
|
|
*/
|
|
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
q->t_first = get_set();
|
|
#ifdef NON_CORRECTING
|
|
q->t_nc_first = get_set();
|
|
q->t_nc_follow = get_set();
|
|
#endif
|
|
q->t_follow = get_set();
|
|
walk(u, q->t_rule);
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
|
|
l = g_getlink(p);
|
|
l->l_symbs = get_set();
|
|
#ifdef NON_CORRECTING
|
|
l->l_nc_symbs = get_set();
|
|
#endif
|
|
l->l_others = get_set();
|
|
walk(u, l->l_rule);
|
|
break; }
|
|
case NONTERM : {
|
|
register int i = g_getcont(p);
|
|
|
|
PUTIN(u, i);
|
|
break; }
|
|
case EORULE :
|
|
return;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC
|
|
co_trans(fc) int (*fc)(); {
|
|
register p_nont p;
|
|
register int change;
|
|
|
|
do {
|
|
change = 0;
|
|
for (p = nonterms; p < maxnt; p++) {
|
|
if ((*fc)(p)) change = 1;
|
|
}
|
|
} while (change);
|
|
}
|
|
|
|
STATIC int
|
|
nempty(p) register p_nont p; {
|
|
if (!(p->n_flags & EMPTY) && empty(p->n_rule)) {
|
|
p->n_flags |= EMPTY;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
empty(p) register p_gram p; {
|
|
/*
|
|
* Does the rule pointed to by p produce empty ?
|
|
*/
|
|
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return 1;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
if (r_getkind(q) == STAR
|
|
|| r_getkind(q) == OPT
|
|
|| empty(q->t_rule) ) break;
|
|
return 0; }
|
|
case ALTERNATION :
|
|
if (empty(g_getlink(p)->l_rule)) {
|
|
return 1;
|
|
}
|
|
if (g_gettype(p+1) == EORULE) return 0;
|
|
break;
|
|
case NONTERM :
|
|
if (nonterms[g_getcont(p)].n_flags & EMPTY) {
|
|
break;
|
|
}
|
|
/* Fall through */
|
|
case LITERAL :
|
|
case TERMINAL :
|
|
return 0;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC int
|
|
nfirst(p) register p_nont p; {
|
|
return first(p->n_first, p->n_rule, 0);
|
|
}
|
|
|
|
#ifdef NON_CORRECTING
|
|
STATIC int nc_nfirst(p) register p_nont p; {
|
|
return nc_first(p->n_nc_first, p->n_rule, 0);
|
|
}
|
|
#endif
|
|
|
|
STATIC
|
|
first(setp,p,flag) p_set setp; register p_gram p; {
|
|
/*
|
|
* Compute the FIRST set of rule p.
|
|
* If flag = 0, also the first sets for terms and alternations in
|
|
* the rule p are computed.
|
|
* The FIRST set is put in setp.
|
|
* return 1 if the set refered to by "setp" changed
|
|
*/
|
|
register s; /* Will gather return value */
|
|
int noenter;/* when set, unables entering of elements into
|
|
* setp. This is necessary to walk through the
|
|
* rest of rule p.
|
|
*/
|
|
|
|
s = 0;
|
|
noenter = 0;
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return s;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
if (flag == 0) {
|
|
if (first(q->t_first,q->t_rule,0))/*nothing*/;
|
|
}
|
|
if (!noenter) s |= setunion(setp,q->t_first);
|
|
p++;
|
|
if (r_getkind(q) == STAR ||
|
|
r_getkind(q) == OPT ||
|
|
empty(q->t_rule)) continue;
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
|
|
l = g_getlink(p);
|
|
if (flag == 0) {
|
|
if (first(l->l_symbs,l->l_rule,0))/*nothing*/;
|
|
}
|
|
if (noenter == 0) {
|
|
s |= setunion(setp,l->l_symbs);
|
|
}
|
|
if (g_gettype(p+1) == EORULE) return s;
|
|
}
|
|
/* Fall Through */
|
|
case ACTION :
|
|
p++;
|
|
continue;
|
|
case LITERAL :
|
|
case TERMINAL :
|
|
if ((noenter == 0) && !IN(setp,g_getcont(p))) {
|
|
s = 1;
|
|
PUTIN(setp,g_getcont(p));
|
|
}
|
|
p++;
|
|
break;
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
if (noenter == 0) {
|
|
s |= setunion(setp,n->n_first);
|
|
if (ntneeded) NTPUTIN(setp,g_getcont(p));
|
|
}
|
|
p++;
|
|
if (n->n_flags & EMPTY) continue;
|
|
break; }
|
|
}
|
|
if (flag == 0) {
|
|
noenter = 1;
|
|
continue;
|
|
}
|
|
return s;
|
|
}
|
|
}
|
|
|
|
#ifdef NON_CORRECTING
|
|
STATIC
|
|
nc_first(setp,p,flag) p_set setp; register p_gram p; {
|
|
/*
|
|
* Compute the non_corr FIRST set of rule p.
|
|
* If flag = 0, also the non_corr first sets for terms and
|
|
* alternations in the rule p are computed.
|
|
* The non_corr FIRST set is put in setp.
|
|
* return 1 if the set refered to by "setp" changed
|
|
* If the -s flag was given, the union of the first-sets of all
|
|
* start symbols is used whenever an action occurs. Else, only the
|
|
* first-sets of startsynbols in the %substart are used
|
|
*/
|
|
|
|
register s; /* Will gather return value */
|
|
int noenter;/* when set, unables entering of elements into
|
|
* setp. This is necessary to walk through the
|
|
* rest of rule p.
|
|
*/
|
|
|
|
s = 0;
|
|
noenter = 0;
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return s;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
if (flag == 0) {
|
|
if (nc_first(q->t_nc_first,q->t_rule,0))/*nothing*/;
|
|
}
|
|
if (!noenter) s |= setunion(setp,q->t_nc_first);
|
|
p++;
|
|
if (r_getkind(q) == STAR ||
|
|
r_getkind(q) == OPT ||
|
|
empty(q->t_rule)) continue;
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
|
|
l = g_getlink(p);
|
|
if (flag == 0) {
|
|
if (nc_first(l->l_nc_symbs,l->l_rule,0))/*nothing*/;
|
|
}
|
|
if (noenter == 0) {
|
|
s |= setunion(setp,l->l_nc_symbs);
|
|
}
|
|
if (g_gettype(p+1) == EORULE) return s;
|
|
}
|
|
p++;
|
|
continue;
|
|
case ACTION : {
|
|
register p_start subp;
|
|
|
|
if (!noenter)
|
|
if (subpars_sim)
|
|
s |= setunion(setp, start_firsts);
|
|
else {
|
|
for (subp = g_getsubparse(p); subp;
|
|
subp = subp->ff_next)
|
|
s |= setunion(setp, (&nonterms[subp->ff_nont])->n_nc_first);
|
|
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
case LITERAL :
|
|
case TERMINAL :
|
|
if (g_getcont(p) == g_getcont(illegal_gram)) {
|
|
/* Ignore for this set. */
|
|
p++;
|
|
continue;
|
|
}
|
|
if ((noenter == 0) && !IN(setp,g_getcont(p))) {
|
|
s = 1;
|
|
PUTIN(setp,g_getcont(p));
|
|
}
|
|
p++;
|
|
break;
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
if (noenter == 0) {
|
|
s |= setunion(setp,n->n_nc_first);
|
|
if (ntneeded) NTPUTIN(setp,g_getcont(p));
|
|
}
|
|
p++;
|
|
if (n->n_flags & EMPTY) continue;
|
|
break; }
|
|
}
|
|
if (flag == 0) {
|
|
noenter = 1;
|
|
continue;
|
|
}
|
|
return s;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
STATIC int
|
|
nfollow(p) register p_nont p; {
|
|
return follow(p->n_follow, p->n_rule);
|
|
}
|
|
|
|
STATIC
|
|
follow(setp,p) p_set setp; register p_gram p; {
|
|
/*
|
|
* setp is the follow set for the rule p.
|
|
* Compute the follow sets in the rule p from this set.
|
|
* Return 1 if a follow set of a nonterminal changed.
|
|
*/
|
|
register s; /* Will gather return value */
|
|
|
|
s = 0;
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return s;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
if (empty(p+1)) {
|
|
/*
|
|
* If what follows the term can be empty,
|
|
* everything that can follow the whole
|
|
* rule can also follow the term
|
|
*/
|
|
s |= setunion(q->t_follow,setp);
|
|
}
|
|
/*
|
|
* Everything that can start the rest of the rule
|
|
* can follow the term
|
|
*/
|
|
s |= first(q->t_follow,p+1,1);
|
|
if (r_getkind(q) == STAR ||
|
|
r_getkind(q) == PLUS ||
|
|
r_getnum(q) ) {
|
|
/*
|
|
* If the term involves a repetition
|
|
* of possibly more than one,
|
|
* everything that can start the term
|
|
* can also follow it.
|
|
*/
|
|
s |= follow(q->t_first,q->t_rule);
|
|
}
|
|
/*
|
|
* Now propagate the set computed sofar
|
|
*/
|
|
s |= follow(q->t_follow, q->t_rule);
|
|
break; }
|
|
case ALTERNATION :
|
|
/*
|
|
* Just propagate setp
|
|
*/
|
|
s |= follow(setp,g_getlink(p)->l_rule);
|
|
break;
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
s |= first(n->n_follow,p+1,1);
|
|
if (empty(p+1)) {
|
|
/*
|
|
* If the rest of p can produce empty,
|
|
* everything that follows p can follow
|
|
* the nonterminal
|
|
*/
|
|
s |= setunion(n->n_follow,setp);
|
|
}
|
|
break; }
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
#ifdef NON_CORRECTING
|
|
|
|
STATIC int
|
|
nc_nfollow(p) register p_nont p; {
|
|
return follow(p->n_nc_follow, p->n_rule);
|
|
}
|
|
|
|
STATIC
|
|
nc_follow(setp,p) p_set setp; register p_gram p; {
|
|
/*
|
|
* setp is the follow set for the rule p.
|
|
* Compute the follow sets in the rule p from this set.
|
|
* Return 1 if a follow set of a nonterminal changed.
|
|
*/
|
|
register s; /* Will gather return value */
|
|
|
|
s = 0;
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return s;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
if (empty(p+1)) {
|
|
/*
|
|
* If what follows the term can be empty,
|
|
* everything that can follow the whole
|
|
* rule can also follow the term
|
|
*/
|
|
s |= setunion(q->t_nc_follow,setp);
|
|
}
|
|
/*
|
|
* Everything that can start the rest of the rule
|
|
* can follow the term
|
|
*/
|
|
s |= nc_first(q->t_nc_follow,p+1,1);
|
|
if (r_getkind(q) == STAR ||
|
|
r_getkind(q) == PLUS ||
|
|
r_getnum(q) ) {
|
|
/*
|
|
* If the term involves a repetition
|
|
* of possibly more than one,
|
|
* everything that can start the term
|
|
* can also follow it.
|
|
*/
|
|
s |= nc_follow(q->t_nc_first,q->t_rule);
|
|
}
|
|
/*
|
|
* Now propagate the set computed sofar
|
|
*/
|
|
s |= nc_follow(q->t_nc_follow, q->t_rule);
|
|
break; }
|
|
case ALTERNATION :
|
|
/*
|
|
* Just propagate setp
|
|
*/
|
|
s |= nc_follow(setp,g_getlink(p)->l_rule);
|
|
break;
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
s |= nc_first(n->n_nc_follow,p+1,1);
|
|
if (empty(p+1)) {
|
|
/*
|
|
* If the rest of p can produce empty,
|
|
* everything that follows p can follow
|
|
* the nonterminal
|
|
*/
|
|
s |= setunion(n->n_nc_follow,setp);
|
|
}
|
|
break; }
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
STATIC void
|
|
co_dirsymb(setp,p) p_set setp; register p_gram p; {
|
|
/*
|
|
* Walk the rule p, doing the work for alternations
|
|
*/
|
|
register p_gram s = 0;
|
|
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return;
|
|
case TERM : {
|
|
register p_term q;
|
|
|
|
q = g_getterm(p);
|
|
co_dirsymb(q->t_follow,q->t_rule);
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
/*
|
|
* Save first alternative
|
|
*/
|
|
if (!s) s = p;
|
|
l = g_getlink(p);
|
|
co_dirsymb(setp,l->l_rule);
|
|
if (empty(l->l_rule)) {
|
|
/*
|
|
* If the rule can produce empty, everything
|
|
* that follows it can also start it
|
|
*/
|
|
setunion(l->l_symbs,setp);
|
|
}
|
|
if (g_gettype(p+1) == EORULE) {
|
|
/*
|
|
* Every alternation is implemented as a
|
|
* choice between two alternatives :
|
|
* this one or one of the following.
|
|
* The l_others set will contain the starters
|
|
* of the other alternatives
|
|
*/
|
|
co_others(s);
|
|
return;
|
|
} }
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC
|
|
co_others(p) register p_gram p; {
|
|
/*
|
|
* compute the l_others-sets for the list of alternatives
|
|
* indicated by p
|
|
*/
|
|
register p_link l1,l2;
|
|
|
|
l1 = g_getlink(p);
|
|
p++;
|
|
l2 = g_getlink(p);
|
|
setunion(l1->l_others,l2->l_symbs);
|
|
if (g_gettype(p+1) != EORULE) {
|
|
/*
|
|
* First compute l2->l_others
|
|
*/
|
|
co_others(p);
|
|
/*
|
|
* and then l1->l_others
|
|
*/
|
|
setunion(l1->l_others,l2->l_others);
|
|
}
|
|
}
|
|
|
|
/* Length table, one entry per nonterminal; allocated and freed by do_lengthcomp() */
static p_length length;
/* "cnt" value standing for: no finite length known */
# define INFINITY 32767
|
|
|
|
STATIC
|
|
ncomplength(p)
|
|
register p_nont p;
|
|
{
|
|
register p_length pl = &length[p - nonterms];
|
|
int x = pl->cnt;
|
|
|
|
pl->cnt = -1;
|
|
complength(p->n_rule, pl);
|
|
return pl->cnt < INFINITY && x == INFINITY;
|
|
}
|
|
|
|
STATIC
|
|
do_lengthcomp() {
|
|
/*
|
|
* Compute the minimum length of a terminal production for each
|
|
* nonterminal.
|
|
* This length consists of two fields: the number of terminals,
|
|
* and a number that is composed of
|
|
* - the number of this alternative
|
|
* - a crude measure of the number of terms and nonterminals in the
|
|
* production of this shortest string.
|
|
*/
|
|
register p_length pl;
|
|
register p_nont p;
|
|
p_mem alloc();
|
|
|
|
length = (p_length) alloc((unsigned) (nnonterms * sizeof(*length)));
|
|
for (pl = &length[nnonterms-1]; pl >= length; pl--) {
|
|
pl->val = pl->cnt = INFINITY;
|
|
}
|
|
|
|
co_trans(ncomplength);
|
|
|
|
pl = length;
|
|
for (p = nonterms; p < maxnt; p++, pl++) {
|
|
if (pl->cnt == INFINITY) {
|
|
p->n_flags |= RECURSIVE;
|
|
}
|
|
setdefaults(p->n_rule);
|
|
}
|
|
free ((p_mem) length);
|
|
}
|
|
|
|
STATIC void
|
|
complength(p,le) register p_gram p; p_length le; {
|
|
/*
|
|
* Walk grammar rule p, computing minimum lengths
|
|
*/
|
|
register p_link l;
|
|
register p_term q;
|
|
t_length i;
|
|
t_length X;
|
|
int cnt = 0;
|
|
|
|
X.cnt = 0;
|
|
X.val = 0;
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case LITERAL :
|
|
case TERMINAL :
|
|
#ifdef NON_CORRECTING
|
|
if (g_getcont(p) == g_getcont(illegal_gram)) {
|
|
add(&X, INFINITY, 0);
|
|
break;
|
|
}
|
|
#endif
|
|
add(&X, 1, 0);
|
|
break;
|
|
case ALTERNATION :
|
|
|
|
X.cnt = INFINITY;
|
|
X.val = INFINITY;
|
|
while (g_gettype(p) != EORULE) {
|
|
cnt++;
|
|
l = g_getlink(p);
|
|
p++;
|
|
complength(l->l_rule,&i);
|
|
i.val += cnt;
|
|
if (l->l_flag & DEF) {
|
|
X = i;
|
|
break;
|
|
}
|
|
if (compare(&i, &X) < 0) {
|
|
X = i;
|
|
}
|
|
}
|
|
/* Fall through */
|
|
case EORULE :
|
|
le->cnt = X.cnt;
|
|
le->val = X.val;
|
|
return;
|
|
case TERM : {
|
|
register int rep;
|
|
|
|
q = g_getterm(p);
|
|
rep = r_getkind(q);
|
|
X.val += 1;
|
|
if ((q->t_flags&PERSISTENT) ||
|
|
rep==FIXED || rep==PLUS) {
|
|
complength(q->t_rule,&i);
|
|
add(&X, i.cnt, i.val);
|
|
if (rep == FIXED && r_getnum(q) > 0) {
|
|
for (rep = r_getnum(q) - 1;
|
|
rep > 0; rep--) {
|
|
add(&X, i.cnt, i.val);
|
|
}
|
|
}
|
|
}
|
|
break; }
|
|
case NONTERM : {
|
|
int nn = g_getcont(p);
|
|
register p_length pl = &length[nn];
|
|
int x = pl->cnt;
|
|
|
|
if (x == INFINITY) {
|
|
pl->cnt = -1;
|
|
complength(nonterms[nn].n_rule,pl);
|
|
x = pl->cnt;
|
|
}
|
|
else if (x == -1) x = INFINITY;
|
|
add(&X, x, pl->val);
|
|
X.val += 1;
|
|
}
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC void
|
|
add(a, c, v) register p_length a; {
|
|
|
|
if (a->cnt == INFINITY || c == INFINITY) {
|
|
a->cnt = INFINITY;
|
|
return;
|
|
}
|
|
a->val += v;
|
|
a->cnt += c;
|
|
}
|
|
|
|
STATIC int
|
|
compare(a, b) register p_length a, b; {
|
|
if (a->cnt != b->cnt) return a->cnt - b->cnt;
|
|
return a->val - b->val;
|
|
}
|
|
|
|
STATIC void
|
|
setdefaults(p) register p_gram p; {
|
|
for (;;) {
|
|
switch(g_gettype(p)) {
|
|
case EORULE:
|
|
return;
|
|
case TERM:
|
|
setdefaults(g_getterm(p)->t_rule);
|
|
break;
|
|
case ALTERNATION: {
|
|
register p_link l, l1;
|
|
int temp = 0, temp1, cnt = 0;
|
|
t_length count, i;
|
|
|
|
count.cnt = INFINITY;
|
|
count.val = INFINITY;
|
|
l1 = g_getlink(p);
|
|
do {
|
|
cnt++;
|
|
l = g_getlink(p);
|
|
p++;
|
|
complength(l->l_rule,&i);
|
|
i.val += cnt;
|
|
if (l->l_flag & DEF) temp = 1;
|
|
temp1 = compare(&i, &count);
|
|
if (temp1 < 0 ||
|
|
(temp1 == 0 && l1->l_flag & AVOIDING)) {
|
|
l1 = l;
|
|
count = i;
|
|
}
|
|
setdefaults(l->l_rule);
|
|
} while (g_gettype(p) != EORULE);
|
|
if (!temp) {
|
|
/* No user specified default */
|
|
l1->l_flag |= DEF;
|
|
}
|
|
return; }
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC
|
|
do_contains(n) register p_nont n; {
|
|
/*
|
|
* Compute the total set of symbols that nonterminal n can
|
|
* produce
|
|
*/
|
|
|
|
if (n->n_contains == 0) {
|
|
n->n_contains = get_set();
|
|
contains(n->n_rule,n->n_contains);
|
|
/*
|
|
* If the rule can produce empty, delete all symbols that
|
|
* can follow the rule as well as be in the rule.
|
|
* This is needed because the contains-set may only contain
|
|
* symbols that are guaranteed to be eaten by the rule!
|
|
* Otherwise, the generated parser may loop forever
|
|
*/
|
|
if (n->n_flags & EMPTY) {
|
|
setminus(n->n_contains,n->n_follow);
|
|
}
|
|
/*
|
|
* But the symbols that can start the rule are always
|
|
* eaten
|
|
*/
|
|
setunion(n->n_contains,n->n_first);
|
|
}
|
|
}
|
|
|
|
STATIC void
|
|
contains(p,set) register p_gram p; register p_set set; {
|
|
/*
|
|
* Does the real computation of the contains-sets
|
|
*/
|
|
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case EORULE :
|
|
return;
|
|
case TERM : {
|
|
register p_term q;
|
|
int rep;
|
|
|
|
q = g_getterm(p);
|
|
rep = r_getkind(q);
|
|
if ((q->t_flags & PERSISTENT) ||
|
|
rep == PLUS || rep == FIXED) {
|
|
/*
|
|
* In these cases, the term belongs to the
|
|
* continuation grammar.
|
|
* Otherwise, q->t_contains is just
|
|
* q->t_first
|
|
*/
|
|
if (!q->t_contains) {
|
|
q->t_contains = get_set();
|
|
}
|
|
contains(q->t_rule,q->t_contains);
|
|
if (rep != FIXED || empty(q->t_rule)) {
|
|
setminus(q->t_contains,q->t_follow);
|
|
}
|
|
setunion(q->t_contains,q->t_first);
|
|
} else {
|
|
contains(q->t_rule, (p_set) 0);
|
|
q->t_contains = q->t_first;
|
|
}
|
|
if (set) setunion(set,q->t_contains);
|
|
break; }
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
do_contains(n);
|
|
if (set) {
|
|
setunion(set, n->n_contains);
|
|
if (ntneeded) NTPUTIN(set, g_getcont(p));
|
|
}
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
|
|
l = g_getlink(p);
|
|
contains(l->l_rule,
|
|
(l->l_flag & DEF) ? set : (p_set) 0);
|
|
break; }
|
|
case LITERAL :
|
|
case TERMINAL : {
|
|
register hulp;
|
|
|
|
if (set) {
|
|
hulp = g_getcont(p);
|
|
assert(hulp < ntokens);
|
|
PUTIN(set,hulp);
|
|
}}
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
STATIC int nsafes(p) register p_nont p; {
|
|
int ch;
|
|
register int i;
|
|
|
|
ch = 0;
|
|
i = getntsafe(p);
|
|
if (i != NOSAFETY) {
|
|
i = do_safes(p->n_rule, i, &ch);
|
|
if (i < SCANDONE) i = SCANDONE;
|
|
/* After a nonterminal, we only know whether a scan was done
|
|
or not
|
|
*/
|
|
if (getntout(p) != i) {
|
|
ch = 1;
|
|
setntout(p,i);
|
|
}
|
|
}
|
|
return ch;
|
|
}
|
|
|
|
STATIC int
|
|
do_safes(p,safe,ch) register p_gram p; register int *ch; {
|
|
/*
|
|
* Walk the grammar rule, doing the computation described in the
|
|
* comment of the procedure above this one.
|
|
*/
|
|
int retval;
|
|
|
|
for (;;) {
|
|
switch (g_gettype(p)) {
|
|
case ACTION:
|
|
p++;
|
|
continue;
|
|
case LITERAL:
|
|
case TERMINAL:
|
|
safe = NOSCANDONE;
|
|
break;
|
|
case TERM : {
|
|
register p_term q;
|
|
int i,rep;
|
|
|
|
q = g_getterm(p);
|
|
i = r_getnum(q);
|
|
rep = r_getkind(q);
|
|
retval = do_safes(q->t_rule,
|
|
t_safety(rep,i,q->t_flags&PERSISTENT,safe),ch);
|
|
settout(q, retval);
|
|
safe = t_after(rep, i, retval);
|
|
break; }
|
|
case ALTERNATION : {
|
|
register p_link l;
|
|
register int i;
|
|
|
|
retval = -1;
|
|
while (g_gettype(p) == ALTERNATION) {
|
|
l = g_getlink(p);
|
|
p++;
|
|
if (safe > SAFE && (l->l_flag & DEF)) {
|
|
i = do_safes(l->l_rule,SAFESCANDONE,ch);
|
|
}
|
|
else i = do_safes(l->l_rule,SAFE,ch);
|
|
if (retval == -1) retval = i;
|
|
else if (i != retval) {
|
|
if (i == NOSCANDONE ||
|
|
retval == NOSCANDONE) {
|
|
retval = SCANDONE;
|
|
}
|
|
else if (i > retval) retval = i;
|
|
}
|
|
}
|
|
return retval; }
|
|
case NONTERM : {
|
|
register p_nont n;
|
|
register int nsafe, osafe;
|
|
|
|
n = &nonterms[g_getcont(p)];
|
|
nsafe = getntsafe(n);
|
|
osafe = safe;
|
|
safe = getntout(n);
|
|
if (safe == NOSAFETY) safe = SCANDONE;
|
|
if (osafe == nsafe) break;
|
|
if (nsafe == NOSAFETY) {
|
|
*ch = 1;
|
|
setntsafe(n, osafe);
|
|
break;
|
|
}
|
|
if (osafe == NOSCANDONE || nsafe == NOSCANDONE) {
|
|
if (nsafe != SCANDONE) {
|
|
*ch = 1;
|
|
setntsafe(n, SCANDONE);
|
|
}
|
|
break;
|
|
}
|
|
if (osafe > nsafe) {
|
|
setntsafe(n, osafe);
|
|
*ch = 1;
|
|
}
|
|
break; }
|
|
case EORULE :
|
|
return safe;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
t_safety(rep, count, persistent, safety) {
|
|
|
|
if (safety == NOSCANDONE) safety = SCANDONE;
|
|
switch(rep) {
|
|
default:
|
|
assert(0);
|
|
case OPT:
|
|
if (!persistent || safety < SAFESCANDONE) return SAFE;
|
|
return SAFESCANDONE;
|
|
case STAR:
|
|
if (persistent) return SAFESCANDONE;
|
|
return SAFE;
|
|
case PLUS:
|
|
if (persistent) {
|
|
if (safety > SAFESCANDONE) return safety;
|
|
return SAFESCANDONE;
|
|
}
|
|
return safety;
|
|
case FIXED:
|
|
if (!count) return safety;
|
|
return SCANDONE;
|
|
}
|
|
/* NOTREACHED */
|
|
}
|
|
|
|
t_after(rep, count, outsafety) {
|
|
if (count == 0 && (rep == STAR || rep == PLUS)) {
|
|
return SAFESCANDONE;
|
|
}
|
|
if (rep != FIXED) {
|
|
if (outsafety <= SAFESCANDONE) return SAFESCANDONE;
|
|
return SCANDONE;
|
|
}
|
|
return outsafety;
|
|
}
|