/*
 * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
 *
 *          This product is part of the Amsterdam Compiler Kit.
 *
 * Permission to use, sell, duplicate or disclose this software must be
 * obtained in writing. Requests for such permissions may be sent to
 *
 *      Dr. Andrew S. Tanenbaum
 *      Wiskundig Seminarium
 *      Vrije Universiteit
 *      Postbox 7161
 *      1007 MC Amsterdam
 *      The Netherlands
 *
 */

/*
 *  L L G E N
 *
 *  An Extended LL(1) Parser Generator
 *
 *  Author : Ceriel J.H. Jacobs
 */

/*
 * compute.c
 * Defines routines to compute FIRST, FOLLOW etc.
 * Also checks the continuation grammar from the specified grammar.
 */

# include "types.h"
# include "tunable.h"
# include "extern.h"
# include "sets.h"
# include "assert.h"
# ifndef NDEBUG
# include "io.h"
# endif

# ifndef NORCSID
static string rcsid2 = "$Header$";
# endif

/* Set allocator; called throughout this file as setalloc(setsize) */
p_set		setalloc();

/*
 * Routines defined in this file.
 * Those declared "extern" are the entry points; those declared STATIC
 * are helpers that are only called from within this file.
 */
extern createsets();
STATIC walk();
extern co_empty();
extern empty();
extern co_first();
STATIC first();
extern co_follow();
STATIC follow();
extern co_symb();
STATIC co_dirsymb();
STATIC co_others();
STATIC do_checkdefault();
STATIC checkdefault();
extern co_contains();
STATIC do_contains();
STATIC contains();
extern co_safes();
STATIC int do_safes();

createsets() {
	/*
	 * Give every nonterminal a FIRST and a FOLLOW set, and let
	 * walk() allocate the sets needed inside its rule.
	 */
	register p_nont nt = nonterms;

	while (nt < maxnt) {
		nt->n_first = setalloc(setsize);
		nt->n_follow = setalloc(setsize);
		walk(nt->n_rule);
		nt++;
	}
}

STATIC
walk(p) register p_gram p; {
	/*
	 * Walk through the grammar rule p, allocating sets
	 */

	for (;;) {
		switch (g_gettype(p)) {
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			q->t_first = setalloc(setsize);
			q->t_follow = setalloc(setsize);
			walk(q->t_rule);
			break; }
		  case ALTERNATION : {
			register p_link l;

			l = (p_link) pentry[g_getcont(p)];
			l->l_symbs = setalloc(setsize);
			walk(l->l_rule);
			break; }
		  case EORULE :
			return;
		}
		p++;
	}
}

co_empty() {
	/*
	 * Mark, with the EMPTY flag, every nonterminal that can produce
	 * the empty string. Repeats until a full pass marks nothing new.
	 */
	register p_nont	nt;
	register int	grew;

	do {
		grew = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			/* Already known to be nullable */
			if (nt->n_flags & EMPTY) continue;
			if (!empty(nt->n_rule)) continue;
			nt->n_flags |= EMPTY;
			grew = 1;
		}
	} while (grew);
}

empty(p) register p_gram p; {
	/*
	 * Does the rule pointed to by p produce empty ?
	 */

	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE : 
			return 1;
		  case TERM :  {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			if (r_getkind(&(q->t_reps)) == STAR
			    || r_getkind(&(q->t_reps)) == OPT
			    || empty(q->t_rule) ) break;
			return 0; }
		  case ALTERNATION :
			if (empty(((p_link)pentry[g_getcont(p)])->l_rule)) {
				return 1;
			}
			if (g_gettype(p+1) == EORULE) return 0;
			break;
		  case NONTERM :
			if (nonterms[g_getnont(p)].n_flags & EMPTY) {
				break;
			}
			/* Fall through */
		  case TERMINAL :
			return 0;
		}
		p++;
	}
}

co_first() {
	/*
	 * Compute the FIRST sets of all nonterminals by calling first()
	 * on every rule until a complete pass changes no set.
	 */

	register p_nont	nt;
	register int	grew;

	do {
		grew = 0;
		nt = nonterms;
		while (nt < maxnt) {
			if (first(nt->n_first,nt->n_rule,0)) grew = 1;
			nt++;
		}
	} while (grew);
}

STATIC
first(setp,p,flag) p_set setp; register p_gram p; {
	/*
	 * Compute the FIRST set of rule p.
	 * If flag = 0, also the first sets for terms and alternations in
	 * the rule p are computed.
	 * The FIRST set is put in setp.
	 * return 1 if any of the sets changed
	 *
	 * With flag = 0 the whole rule is always walked: once an element
	 * is met that cannot be empty, nothing more is entered into setp
	 * (see noenter), but the nested sets in the rest of the rule are
	 * still brought up to date.
	 * With flag != 0 the routine returns as soon as such an element
	 * is met, and does not touch the nested sets.
	 */
	register	s;	/* Will gather return value */
	int		noenter;/* when set, disables entering of elements
				 * into setp. This is necessary to walk
				 * through the rest of rule p.
				 */

	s = 0;
	noenter = 0;
	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			return s;
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			/* Bring the term's own first set up to date first */
			if (flag == 0) s |= first(q->t_first,q->t_rule,0);
			if (!noenter) s |= setunion(setp,q->t_first,setsize);
			p++;
			/*
			 * A term that may be skipped or may produce empty
			 * is transparent: go on with the next element
			 */
			if (r_getkind(&(q->t_reps)) == STAR
			    || r_getkind(&(q->t_reps)) == OPT
			    || empty(q->t_rule) ) continue;
			break; }
		  case ALTERNATION : {
			register p_link l;

			l = (p_link) pentry[g_getcont(p)];
			if (flag == 0) s |= first(l->l_symbs,l->l_rule,0);
			if (noenter == 0) {
				s |= setunion(setp,l->l_symbs,setsize);
			}
			/* Last alternative: the rule ends here */
			if (g_gettype(p+1) == EORULE) return s;
			}
			/*
			 * Fall Through: the next alternative contributes
			 * as well, so noenter is not touched
			 */
		  case ACTION :
			/* Actions are skipped */
			p++;
			continue;
		  case TERMINAL :
			if ((noenter == 0) && !IN(setp,g_getcont(p))) {
				s = 1;
				PUTIN(setp,g_getcont(p));
			}
			p++;
			break;
		  case NONTERM : {
			register p_nont n;

			n = &nonterms[g_getnont(p)];
			if (noenter == 0)  {
				s |= setunion(setp,n->n_first,setsize);
				/*
				 * If ntneeded is set, the nonterminal
				 * itself is recorded in the set too
				 */
				if (ntneeded && ! NTIN(setp,n-nonterms)) {
					s = 1;
					NTPUTIN(setp,n-nonterms);
				}
			}
			p++;
			if (n->n_flags & EMPTY) continue;
			break; }
		}
		/*
		 * Only reached through a "break" above, that is, after an
		 * element that cannot be empty
		 */
		if (flag == 0) {
			noenter = 1;
			continue;
		}
		return s;
	}
}

co_follow() {
	/*
	 * Compute the FOLLOW sets of all nonterminals by calling
	 * follow() on every rule until a complete pass changes no set.
	 */

	register p_nont	nt;
	register int	grew;
	register int	firstpass;
	p_start		sp;

	/*
	 * The start symbols can be followed by EOFILE
	 */
	sp = start;
	while (sp) {
		PUTIN(sp->ff_nont->n_follow,0);
		sp = sp->ff_next;
	}
	/*
	 * Only the very first pass asks follow() to use "first"
	 */
	firstpass = 1;
	do {
		grew = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			if (follow(nt->n_follow,nt->n_rule,firstpass)) {
				grew = 1;
			}
		}
		firstpass = 0;
	} while (grew);
}

STATIC
follow(setp,p,flag) p_set setp; register p_gram p; {
	/*
	 * setp is the follow set for the rule p.
	 * Compute the follow sets in the rule p from this set.
	 * Return 1 if any set changed
	 * flag controls the use of "first" in the computation.
	 * It should be 1 the first time a rule is done, 0 otherwise.
	 *
	 * Terminals and actions have no follow set and are stepped over.
	 */
	register	s;	/* Will gather return value */

	s = 0;
	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			return s;
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			if (empty(p+1)) {
				/*
				 * If what follows the term can be empty,
				 * everything that can follow the whole
				 * rule can also follow the term
				 */
				s |= setunion(q->t_follow,setp,setsize);
			}
			/*
			 * Everything that can start the rest of the rule
			 * can follow the term
			 */
			if (flag) s |= first(q->t_follow,p+1,1);
			if (r_getkind(&(q->t_reps)) == STAR
			    || r_getkind(&(q->t_reps)) == PLUS
			    || r_getnum(&(q->t_reps)) ) {
				/*
				 * If the term involves a repetition
				 * of possibly more than one,
				 * everything that can start the term
				 * can also follow it.
				 * This is done by propagating the term's
				 * first set into its rule as if it were
				 * the follow set.
				 */
				s |= follow(q->t_first,q->t_rule,flag);
			}
			/*
			 * Now propagate the set computed so far
			 */
			s |= follow(q->t_follow, q->t_rule,flag);
			break; }
		  case ALTERNATION :
			/*
			 * Just propagate setp: what follows the rule
			 * follows each of its alternatives
			 */
			s |= follow(setp,((p_link)pentry[g_getcont(p)])->l_rule,
					flag);
			break;
		  case NONTERM : {
			register p_nont n;

			n = &nonterms[g_getnont(p)];
			/*
			 * Everything that can start the rest of the rule
			 * can follow the nonterminal
			 */
			if (flag) s |= first(n->n_follow,p+1,1);
			if (empty(p+1)) {
				/*
				 * If the rest of p can produce empty,
				 * everything that follows p can follow
				 * the nonterminal
				 */
				s |= setunion(n->n_follow,setp,setsize);
			}
			break; }
		}
		p++;
	}
}

co_symb() {
	/*
	 * First pass: compute, for every rule, the sets used to select
	 * an alternative when there is a choice.
	 * Second pass: check the continuation grammar of every
	 * nonterminal.
	 */
	register p_nont nt;

	nt = nonterms;
	while (nt < maxnt) {
		co_dirsymb(nt->n_follow,nt->n_rule);
		nt++;
	}
	nt = nonterms;
	while (nt < maxnt) {
		do_checkdefault(nt);
		nt++;
	}
}

STATIC
co_dirsymb(setp,p) p_set setp; register p_gram p; {
	/*
	 * Walk the rule p, doing the work for alternations
	 */
	register p_gram s = 0;

	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			return;
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			co_dirsymb(q->t_follow,q->t_rule);
			break; }
		  case ALTERNATION : {
			register p_link l;
			/*
			 * Save first alternative
			 */
			if (!s) s = p;
			l = (p_link) pentry[g_getcont(p)];
			l->l_others = setalloc(setsize);
			co_dirsymb(setp,l->l_rule);
			if (empty(l->l_rule)) {
				/*
				 * If the rule can produce empty, everything
				 * that follows it can also start it
				 */
				setunion(l->l_symbs,setp,setsize);
			}
			if (g_gettype(p+1) == EORULE) {
				/*
				 * Every alternation is implemented as a
				 * choice between two alternatives :
				 * this one or one of the following.
				 * The l_others set will contain the starters
				 * of the other alternatives
				 */
				co_others(s);
				return;
			} }
		}
		p++;
	}
}

STATIC
co_others(p) register p_gram p; {
	/*
	 * p indicates an alternative in a list of alternatives.
	 * Fill its l_others set with the symbol sets of all the
	 * alternatives that come after it, doing the same for each of
	 * those on the way.
	 */
	register p_link cur, nxt;

	cur = (p_link) pentry[g_getcont(p)];
	p++;
	nxt = (p_link) pentry[g_getcont(p)];
	setunion(cur->l_others,nxt->l_symbs,setsize);
	/* nxt is the last alternative: it has nothing after it */
	if (g_gettype(p+1) == EORULE) return;
	/*
	 * nxt->l_others must be complete before it is merged
	 */
	co_others(p);
	setunion(cur->l_others,nxt->l_others,setsize);
}

STATIC
do_checkdefault(p) register p_nont p; {
	/*
	 * Check the continuation rule for nonterminal p, unless
	 * this is already being(!) done or was done before.
	 *
	 * BUSY is set while the rule of p is being walked, CONTIN once
	 * the walk is complete, and RECURSIVE when p is reached again
	 * while it is still BUSY.
	 *
	 * The flags are set and cleared explicitly rather than toggled:
	 * toggling makes the outcome depend on how often p is referenced
	 * (a second hit would clear RECURSIVE again), and clearing BUSY
	 * in the error case lets a later reference in the same walk
	 * re-enter the rule.
	 */
	if (p->n_flags & BUSY) {
		/*
		 * Error situation, recursion in continuation grammar.
		 * BUSY is left alone; the invocation that set it will
		 * release it.
		 */
		p->n_flags |= RECURSIVE;
		return;
	}
	if (p->n_flags & CONTIN) {
		/*
		 * Was already done
		 */
		return;
	}
	/*
	 * Now mark it as busy, and check the rule
	 */
	p->n_flags |= BUSY;
	checkdefault(p->n_rule);
	/*
	 * Now release the busy mark, and mark it as done
	 */
	p->n_flags &= ~BUSY;
	p->n_flags |= CONTIN;
	return;
}

STATIC
checkdefault(p) register p_gram p; {
	/*
	 * Walk grammar rule p, checking the continuation grammar
	 */
	register p_link l;
	register p_term q;

	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			return;
		  case ALTERNATION :
			l = (p_link) pentry[g_getcont(p)];
			if (l->l_flag & DEF) {
				/*
				 * This alternative belongs to the
				 * continuation grammar, so check it
				 */
				checkdefault(l->l_rule);
				return;
			}
			break;
		  case TERM :
			q = (p_term) pentry[g_getcont(p)];
			/*
			 * First check the rest of the rule
			 */
			checkdefault(p+1);
			/*
			 * Now check the term if it belongs to the
			 * continuation grammar
			 */
			if (r_getkind(&(q->t_reps))==FIXED ||
			    r_getkind(&(q->t_reps))==PLUS) {
				checkdefault(q->t_rule);
				return;
			}
			/*
			 * Here we have OPT or STAR
			 * Only in the continuation grammar if %persistent
			 */
			if (q->t_flags & PERSISTENT) {
				checkdefault(q->t_rule);
			}
			return;
		  case NONTERM :
			/*
			 * Check the continuation grammar for this nonterminal.
			 * Note that the nonterminal we are working on is
			 * marked as busy, so that direct or indirect recursion
			 * can be detected
			 */
			do_checkdefault(&nonterms[g_getnont(p)]);
			break;
		}
		p++;
	}
}

co_contains() {
	/*
	 * Compute the contains sets, in two passes: first for all
	 * nonterminals, then for the terms inside their rules.
	 * After each pass a scratch set is allocated and released
	 * again at once.
	 */
	register p_nont nt;
	register p_set scratch;

	nt = nonterms;
	while (nt < maxnt) {
		do_contains(nt);
		nt++;
	}
	scratch = setalloc(setsize);
# ifndef NDEBUG
	if (debug) fputs("Contains 1 done\n", stderr);
# endif
	free(scratch);
	nt = nonterms;
	while (nt < maxnt) {
		contains(nt->n_rule, (p_set) 0);
		nt++;
	}
# ifndef NDEBUG
	if (debug) fputs("Contains 2 done\n", stderr);
# endif
	scratch = setalloc(setsize);
	free(scratch);
}

STATIC
do_contains(n) register p_nont n; {
	/*
	 * Compute the contains set of nonterminal n: the symbols that
	 * n can produce. Nothing is done if n already has one.
	 */

	if (n->n_contains != 0) return;

	n->n_contains = setalloc(setsize);
	contains(n->n_rule,n->n_contains);
	if (n->n_flags & EMPTY) {
		/*
		 * The contains set may only hold symbols that are
		 * guaranteed to be eaten by the rule, otherwise the
		 * generated parser may loop forever. When the rule can
		 * produce empty, a symbol that can also follow it
		 * offers no such guarantee, so it is taken out.
		 */
		setminus(n->n_contains,n->n_follow,setsize);
	}
	/*
	 * Symbols that can start the rule are always eaten
	 */
	setunion(n->n_contains,n->n_first,setsize);
}

STATIC
contains(p,set) register p_gram p; register p_set set; {
	/*
	 * Does the real computation of the contains-sets
	 */

	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			return;
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			if ((q->t_flags & PERSISTENT) ||
			    r_getkind(&(q->t_reps)) == PLUS ||
			    r_getkind(&(q->t_reps)) == FIXED) {
				/*
				 * In these cases, the term belongs to the
				 * continuation grammar.
				 * Otherwise, q->t_contains is just
				 * q->t_first
				 */
				if (!q->t_contains) {
				    q->t_contains = setalloc(setsize);
				}
				contains(q->t_rule,q->t_contains);
				if (empty(q->t_rule)) {
					/*
					 * Same trouble as mentioned in the
					 * routine do_contains
					 */
					setminus(q->t_contains,q->t_follow,
						setsize);
				}
				setunion(q->t_contains,q->t_first,setsize);
			} else {
				contains(q->t_rule, (p_set) 0);
				q->t_contains = q->t_first;
			}
			if (set) setunion(set,q->t_contains,setsize);
			break; }
		  case NONTERM : {
			register p_nont n;

			n = &nonterms[g_getnont(p)];
			do_contains(n);
			if(set) setunion(set, n->n_contains,setsize);
			break; }
		  case ALTERNATION : {
			register p_link l;

			l = (p_link) pentry[g_getcont(p)];
			contains(l->l_rule,
				(l->l_flag & DEF) ? set : (p_set) 0);
			break; }
		  case TERMINAL : {
			register hulp;

			if (set) {
				hulp = g_getcont(p);
				assert(hulp < nterminals);
				PUTIN(set,hulp);
			}}
		}
		p++;
	}
}

/* Set by co_safes and do_safes whenever a safety was changed */
static int change;

co_safes() {
	/*
	 * Compute the safety of each nonterminal and term: is a scan
	 * done, and how should it be handled ?
	 * Every nonterminal has an "in" safety (getntsafe) and an "out"
	 * safety (getntout); the rules are redone until nothing changes.
	 */

	register p_nont	nt;
	register int	insafe, outsafe;
	register p_start sp;

	/*
	 * Nothing is known at the start ...
	 */
	nt = nonterms;
	while (nt < maxnt) {
		setntsafe(nt, NOSAFETY);
		setntout(nt, NOSAFETY);
		nt++;
	}
	/*
	 * ... except that start symbols are called with lookahead done
	 */
	sp = start;
	while (sp) {
		nt = sp->ff_nont;
		setntsafe(nt,SCANDONE);
		sp = sp->ff_next;
	}
	do {
		change = 0;
		for (nt = nonterms; nt < maxnt; nt++) {
			insafe = getntsafe(nt);
			/* Nothing known yet: skipped in this pass */
			if (insafe == NOSAFETY) continue;
			outsafe = do_safes(nt->n_rule, insafe);
			if (getntout(nt) != outsafe) {
				change = 1;
				setntout(nt, outsafe);
			}
		}
	} while (change);
# ifndef NDEBUG
	if (debug) {
		fputs("Safeties:\n", stderr);
		nt = nonterms;
		while (nt < maxnt) {
			fprintf(stderr, "%s\t%d\t%d\n",
				(min_nt_ent + (nt - nonterms))->h_name,
				getntsafe(nt),
				getntout(nt));
			nt++;
		}
	}
# endif
}

STATIC int
do_safes(p,safe) register p_gram p; {
	/*
	 * Walk the grammar rule p, computing safeties.
	 * safe is the safety with which the rule is entered; it is
	 * updated for every element passed. The value returned is the
	 * safety at the end of the rule.
	 * On the way, the "out" safety of every term is stored, and the
	 * "in" safety of every nonterminal met is adjusted; the
	 * file-level variable "change" is set whenever such an "in"
	 * safety is altered.
	 * NOTE(review): the comparisons with ">" below presumably rely
	 * on the numeric order of the safety constants, which are
	 * defined elsewhere -- confirm.
	 */
	register	retval;

	for (;;) {
		switch (g_gettype(p)) {
		  case ACTION:
			/* Actions leave the safety as it is */
		  	p++;
			continue;
		  case TERMINAL:
			safe = NOSCANDONE;
			break;
		  case TERM : {
			register p_term q;
			int i,rep;

			q = (p_term) pentry[g_getcont(p)];
			i = r_getnum(&(q->t_reps));
			rep = r_getkind(&(q->t_reps));
			/*
			 * The rule of the term is entered with the safety
			 * that t_safety derives from the kind of
			 * repetition, and what it delivers is stored as
			 * the "out" safety of the term
			 */
			retval = do_safes(q->t_rule,
			       t_safety(rep,i,q->t_flags&PERSISTENT,safe));
			if (retval != gettout(q)) {
				settout(q, retval);
			}
			/* The safety after the term, see t_after */
			safe = t_after(rep, i, gettout(q));
			break; }
		  case ALTERNATION : {
		  	register p_link l;
			int f, i;

			/*
			 * All alternatives are handled right here; the
			 * combined result is the result for the rule.
			 * f is set only for the first alternative.
			 */
			f = 1;
			while (g_gettype(p) == ALTERNATION) {
				l = (p_link) pentry[g_getcont(p)];
				/*
				 * A DEF alternative is entered with
				 * SAFESCANDONE if safe > SAFE; in all
				 * other cases an alternative is entered
				 * with SAFE
				 */
				if (safe > SAFE && (l->l_flag & DEF)) {
					i = do_safes(l->l_rule,SAFESCANDONE);
				}
				else	i = do_safes(l->l_rule,SAFE);
				if (f) retval = i;
				else if (i != retval) {
					/*
					 * The alternatives disagree.
					 * If one of them is NOSCANDONE the
					 * result is SCANDONE, otherwise it
					 * is the larger of the two.
					 */
					if (i == NOSCANDONE ||
					    retval == NOSCANDONE) {
						retval = SCANDONE;
					}
					else if (i > retval) retval = i;
				}
				p++;
				f = 0;
			}
			return retval; }
		  case NONTERM : {
			register p_nont n;
			int nsafe, osafe;

			n = &nonterms[g_getnont(p)];
			nsafe = getntsafe(n);	/* "in" safety known so far */
			osafe = safe;		/* safety at this call */
			/*
			 * After the nonterminal, its "out" safety holds;
			 * SCANDONE is taken as long as that is not known
			 */
			safe = getntout(n);
			if (safe == NOSAFETY) safe = SCANDONE;
			/*
			 * Now bring the "in" safety of the nonterminal in
			 * line with the safety at this call
			 */
			if (osafe == nsafe) break;
			if (nsafe == NOSAFETY) {
				/* First call seen: just take it over */
				change = 1;
				setntsafe(n, osafe);
				break;
			}
			if (osafe == NOSCANDONE || nsafe == NOSCANDONE) {
				/*
				 * They differ and one of them is
				 * NOSCANDONE: the result is SCANDONE
				 */
				if (nsafe != SCANDONE) {
					change = 1;
					setntsafe(n, SCANDONE);
				}
				break;
			}
			/* Otherwise the larger of the two is kept */
			if (osafe > nsafe) {
				setntsafe(n, osafe);
				change = 1;
			}
			break; }
		  case EORULE :
			return safe;
		}
		p++;
	}
}

t_safety(rep, count, persistent, safety) {
	/*
	 * Return the safety with which the rule of a term is entered.
	 * rep is the kind of repetition and count its number; persistent
	 * is nonzero for a persistent term; safety is the safety at the
	 * place where the term occurs.
	 */
	int floor;

	switch(rep) {
	  default:
		/*
		 * No break follows: if the assertion does not stop the
		 * program, control goes on into the OPT case
		 */
		assert(0);
	  case OPT:
		if (!persistent) return SAFE;
		/* At most SAFESCANDONE */
		return safety < SAFESCANDONE ? safety : SAFESCANDONE;
	  case STAR:
		return persistent ? SAFESCANDONE : SAFE;
	  case PLUS:
		if (safety == NOSCANDONE) safety = SCANDONE;
		/* At least SAFESCANDONE if persistent, else at least SAFE */
		floor = persistent ? SAFESCANDONE : SAFE;
		return safety > floor ? safety : floor;
	  case FIXED:
		if (count) return SCANDONE;
		return safety == NOSCANDONE ? SCANDONE : safety;
	}
	/* NOTREACHED */
}

t_after(rep, count, outsafety) {
	/*
	 * Return the safety that holds after a term.
	 * rep is the kind of repetition and count its number; outsafety
	 * is the safety at the end of the rule of the term.
	 */
	if (rep == FIXED) {
		/* The rule of the term decides */
		return outsafety;
	}
	if (count == 0 && (rep == STAR || rep == PLUS)) {
		return SAFESCANDONE;
	}
	return outsafety <= SAFESCANDONE ? SAFESCANDONE : SCANDONE;
}