ack/util/LLgen/src/LLgen.g

/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
 * For full copyright and restrictions on use see the file COPYING in the top
 * level of the LLgen tree.
 */

/*
 *  L L G E N
 *
 *  An Extended LL(1) Parser Generator
 *
 *  Author : Ceriel J.H. Jacobs
 */

/*
 * LLgen.g
 * Defines the grammar of LLgen.
 * Some routines that build the internal structure are also included
 */

{
# include <stdlib.h>
# include <string.h>
# include "types.h"
# include "io.h"
# include "extern.h"
# include "assert.h"
# include "cclass.h"

# ifndef NORCSID
static string	rcsid = "$Id$";
# endif
p_mem		alloc(), ralloc();
string		store();
p_gram		search();
long		ftell();

/*
 * Number of global C actions seen since the previous rule; it is stored
 * in the n_count field of the next rule's nonterminal and then reset.
 */
static int	acount;
/*
 * Pool from which mkterm() hands out term structures: t_list points to
 * the next unused one, t_cnt tells how many are left in the current chunk.
 */
static p_term t_list;
static int t_cnt;
/*
 * Scratch table in which productions() collects the alternatives of an
 * alternation before copyrule() makes the permanent copy.
 * n_alts entries are in use, max_alts entries are allocated; the table
 * is allocated with, and grown by, ALTINCR entries.
 */
static p_gram	alt_table;
static int	n_alts;
static int	max_alts;
#define ALTINCR 32

/*
 * Scratch table in which simpleproduction() collects the elements of a
 * production before copyrule() makes the permanent copy.
 * n_rules entries are in use, max_rules entries are allocated; the table
 * is allocated with, and grown by, RULEINCR entries.
 */
static p_gram	rule_table;
static int	n_rules;
static int	max_rules;
#define RULEINCR	32

/* Here are defined : */
STATIC		newnorder();
STATIC		newtorder();
STATIC		mkalt();
STATIC		mkterm();
STATIC p_gram	copyrule();
/* and of course LLparse() */

STATIC
newnorder(index) {
	static int porder;

	if (norder != -1) {
		nonterms[porder].n_next = index;
	}
	else	norder = index;
	porder = index;
	nonterms[porder].n_next = -1;
}

STATIC
newtorder(index) {
	static int porder;

	if (torder != -1) {
		tokens[porder].t_next = index;
	}
	else	torder = index;
	porder = index;
	tokens[porder].t_next = -1;
}

p_init()
{
	/*
	 * Allocate the two scratch tables used during parsing, both
	 * empty and with their initial capacity.
	 */
	max_alts = ALTINCR;
	alt_table = (p_gram) alloc(ALTINCR * sizeof(t_gram));
	n_alts = 0;

	max_rules = RULEINCR;
	rule_table = (p_gram) alloc(RULEINCR * sizeof(t_gram));
	n_rules = 0;
}

}

%start	LLparse, spec;

spec	:		{	/*
				 * Set up the scratch tables; no global
				 * actions have been seen yet
				 */
				p_init();
				acount = 0;
			}
	  [ %persistent def ]*
			{	/*
				 * Terminate the temporary file with an
				 * endmarker of two null bytes and release
				 * the scratch tables
				 */
				int i;

				for (i = 0; i < 2; i++) {
					putc('\0', fact);
				}
				free((p_mem) rule_table);
				free((p_mem) alt_table);
			}
	;

def			{	register string entry_name; }
	: rule
	  /*
	   * A grammar rule
	   */
	| C_TOKEN listel [ ',' listel ]* ';'
	  /*
	   * A token declaration
	   */
	| C_START C_IDENT
			{	/* keep the first identifier of the declaration */
				entry_name = store(lextoken.t_string);
			}
	  ',' C_IDENT
	  /*
	   * A start symbol declaration
	   */
			{	/*
				 * Link a new entry in front of the list of
				 * start symbols, then look through the older
				 * entries for one that has the same name
				 */
				register p_gram nt;
				register p_start fresh, older;

				nt = search(NONTERM,lextoken.t_string,BOTH);
				fresh = (p_start) alloc(sizeof(t_start));
				fresh->ff_nont = g_getcont(nt);
				fresh->ff_name = entry_name;
				fresh->ff_next = start;
				start = fresh;
				for (older = fresh->ff_next; older; older = older->ff_next) {
					if (strcmp(entry_name, older->ff_name) == 0) {
						error(linecount, "\"%s\" already used in a %%start", entry_name);
						break;
					}
				}
			}
	  ';'
	| C_LEXICAL C_IDENT
	  /*
	   * Declaration of a name for the lexical analyser.
	   * May appear only once
	   */
			{	if (lexical) {
					error(linecount,"Duplicate %%lexical");
				}
				else {
					lexical = store(lextoken.t_string);
				}
			}
	  ';'
	| C_PREFIX C_IDENT
	  /*
	   * Prefix of external names (default: LL).
	   * May appear only once; a prefix of more than 6 characters
	   * is reported and cut off after the sixth
	   */
			{	if (prefix) {
					error(linecount,"Duplicate %%prefix");
				}
				else {
					prefix = store(lextoken.t_string);
					if (strlen(prefix) > 6) {
						error(linecount,
							"%%prefix too long");
						prefix[6] = 0;
					}
				}
			}
	  ';'
	| C_ONERROR C_IDENT
	  /*
	   * Name given in a %onerror declaration.
	   * May appear only once
	   */
			{
#ifdef NON_CORRECTING
				if (non_corr) {
					warning(linecount, "%%onerror conflicts with -n option");
				}
				else
#endif
				if (onerror) {
					error(linecount,"Duplicate %%onerror");
				}
				else {
					onerror = store(lextoken.t_string);
				}
			}
	  ';'
	| C_ACTION	{	acount += 1; }
	  /*
	   * A global C-declaration
	   */
	| firsts
	  /*
	   * declarations for macros
	   */
	;

listel	: C_IDENT	{	/*
				 * Enter the token, append it to the list of
				 * tokens and note the line of its declaration
				 */
				p_gram tok = search(TERMINAL,lextoken.t_string,ENTERING);
				int idx = g_getcont(tok);

				newtorder(idx);
				tokens[idx].t_lineno = linecount;
			}
	;

rule			{	register p_nont p;	/* the nonterminal being defined */
				p_gram rr;		/* its rule, as built by productions */
				register p_gram temp;	/* its entry as found by search() */
			}
	: /*
	   * grammar for a production rule
	   */
	  C_IDENT	{	temp = search(NONTERM,lextoken.t_string,BOTH);
				p = &nonterms[g_getcont(temp)];
				if (p->n_rule) {
					error(linecount,
"Nonterminal %s already defined", lextoken.t_string);
				}
				/*
				 * Remember the order in which the nonterminals
				 * were defined. Code must be generated in that
				 * order to keep track with the actions on the
				 * temporary file
				 */
				/*
				 * newnorder() is declared without a prototype
				 * and takes an int. The pointer difference is
				 * a ptrdiff_t, which may be wider than an int,
				 * so it must be converted explicitly.
				 */
				newnorder((int) (p - nonterms));
				/*
				 * Hand the global actions counted so far to
				 * this nonterminal and start counting anew
				 */
				p->n_count = acount;
				acount = 0;
				p->n_lineno = linecount;
				/* current position in the temporary file */
				p->n_off = ftell(fact);
			}
	  [ C_PARAMS	{	/*
				 * Formal parameters: at most 15 are accepted
				 */
				if (lextoken.t_num > 0) {
					p->n_flags |= PARAMS;
					if (lextoken.t_num > 15) {
						error(linecount,"Too many parameters");
					}
					else	setntparams(p,lextoken.t_num);
				}
			}
	  ]?
	  [ C_ACTION	{	/* an action before the ':' : local declarations */
				p->n_flags |= LOCALS;
			}
	  ]?
	  ':' 		{ 	in_production = 1; }
	  productions(&rr) ';'
			{	in_production = 0; }
			/*
			 * Do not use p->n_rule now! The nonterms array
			 * might have been re-allocated.
			 */
			{	nonterms[g_getcont(temp)].n_rule = rr;}
	;

productions(p_gram *p;)
	/*
	 * One or more alternatives
	 *
	 * A single alternative is delivered in *p just as simpleproduction
	 * built it. With several alternatives, an ALTERNATION element is made
	 * for each of them in alt_table, and *p is set to a copy of these
	 * elements, closed off with an EORULE element.
	 * This rule is used recursively (elem uses it for a bracketed term),
	 * so alt_table is used as a stack: the entries added here are
	 * released again at the end.
	 */
	{	p_gram		prod;		/* the alternative parsed last */
		int		conflres = 0;	/* flags of the alternative held in *p */
		int		t = 0;		/* flags of the alternative in prod */
		int		haddefault = 0;	/* set once a %default has been seen */
		int		altcnt = 0;	/* #entries added to alt_table here */
		int		o_lc, n_lc;	/* line of the alternative in *p, in prod */
	} :
			{	o_lc = linecount; }
	  simpleproduction(p,&conflres)
			{	if (conflres & DEF) haddefault = 1; }
	  [
	    [ '|'	{	n_lc = linecount; }
	      simpleproduction(&prod,&t)
			{	/*
				 * Keep at least three entries free: one for
				 * the alternative entered below, and two for
				 * the last alternative and the EORULE that
				 * are added after the loop
				 */
				if (n_alts >= max_alts-2) {
					alt_table = (p_gram ) ralloc(
						(p_mem) alt_table,
						(unsigned)(max_alts+=ALTINCR)*sizeof(t_gram));
				}
				if (t & DEF) {
					if (haddefault) {
						error(n_lc,
		"More than one %%default in alternation");
					}
					haddefault = 1;
				}
				/*
				 * Enter the previous alternative (still in
				 * *p); the one just parsed takes its place
				 * and is entered on the next round or after
				 * the loop
				 */
				mkalt(*p,conflres,o_lc,&alt_table[n_alts++]);
				altcnt++;
				o_lc = n_lc;
				conflres = t;
				t = 0;
				*p = prod;
			}
	    ]+		{	if (conflres & (COND|PREFERING|AVOIDING)) {
					error(n_lc,
		"Resolver on last alternative not allowed");
				}
				/*
				 * Enter the last alternative, close the list
				 * with an EORULE and deliver a copy of the
				 * altcnt entries plus the EORULE
				 */
				mkalt(*p,conflres,n_lc,&alt_table[n_alts++]);
				altcnt++;
				g_settype((&alt_table[n_alts]),EORULE);
				*p = copyrule(&alt_table[n_alts-altcnt],altcnt+1);
			}
	  |
			{	/*
				 * Only one alternative: *p is left as
				 * simpleproduction delivered it
				 */
				if (conflres & (COND|PREFERING|AVOIDING)) {
					error(o_lc,
		"No alternation conflict resolver allowed here");
				}
				/*
				if (conflres & DEF) {
					error(o_lc,
		"No %%default allowed here");
				}
				*/
			}
	  ]
			{	/* release the entries used by this alternation */
				n_alts -= altcnt;
			}
	;
{

STATIC
mkalt(prod,condition,lc,res) p_gram prod; register p_gram res; {
	/*
	 * Turn "res" into an ALTERNATION element for the production
	 * "prod", with flags "condition" and line number "lc".
	 * The link structure it refers to is taken from a pool that is
	 * allocated in chunks of 50.
	 */
	static p_link pool;	/* next unused link in the current chunk */
	static int avail;	/* #unused links left in that chunk */
	register p_link lnk;

	if (avail == 0) {
		/* first call, or chunk used up */
		pool = (p_link) alloc(50 * sizeof(t_link));
		avail = 50;
	}
	lnk = pool;
	pool++;
	avail--;

	lnk->l_rule = prod;
	lnk->l_flag = condition;
	g_setlink(res,lnk);
	g_settype(res,ALTERNATION);
	res->g_lineno = lc;
	nalts++;
}
}

simpleproduction(p_gram *p; register int *conflres;)
	/*
	 * One alternative: optional %default, optional conflict resolver,
	 * optional %illegal, and then any number of elements, each of which
	 * may be followed by a repetition.
	 * The flags found are or-ed into *conflres. The elements are
	 * collected in rule_table, starting at the value n_rules has on
	 * entry, and the resulting rule is delivered in *p.
	 */
	{	t_gram		elem;		/* the element being handled */
		int		elmcnt = 0;	/* #elements added to rule_table */
		int		cnt, kind;	/* repetition of the current element */
		int		termdeleted = 0;/* set when a term was expanded in-line */
	} :
	  [ C_DEFAULT	{	*conflres |= DEF; }
	  ]?
	  [
	    /*
	     * Optional conflict resolver
	     */
	      C_IF C_EXPR {	*conflres |= COND; }
	    | C_PREFER	{	*conflres |= PREFERING; }
	    | C_AVOID	{	*conflres |= AVOIDING; }
	  ]?
	  [ C_ILLEGAL	{
#ifdef NON_CORRECTING
				/*
				 * The terminal "LLILLEGAL" becomes the first
				 * element of the production
				 */
				if (n_rules >= max_rules-2) {
					rule_table = (p_gram) ralloc(
						  (p_mem) rule_table,
						  (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
				}
				elmcnt++;
				rule_table[n_rules++] =
				    *search(TERMINAL, "LLILLEGAL", BOTH);
				if (*conflres & DEF) {
					error(linecount, "%%illegal not allowed in %%default rule");
				}
#endif
			}
	  ]?
	  [ %persistent elem(&elem)
			{	/*
				 * Make sure that rule_table[n_rules] and
				 * rule_table[n_rules+1] can be used
				 */
				if (n_rules >= max_rules-2) {
					rule_table = (p_gram) ralloc(
						  (p_mem) rule_table,
						  (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
				}
				kind = FIXED;
				cnt = 0;
			}
	    [ repeats(&kind, &cnt)
			{	/*
				 * A repetition applies to a term; an element
				 * that is not a term is wrapped in a term
				 * whose rule consists of just that element
				 */
				if (g_gettype(&elem) != TERM) {
					rule_table[n_rules] = elem;
					g_settype((&rule_table[n_rules+1]),EORULE);
					mkterm(copyrule(&rule_table[n_rules],2),
					       0,
					       elem.g_lineno,
					       &elem);
				}
			}
	    |
			{ /*
			   * No repetition. A term without %while whose rule
			   * starts with neither an ALTERNATION nor an EORULE
			   * is expanded in-line: its elements are copied
			   * into rule_table instead of the term itself
			   */
			  if (g_gettype(&elem) == TERM) {
				register p_term q = g_getterm(&elem);

				if (! (q->t_flags & RESOLVER) &&
				    g_gettype(q->t_rule) != ALTERNATION &&
				    g_gettype(q->t_rule) != EORULE) {
				    while (g_gettype(q->t_rule) != EORULE) {
					rule_table[n_rules++] = *q->t_rule++;
					elmcnt++;
					if (n_rules >= max_rules-2) {
					    rule_table = (p_gram) ralloc(
						  (p_mem) rule_table,
						  (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
					}
				    }
				    /*
				     * Take the last element out again; it
				     * is added by the action that follows
				     * this alternation
				     */
				    elem = *--(q->t_rule);
				    n_rules--;
				    elmcnt--;
				    /*
				     * If this term is the one that mkterm
				     * handed out last, give it back
				     */
				    if (q == t_list - 1) {
					t_list--;
					nterms--;
					t_cnt++;
				    }
				    termdeleted = 1;
				}
			  }
			}
	    ]		{	/*
				 * Record the repetition in the term, unless
				 * the term was expanded in-line (elem is then
				 * the last element of that term)
				 */
				if (!termdeleted && g_gettype(&elem) == TERM) {
					register p_term q;

					q = g_getterm(&elem);
					r_setkind(q,kind);
					r_setnum(q,cnt);
					if ((q->t_flags & RESOLVER) &&
					    (kind == PLUS || kind == FIXED)) {
						error(linecount,
		"%%while not allowed in this term");
					}
					/*
					 * A persistent fixed term is the same
					 * as a non-persistent fixed term.
					 * Should we complain?
					if ((q->t_flags & PERSISTENT) &&
					    kind == FIXED) {
						error(linecount,
							"Illegal %%persistent");
					}
					*/
				}
				termdeleted = 0;
				elmcnt++;
				rule_table[n_rules++] = elem;
			}
	  ]*		{	register p_term q;

				/*
				 * Close the rule and go back to its first
				 * element; this also releases the part of
				 * rule_table that was used here
				 */
				g_settype((&rule_table[n_rules]),EORULE);
				*p = 0;
				n_rules -= elmcnt;
				/*
				 * A production that consists of one term of
				 * kind FIXED with number 0 is replaced by the
				 * rule of that term; anything else is copied,
				 * including the EORULE
				 */
				if (g_gettype(&rule_table[n_rules]) == TERM &&
				    elmcnt == 1) {
					q = g_getterm(&rule_table[n_rules]);
					if (r_getkind(q) == FIXED &&
					    r_getnum(q) == 0) {
						*p = q->t_rule;
					}
				}
				if (!*p) *p = copyrule(&rule_table[n_rules],
						elmcnt+1);
			}
	;
{

STATIC
mkterm(prod,flags,lc,result) p_gram prod; register p_gram result; {
	/*
	 * Turn "result" into a TERM element with line number "lc" that
	 * refers to a newly initialised term with rule "prod" and flags
	 * "flags". Terms are taken from the pool described by t_list
	 * and t_cnt, which is allocated in chunks of 50.
	 */
	register p_term trm;

	if (t_cnt == 0) {
		/* first call, or chunk used up */
		t_list = (p_term) alloc(50 * sizeof(t_term));
		t_cnt = 50;
	}
	trm = t_list;
	t_list++;
	t_cnt--;

	trm->t_rule = prod;
	trm->t_contains = 0;
	trm->t_flags = flags;
	g_settype(result,TERM);
	g_setterm(result,trm);
	result->g_lineno = lc;
	nterms++;
}
}

elem (register p_gram pres;)
	/*
	 * One element of a production, delivered in *pres. It is either
	 * a term between '[' and ']', an identifier or a literal
	 * (optionally preceded by C_ERRONEOUS), or an action (optionally
	 * preceded by a C_SUBSTART list).
	 */
	{	register int	t = 0;		/* flags for a term */
		p_gram		p1;		/* rule of a term */
		int		ln;		/* line on which the term starts */
		p_gram		pe;		/* entry found by search() */
#ifdef NON_CORRECTING
		int		erroneous = 0;	/* set when C_ERRONEOUS was seen */
#endif
	} :
	  '['		{	ln = linecount; }
	  [ C_WHILE C_EXPR	{	t |= RESOLVER; }
	  ]?
	  [ C_PERSISTENT	{	t |= PERSISTENT; }
	  ]?
	  productions(&p1)
	  ']'		{
				mkterm(p1,t,ln,pres);
			}
	|
	  [ C_ERRONEOUS		{
#ifdef NON_CORRECTING
					erroneous = 1;
#endif
				}
	  ]?

	  [
	  C_IDENT	{	/*
				 * The element is a copy of the entry that
				 * search() delivers for this identifier
				 */
				pe = search(UNKNOWN,lextoken.t_string,BOTH);
				*pres = *pe;
#ifdef NON_CORRECTING
				if (erroneous) {
					if (g_gettype(pres) != TERMINAL){
						warning(linecount,
							"Erroneous only allowed on terminal");
						erroneous = 0;
					}
					else
						pres->g_erroneous = 1;
				}
#endif

			}
	  [ C_PARAMS	{	/*
				 * Actual parameters: at most 15, and not
				 * allowed on a terminal
				 */
				if (lextoken.t_num > 15) {
					error(linecount,"Too many parameters");
				} else	g_setnpar(pres,lextoken.t_num);
				if (g_gettype(pres) == TERMINAL) {
					error(linecount,
						"Terminal with parameters");
				}
			}
	  ]?
	| C_LITERAL	{	pe = search(LITERAL,lextoken.t_string,BOTH);
				*pres = *pe;
#ifdef NON_CORRECTING
				if (erroneous)
					pres->g_erroneous = 1;
#endif
			}
	  ]
	|		{	/*
				 * An action; without NON_CORRECTING the
				 * C_SUBSTART part below is parsed but has
				 * no effect
				 */
				g_settype(pres,ACTION);
				pres->g_lineno = linecount;
#ifdef NON_CORRECTING
				g_setsubparse(pres, (p_start) 0);
#endif
			}

	  [ C_SUBSTART

			{
#ifdef NON_CORRECTING
				nsubstarts++;
#endif
			}

	    C_IDENT
			{
#ifdef NON_CORRECTING
				/*
				 * The first nonterminal starts the list
				 * that is attached to the action
				 */
				register p_gram temp;
				register p_start subp;

				temp = search(NONTERM,lextoken.t_string,BOTH);
				subp = (p_start) alloc (sizeof(t_start));

				subp->ff_nont = g_getcont(temp);
				subp->ff_name = (string) 0;
				subp->ff_next = (p_start) 0;
				g_setsubparse(pres, subp);
#endif
			}

		[ ',' C_IDENT
			{
#ifdef NON_CORRECTING
				/*
				 * Each further nonterminal is put in front
				 * of the list. One that is in the list
				 * already gives a warning, but is added
				 * all the same
				 */
				register p_gram temp;
				register p_start ff;

				temp = search(NONTERM,lextoken.t_string,BOTH);

				ff = g_getsubparse(pres);
				while (ff) {
					if (ff->ff_nont == g_getcont(temp)) {
						warning(linecount, "\"%s\" used twice in %%substart", lextoken.t_string);
						break;
					}
					ff = ff->ff_next;

				}

				ff = (p_start) alloc(sizeof(t_start));
				ff->ff_nont = g_getcont(temp);
				ff->ff_name = (string) 0;
				ff->ff_next = g_getsubparse(pres);
				g_setsubparse(pres, ff);
#endif
			}

	       ]* ';'
	   ]?

	  C_ACTION
	;

repeats(int *kind; int *cnt;)	{	int count = 0; } :
	/*
	 * The repetition that follows an element: '?', '*' or '+'
	 * optionally followed by a number, or just a number.
	 * The kind of repetition is delivered in *kind (left alone when
	 * there is just a number), the number in *cnt (0 if absent).
	 */
	  [
	    '?'		{	*kind = OPT; }
	  | [ '*'	{	*kind = STAR; }
	    | '+'	{	*kind = PLUS; }
	    ]
	    number(&count)?
			{	/*
				 * With a number of 1, '*' is the same as '?',
				 * and '+' is the same as no repetition
				 */
				if (count == 1) {
					count = 0;
					if (*kind == STAR) {
						*kind = OPT;
					}
					if (*kind == PLUS) {
						*kind = FIXED;
					}
				}
			}
	  | number(&count)
	  ]		{	*cnt = count; }
	;

number(int *t;)
	/*
	 * A number, delivered in *t. It must be greater than 0 and
	 * smaller than 8192; if not, an error is given but the value
	 * is delivered all the same.
	 */
	: C_NUMBER
			{	int value = lextoken.t_num;

				*t = value;
				if (!(value > 0 && value < 8192)) {
					error(linecount,"Illegal number");
				}
			}
	;

firsts	{	register string mname; }
	: C_FIRST C_IDENT
			{	/* keep the first identifier of the declaration */
				mname = store(lextoken.t_string);
			}
	  ',' C_IDENT ';'
			{	/*
				 * Make an entry for this %first and link it
				 * in front of the list belonging to the
				 * current input file
				 */
				p_gram nt;
				register p_first entry;

				nt = search(NONTERM,lextoken.t_string,BOTH);
				entry = (p_first) alloc(sizeof(t_first));
				entry->ff_name = mname;
				entry->ff_nont = g_getcont(nt);
				entry->ff_next = pfile->f_firsts;
				pfile->f_firsts = entry;
			}
	;
{

STATIC p_gram
copyrule(p,length) register p_gram p; {
	/*
	 * Make a copy of the "length" grammar elements that start at p,
	 * in newly allocated space, and return a pointer to the copy.
	 * The space pointed to by p can then be reused by the caller.
	 */
	register p_gram dst;
	p_gram copy;

	copy = (p_gram) alloc((unsigned) length * sizeof(t_gram));
	for (dst = copy; length != 0; length--) {
		*dst++ = *p++;
	}
	return copy;
}
}