1997-02-21 11:27:57 +00:00
|
|
|
/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
|
|
|
|
* All rights reserved.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef NON_CORRECTING
|
|
|
|
|
|
|
|
/*
|
|
|
|
* L L G E N
|
|
|
|
*
|
|
|
|
* An Extended LL(1) Parser Generator
|
|
|
|
*
|
|
|
|
* Author : Ceriel J.H. Jacobs
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* savegram.c
|
|
|
|
* Save the input grammar for non-correcting error recovery
|
|
|
|
*
|
|
|
|
* Grammar rules are `flattened' by introducing anonymous nonterminals.
|
|
|
|
* [B]? becomes X; X: B | {empty}
|
|
|
|
* [B]+ becomes X; X: B Y; Y: X | {empty}
|
|
|
|
* [B]* becomes X; X: B X | {empty}
|
|
|
|
* [B | C] becomes X; X: B | C
|
|
|
|
* [B | C]* becomes X; X: B X | C X | {empty} etc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2013-05-12 19:45:55 +00:00
|
|
|
#include <stdlib.h>
|
2019-02-18 16:39:05 +00:00
|
|
|
#include <stdio.h>
|
1997-02-21 11:27:57 +00:00
|
|
|
# include "types.h"
|
|
|
|
# include "extern.h"
|
|
|
|
# include "io.h"
|
|
|
|
# include "assert.h"
|
|
|
|
# include "sets.h"
|
|
|
|
|
|
|
|
#define LLALT 9999
|
|
|
|
|
|
|
|
static int nt_highest;
|
|
|
|
extern int nbytes;
|
|
|
|
extern p_mem alloc();
|
|
|
|
extern p_set start_firsts;
|
|
|
|
extern p_set setalloc();
|
|
|
|
extern p_gram search();
|
|
|
|
|
2016-11-10 21:04:18 +00:00
|
|
|
STATIC void save_rule();
|
|
|
|
STATIC void save_set();
|
1997-02-21 11:27:57 +00:00
|
|
|
|
|
|
|
/* t_list will contain terms to be `flattened' */
|
|
|
|
static struct t_list {
|
|
|
|
p_term term;
|
|
|
|
int t_nt_num;
|
|
|
|
} *t_list;
|
|
|
|
|
|
|
|
/* Subparse list will contain symbols in %substart */
|
|
|
|
static struct subparse_list {
|
|
|
|
p_gram sub_action;
|
|
|
|
int sub_nt_num;
|
|
|
|
} *sub_list;
|
|
|
|
|
|
|
|
/* Index in t_list */
|
|
|
|
static int t_list_index;
|
|
|
|
|
1997-06-16 13:07:37 +00:00
|
|
|
/* Index in subparse_list */
|
1997-02-21 11:27:57 +00:00
|
|
|
static int sub_list_index;
|
|
|
|
|
|
|
|
/* File to save grammar to */
|
|
|
|
static FILE *fgram;
|
|
|
|
|
|
|
|
/* Nonterminal number to simulate parsers that get called in actions
|
|
|
|
used when LLgen called with -n -s options */
|
|
|
|
int act_nt;
|
|
|
|
|
2019-02-18 16:39:05 +00:00
|
|
|
void save_grammar(FILE *f)
|
|
|
|
{
|
1997-02-21 11:27:57 +00:00
|
|
|
/*
|
|
|
|
* Save the grammar
|
|
|
|
*/
|
|
|
|
register p_nont p;
|
|
|
|
register p_start st;
|
|
|
|
register int nt_nr;
|
|
|
|
|
|
|
|
fgram = f;
|
|
|
|
|
|
|
|
/* Compute highest nonterminal nr. */
|
|
|
|
nt_highest = nnonterms + assval - 1;
|
|
|
|
|
|
|
|
|
|
|
|
/* Generate some constants in the grammar file */
|
|
|
|
|
|
|
|
/* Allocate terms list */
|
LLgen: use size_t, reduce warnings, other small changes
Use C89 size_t for sizes from sizeof() or to malloc() or realloc().
Remove obsolete (unsigned) casts. Sizes were unsigned int in
traditional C but are size_t in C89.
Silence some clang warnings. Add the second pair of round brackets in
`while ((ff = ff->ff_next))` to silence -Wparentheses. Change
`if (nc_first(...))/*nothing*/;` to `(void)nc_first(...);` to silence
-Wempty-body. The code in compute.c nc_first() had the form
`if (x) if (y) s; else t;`. The old indentation (before 10717cc)
suggests that the "else" belongs to the 2nd "if", so add braces like
`if (x) { if (y) s; else t; }` to silence -Wdangling-else.
Shuffle extern function declarations. Add missing declaration for
LLparse(). Stop declaring RENAME(); it doesn't exist. Move some
declarations from main.c to extern.h, so the C compiler may check that
the declarations are compatible with the function definitions.
Assume that standard C89 remove() is available and doesn't need the
UNLINK() wrapper.
In lib/incl, don't need to include <stdio.h> nor <stdlib.h> to use
assert().
Remove alloc.h. If you don't clean your build, then an outdated
BUILDDIR/obj/util/LLgen/headers/alloc.h will survive but should not
cause harm, because nothing includes it. Don't need to remove alloc.h
from util/LLgen/distr.sh, because it isn't there.
Run the bootstrap to rebuild LLgen.c, Lpars.c, tokens.c.
2019-10-22 16:56:50 +00:00
|
|
|
t_list = (struct t_list *) alloc(nterms * sizeof(struct t_list));
|
1997-02-21 11:27:57 +00:00
|
|
|
t_list_index = 0;
|
|
|
|
|
|
|
|
sub_list = (struct subparse_list *) alloc(nsubstarts * sizeof(struct subparse_list));
|
|
|
|
|
|
|
|
fputs("static ", fgram);
|
|
|
|
fputs((prefix ? prefix : "LL"), fgram);
|
|
|
|
fputs("grammar[] = {\n", fgram);
|
|
|
|
|
|
|
|
/* Check if -n -s option is on */
|
|
|
|
if (subpars_sim) {
|
|
|
|
|
|
|
|
/* Allocate action simulation nt */
|
|
|
|
|
|
|
|
act_nt = ++nt_highest;
|
|
|
|
|
|
|
|
/* write simualtion rule */
|
|
|
|
fprintf(fgram, "/* Simulation rule */\n");
|
|
|
|
fprintf(fgram, "%d,\n", act_nt);
|
|
|
|
|
|
|
|
/* Put a firstset and a fake followset */
|
|
|
|
/* Followset optimization is not implemented for
|
|
|
|
-s because it would be hard, and does not
|
|
|
|
bring enough improvement to jutify the effort
|
|
|
|
*/
|
|
|
|
save_set(start_firsts);
|
|
|
|
save_set(start_firsts);
|
|
|
|
/* Simulation rule procudes empty */
|
|
|
|
fprintf(fgram, "%d,\n", 1);
|
|
|
|
for (st = start; st; st = st->ff_next)
|
|
|
|
{
|
|
|
|
fprintf(fgram, "%d, %d, %d, \n", st->ff_nont + assval,
|
|
|
|
act_nt, LLALT);
|
|
|
|
}
|
|
|
|
fprintf(fgram, "%d, \n", 0);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now process all rules */
|
|
|
|
for (p = nonterms, nt_nr = assval; p < maxnt; p++, nt_nr++) {
|
|
|
|
fprintf(fgram, "/* nr. %d %s */\n", nt_nr, p->n_name);
|
|
|
|
fprintf(fgram, "%d, ",nt_nr);
|
|
|
|
if (! p->n_rule) { /* undefined */
|
|
|
|
f_input = p->n_string;
|
|
|
|
error(p->n_lineno,"Nonterminal %s not defined",
|
|
|
|
p->n_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Save the first_set and follow set */
|
|
|
|
save_set(p->n_nc_first);
|
|
|
|
save_set(p->n_nc_follow);
|
|
|
|
|
|
|
|
if (p->n_flags & EMPTY)
|
|
|
|
fprintf(fgram, "%d,\n", 1);
|
|
|
|
else
|
|
|
|
fprintf(fgram, "%d,\n", 0);
|
|
|
|
|
|
|
|
save_rule(p->n_rule, 0);
|
|
|
|
|
|
|
|
fprintf(fgram, "%d,\n", 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Resolve terms, they are on t_list */
|
|
|
|
|
|
|
|
fprintf(fgram, "/* Fresh nonterminals */\n");
|
|
|
|
|
|
|
|
{ int i;
|
|
|
|
for (i = 0; i < t_list_index; i++)
|
|
|
|
{
|
|
|
|
|
|
|
|
/* Terms of the form [] without + ? * or number produce
|
|
|
|
a NIL pointer in the term-list */
|
|
|
|
if ((t_list + i)->term == (struct term *) 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(fgram, "%d, ", (t_list + i)->t_nt_num);
|
|
|
|
|
|
|
|
/* Save the first and follow sets */
|
|
|
|
|
|
|
|
save_set((t_list + i)->term->t_nc_first);
|
|
|
|
save_set((t_list + i)->term->t_nc_follow);
|
|
|
|
|
|
|
|
/* NOTE: A VARIABLE REPETITION COUNT TERMS RULE IS NOT
|
|
|
|
ALLOWED TO PRODUCE EMPTY IN LLGEN
|
|
|
|
*/
|
|
|
|
|
|
|
|
switch(r_getkind((t_list + i)->term)) {
|
|
|
|
case FIXED:
|
|
|
|
/* Already done by repeating new nonterminal */
|
|
|
|
|
|
|
|
/* FIXED term-rule may produce empty */
|
|
|
|
if (empty((t_list +i)->term->t_rule))
|
|
|
|
fprintf(fgram, "%d,\n", 1);
|
|
|
|
else
|
|
|
|
fprintf(fgram, "%d,\n", 0);
|
|
|
|
|
|
|
|
save_rule((t_list + i)->term->t_rule, 0);
|
|
|
|
fprintf(fgram, "%d,\n", 0);
|
|
|
|
break;
|
|
|
|
case STAR:
|
|
|
|
/* Save the rule, appending the new lhs for this rule */
|
|
|
|
|
|
|
|
/* Star rules always produce empty */
|
|
|
|
fprintf(fgram, "1,\n");
|
|
|
|
|
|
|
|
save_rule((t_list + i)->term->t_rule,
|
|
|
|
(t_list + i)->t_nt_num);
|
|
|
|
fprintf(fgram, "%d,\n%d,\n", LLALT, 0);
|
|
|
|
/* ALT EMPTY*/
|
|
|
|
break;
|
|
|
|
case PLUS:
|
|
|
|
/* Save the rule appending a fresh nonterminal */
|
|
|
|
|
|
|
|
fprintf(fgram, "%d,\n", 0);
|
|
|
|
|
|
|
|
save_rule((t_list + i)->term->t_rule, ++nt_highest);
|
|
|
|
fprintf(fgram, "%d,\n", 0); /* EOR */
|
|
|
|
fprintf(fgram, "%d, ", nt_highest);
|
|
|
|
/* First set of the extra nonterm is same as
|
|
|
|
for the term */
|
|
|
|
/* Except that the new nonterm also produces empty ! */
|
|
|
|
save_set((t_list + i)->term->t_nc_first);
|
|
|
|
save_set((t_list + i)->term->t_nc_follow);
|
|
|
|
fprintf(fgram, "1,\n");
|
|
|
|
fprintf(fgram, "%d, ", (t_list+i)->t_nt_num);
|
|
|
|
fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */
|
|
|
|
break;
|
|
|
|
case OPT:
|
|
|
|
fprintf(fgram, "1,\n");
|
|
|
|
save_rule((t_list + i)->term->t_rule, 0);
|
|
|
|
fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Resolve %substarts */
|
|
|
|
if (!subpars_sim) {
|
|
|
|
int i,s,check;
|
|
|
|
p_start ff, gg;
|
|
|
|
p_set temp_set;
|
|
|
|
|
|
|
|
for (i = 0; i < sub_list_index; i++) {
|
|
|
|
fprintf(fgram, "%d, ", (sub_list + i)->sub_nt_num);
|
|
|
|
/* Compute the first set */
|
|
|
|
temp_set = setalloc();
|
|
|
|
for (ff = g_getsubparse((sub_list + i)->sub_action);
|
|
|
|
ff; ff = ff->ff_next){
|
|
|
|
s = setunion(temp_set,
|
|
|
|
(&nonterms[ff->ff_nont])->n_first);
|
|
|
|
check = 0;
|
|
|
|
for (gg =start; gg; gg = gg->ff_next)
|
|
|
|
if (ff->ff_nont == gg->ff_nont)
|
|
|
|
check = 1;
|
|
|
|
if (check == 0)
|
|
|
|
warning((sub_list + i)->sub_action->g_lineno,
|
|
|
|
"\"%s\" is not a startsymbol",
|
|
|
|
(&nonterms[ff->ff_nont])->n_name);
|
|
|
|
}
|
|
|
|
save_set(temp_set);
|
|
|
|
save_set(temp_set);
|
|
|
|
free(temp_set);
|
|
|
|
|
|
|
|
/* Produces empty */
|
|
|
|
fprintf(fgram, "1,\n");
|
|
|
|
|
|
|
|
ff = g_getsubparse((sub_list + i)->sub_action);
|
|
|
|
|
|
|
|
for (; ff; ff = ff->ff_next)
|
|
|
|
fprintf(fgram, "%d, %d, %d, \n", ff->ff_nont + assval,
|
|
|
|
(sub_list + i)->sub_nt_num,
|
|
|
|
LLALT);
|
|
|
|
fprintf(fgram, "%d, \n", 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(fgram, "%d\n};\n", 0);
|
|
|
|
fprintf(fgram, "#define LLNNONTERMINALS %d\n", nt_highest - assval + 1);
|
|
|
|
}
|
|
|
|
|
2019-02-18 16:39:05 +00:00
|
|
|
STATIC void save_rule(register p_gram p, int tail)
|
|
|
|
{
|
1997-02-21 11:27:57 +00:00
|
|
|
/*
|
|
|
|
Walk through rule p, saving it. The non-terminal tail is
|
|
|
|
appended to the rule. It needs to be appended in this function
|
|
|
|
to process alt-rules correctly. Tail == 0 means don't append.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int in_alt;
|
|
|
|
int illegal_num;
|
|
|
|
/* Processing an alt needs some special care. When processing the
|
|
|
|
first alternative, we don't want to write the alt-code;
|
|
|
|
When appending something to the alt, it needs to be appended to
|
|
|
|
every alternative and not at the end of the rule.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Look up the ILLEGAL token number */
|
|
|
|
illegal_num = tokens[g_getcont(illegal_gram)].t_tokno;
|
|
|
|
|
|
|
|
in_alt = 0;
|
|
|
|
for (;;) {
|
|
|
|
switch(g_gettype(p)) {
|
|
|
|
case ALTERNATION :
|
|
|
|
if (in_alt)
|
|
|
|
fprintf(fgram, "%d,\n", LLALT);
|
|
|
|
else
|
|
|
|
in_alt = 1;
|
|
|
|
save_rule(g_getlink(p)->l_rule, tail);
|
|
|
|
break;
|
|
|
|
case TERM :
|
|
|
|
/* Make entry in term list */
|
|
|
|
(t_list + t_list_index)->term = g_getterm(p);
|
|
|
|
/* Test for [] without specifier */
|
|
|
|
if (g_getterm(p) == (struct term *) 0) {
|
|
|
|
t_list_index++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
(t_list + t_list_index++)->t_nt_num = ++nt_highest;
|
|
|
|
fprintf(fgram, "%d, ", nt_highest);
|
|
|
|
/* Check if repetition, if so handle here */
|
|
|
|
if (r_getkind(g_getterm(p)) == FIXED)
|
|
|
|
{
|
|
|
|
int k;
|
|
|
|
for (k = 1; k < r_getnum(g_getterm(p)); k++)
|
|
|
|
fprintf(fgram, "%d, ", nt_highest);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case NONTERM :
|
|
|
|
fprintf(fgram, "%d, ", g_getcont(p) + assval);
|
|
|
|
break;
|
|
|
|
case TERMINAL:
|
|
|
|
if (g_getcont(p) == g_getcont(illegal_gram)) {
|
|
|
|
/* %illegal. Ignore. */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (p->g_erroneous)
|
|
|
|
fprintf(fgram, "%d, ", illegal_num);
|
|
|
|
else
|
|
|
|
fprintf(fgram, "%d, ",
|
|
|
|
tokens[g_getcont(p)].t_tokno);
|
|
|
|
break;
|
|
|
|
case LITERAL:
|
|
|
|
if (p->g_erroneous)
|
|
|
|
fprintf(fgram, "%d, ", illegal_num);
|
|
|
|
else
|
|
|
|
fprintf(fgram, "%d, ",
|
|
|
|
tokens[g_getcont(p)].t_tokno);
|
|
|
|
break;
|
|
|
|
case ACTION:
|
|
|
|
if (subpars_sim) {
|
|
|
|
fprintf(fgram, "%d, ", act_nt);
|
|
|
|
}
|
|
|
|
else if (g_getsubparse(p)) {
|
|
|
|
/* Allocate nonterminal that will simulate
|
|
|
|
subparser
|
|
|
|
*/
|
|
|
|
(sub_list + sub_list_index)->sub_nt_num =
|
|
|
|
++nt_highest;
|
|
|
|
(sub_list + sub_list_index++)->sub_action = p;
|
|
|
|
|
|
|
|
fprintf(fgram, "%d, ", nt_highest);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case EORULE :
|
|
|
|
if ((! in_alt) && tail )
|
|
|
|
/* If this rule is not an alt, append tail now.
|
|
|
|
If it is an alt, the recursive call of this function
|
|
|
|
has appended tail to each alternative
|
|
|
|
*/
|
|
|
|
fprintf(fgram, "%d, ", tail);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-18 16:39:05 +00:00
|
|
|
STATIC void save_set(p_set p)
{
	/*
	   Write the first nbytes bytes of set p to the grammar file,
	   as octal constants each followed by a comma, and end the
	   line.  The bytes of every set element are written starting
	   with the least significant one.

	   The loop is bounded by the number of bytes still to write,
	   so a non-positive nbytes writes just the newline instead of
	   running past the end of the set.
	*/
	unsigned word = 0;
	int in_word = 0;	/* bytes of word written so far */
	int left;

	for (left = nbytes; left > 0; left--) {
		if (in_word == 0) {
			/* Fetch the next element only when one of its
			   bytes is really needed */
			word = (unsigned) *p++;
		}
		fprintf(fgram,"0%o,",(int)(word & 0377));
		word >>= 8;
		if (++in_word == (int) sizeof(int)) {
			in_word = 0;
		}
	}
	fputs("\n",fgram);
}
|
|
|
|
#endif
|