Added non-correcting error recovery stuff

This commit is contained in:
ceriel 1997-02-21 11:27:57 +00:00
parent a44875cf00
commit c2607fdf0f
21 changed files with 3178 additions and 262 deletions

View file

@ -21,6 +21,10 @@ extern unsigned int LLscnt[];
extern unsigned int LLtcnt[]; extern unsigned int LLtcnt[];
extern int LLcsymb; extern int LLcsymb;
#if LL_NON_CORR
extern int LLstartsymb;
#endif
#define LLsdecr(d) {LL_assert(LLscnt[d] > 0); LLscnt[d]--;} #define LLsdecr(d) {LL_assert(LLscnt[d] > 0); LLscnt[d]--;}
#define LLtdecr(d) {LL_assert(LLtcnt[d] > 0); LLtcnt[d]--;} #define LLtdecr(d) {LL_assert(LLtcnt[d] > 0); LLtcnt[d]--;}
#define LLsincr(d) LLscnt[d]++ #define LLsincr(d) LLscnt[d]++

70
util/LLgen/lib/nc_incl Normal file
View file

@ -0,0 +1,70 @@
/*
 * Tags used in the grammar tables of the non-correcting error recovery.
 * LLTERMINAL and LLNONTERMINAL are the values documented for the 'x'
 * member of struct symbol; LLALT and LLEORULE mark, according to their
 * own comments, whether another alternative follows.
 * NOTE(review): the code that consumes these tags (nc_rec) is not visible
 * here -- confirm where LLALT and LLEORULE are stored.
 */
#define LLALT 9999 /* Alternative is following */
#define LLTERMINAL 1 /* Symbol is a terminal */
#define LLNONTERMINAL 2 /* Symbol is a nonterminal */
#define LLEORULE 0 /* No more alternatives */
/*
 * Left-hand side of a grammar rule.
 * The FIRST and FOLLOW sets are char arrays of LLSETSIZE elements
 * (LLSETSIZE is defined outside this file).
 */
struct lhs {
	int		nr;			/* Number of the nonterminal */
	struct symbol	*rhs;			/* Right-hand side of the rule */
	char		first[LLSETSIZE];	/* FIRST set */
	char		follow[LLSETSIZE];	/* FOLLOW set */
	char		empty;			/* Set if the nonterminal produces empty */
};
/*
 * One symbol in the right-hand side of a rule.
 * Symbols are chained twice: 'next' runs along the rule, 'link' runs to
 * the next rule that contains the same symbol.
 */
struct symbol {
	int		x;	/* Kind: LLTERMINAL or LLNONTERMINAL */
	int		nr;	/* Number of the symbol */
	struct symbol	*link;	/* Next rule with this symbol */
	struct symbol	*next;	/* Next symbol in this rule */
	struct lhs	*lhs;	/* Left-hand side this symbol belongs to */
};
/*
 * Element of the array with links to the terminals in a rule.
 */
struct terminal {
	struct symbol	*link;	/* Occurrence of the terminal in a rule */
};
/*
 * Element of the array with links to the nonterminals in a rule,
 * together with a pointer to the left-hand sides.
 */
struct nonterminal {
	struct symbol	*link;	/* Occurrence of the nonterminal in a rule */
	struct lhs	*rule;	/* Left-hand side(s) of the nonterminal */
};
/*
 * Element of the stack used by the non-correcting parser.
 * Every element records its successors in the 'edges' array.
 */
struct stack_elt {
	int		flags;		/* Some flags (LLHEAD, LLDUMMY, ...) */
	int		nr;		/* Number of the symbol */
	int		ref_count;	/* Number of predecessors */
	int		hyp_ref_count;	/* Temporary number of predecessors */
	int		matched;	/* Number of the LHS trying to match */
	int		nr_nexts;	/* Number of successors */
	struct edge	*edges;		/* Array of edges to other stack elements */
};
/* Possible flags in a stack element */
/*
 * Each value is a distinct power of two, so several flags can be held
 * in one int at the same time. The value 4 is not defined here.
 */
#define LLHEAD 1 /* Stack element is a head */
#define LLDUMMY 2 /* Stack element is substituted */
#define LLGEN_SEARCH 8 /* Set by 'generate_heads()' */
/*
 * Edge leaving a stack element.
 */
struct edge {
	char			flags;	/* Some flags (LLLOOP, LLLOOP_SEARCH, ...) */
	struct stack_elt	*ptr;	/* Array with pointers to stack elements */
};
/* Possible flags in an edge */
/*
 * Each value is a distinct power of two, so several flags can be
 * combined in one flags value. Most of them are marker bits for the
 * traversal routine named in their comment.
 * NOTE(review): LLYES and LLNO carry no comment and their users are not
 * visible here -- confirm their meaning in nc_rec. PRINT_SEARCH is the
 * only name without the LL prefix.
 */
#define LLLOOP 1 /* Belongs to a loop */
#define LLLOOP_SEARCH 2 /* Used by 'loop()' */
#define LLHYP_SEARCH 4 /* Used by 'hyp_run()' */
#define PRINT_SEARCH 8 /* DEBUG */
#define LLMARK_SEARCH 16 /* Used by 'mark_loop()' */
#define LLYES 32
#define LLNO 64
/*
 * Stack sentinel. The negative value is parenthesized so that the macro
 * expands to a single operand in every expression context.
 */
#define LLEOSTACK (-1) /* Indicates last element of a stack */
/*
 * Growth steps: number of elements by which the heads buffer and the
 * cleanup buffer are increased.
 * NOTE(review): LL_VIS_INCR has no comment in the original; presumably a
 * similar growth step -- confirm against nc_rec.
 */
#define LLHEADS_BUF_INCR 10 /* Nr of elements the buffer will be */
#define LLCLEANUP_BUF_INCR 25 /* increased by */
#define LL_VIS_INCR 200
/*
 * Macros to manipulate bit sets.
 * A set is an array of bytes; element i lives in byte i/8, bit i%8.
 * LLIN yields a nonzero value if element i is in set a, LLPUTIN adds
 * element i to set a. Both evaluate their arguments more than once.
 */
#define LL_SETBYTE(a, i) ((a)[(i)/8])
#define LL_SETMASK(i) (1 << ((i) % 8))
#define LLIN(a, i) (LL_SETBYTE(a, i) & LL_SETMASK(i))
#define LLPUTIN(a, i) (LL_SETBYTE(a, i) |= LL_SETMASK(i))

1791
util/LLgen/lib/nc_rec Normal file

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,11 @@ unsigned int LLscnt[LL_NSETS];
int LLcsymb, LLsymb; int LLcsymb, LLsymb;
static int LLlevel; static int LLlevel;
#if LL_NON_CORR
int LLstartsymb;
static int fake_eof = 0;
#endif
#if LL_ANSI_C #if LL_ANSI_C
#define LL_VOIDCST (void) #define LL_VOIDCST (void)
void LLmessage(int); void LLmessage(int);
@ -39,7 +44,38 @@ LLscan(t)
/* /*
* Check if the next symbol is equal to the parameter * Check if the next symbol is equal to the parameter
*/ */
#if LL_NON_CORR
/* See if the error recovery has eaten an eof */
if (fake_eof) {
LLsymb = EOFILE;
fake_eof = 0;
}
else {
LLsymb = LL_LEXI();
}
if (LLsymb == t) {
#else
if ((LLsymb = LL_LEXI()) == t) { if ((LLsymb = LL_LEXI()) == t) {
#endif
#if LL_NON_CORR
/* Check if a previous parser has 'crashed', in that
* case continue with non-correcting parser
*/
if (err_seen && !nc_done) {
LLnc_recover();
nc_done = 1;
/* Remember that the error recovery has eaten an eof */
fake_eof = 1;
if (t != LLsymb) {
LLerror(t);
}
else
return;
}
#endif
return; return;
} }
/* /*
@ -54,6 +90,31 @@ void LLread(void) {
#else #else
LLread() { LLread() {
#endif #endif
#if LL_NON_CORR
/* Again, check if another parser has crashed,
* in that case intercept and go to the
* non-correcting parser
*/
if (err_seen && !nc_done) {
LLnc_recover();
nc_done = 1;
/* Pretend we read end of file */
LLsymb = EOFILE;
LLcsymb = LLindex[EOFILE];
fake_eof = 0;
return;
}
if (fake_eof) {
LLsymb = EOFILE;
LLcsymb = LLindex[EOFILE];
fake_eof = 0;
return;
}
#endif
for (;;) { for (;;) {
if ((LLcsymb = LLindex[(LLsymb = LL_LEXI())]) >= 0) return; if ((LLcsymb = LLindex[(LLsymb = LL_LEXI())]) >= 0) return;
LLmessage(0); LLmessage(0);
@ -85,6 +146,16 @@ LLerror(t)
return; return;
} }
#endif #endif
#if LL_NON_CORR
if ((!nc_done) && (LLsymb > 0) && (LLsymb != EOFILE)) {
LLmessage(0);
LLnc_recover();
nc_done = 1;
LLsymb = EOFILE;
}
#endif
if ((LLcsymb = LLindex[LLsymb]) < 0) { if ((LLcsymb = LLindex[LLsymb]) < 0) {
LLmessage(0); LLmessage(0);
LLread(); LLread();
@ -97,7 +168,25 @@ LLerror(t)
LL_VOIDCST LLskip(); LL_VOIDCST LLskip();
#endif #endif
LLtcnt[i]--; LLtcnt[i]--;
if (LLsymb != t) LLmessage(t); if (LLsymb != t) {
#if LL_NON_CORR
/* A little kludge here; when using non-correcting recovery
* it can happen that a program is correct but incomplete.
* Here, we test this, and make sure the appropriate
* message is generated
*/
if (! nc_done) {
int oldLLsymb;
oldLLsymb = LLsymb;
LLsymb = EOFILE;
LLmessage(0);
nc_done = 1;
/* Not really, but to prevent more than 1 error message */
LLsymb = oldLLsymb;
}
#endif
LLmessage(t);
}
} }
#if LL_ANSI_C #if LL_ANSI_C
@ -121,6 +210,14 @@ LLsafeerror(t)
} }
return; return;
} }
#endif
#if LL_NON_CORR
if ((!nc_done) && (LLsymb > 0) && (LLsymb != EOFILE)) {
LLmessage(0);
LLnc_recover();
nc_done = 1;
LLsymb = EOFILE;
}
#endif #endif
LLmessage(t); LLmessage(t);
} }
@ -265,7 +362,19 @@ static int LLdoskip(e)
continue; continue;
} }
#endif /* LL_USERHOOK */ #endif /* LL_USERHOOK */
#if LL_NON_CORR
if ((!nc_done) && (LLsymb > 0)) {
LLmessage(0);
LLnc_recover();
nc_done = 1;
fake_eof = 1;
}
else {
LLmessage(0);
}
#else
LLmessage(0); LLmessage(0);
#endif
retval = 1; retval = 1;
LLread(); LLread();
} }

View file

@ -32,9 +32,7 @@ string store();
p_gram search(); p_gram search();
long ftell(); long ftell();
static int nparams; /* parameter count for nonterminals */
static int acount; /* count #of global actions */ static int acount; /* count #of global actions */
static int order;
static p_term t_list; static p_term t_list;
static int t_cnt; static int t_cnt;
static p_gram alt_table; static p_gram alt_table;
@ -48,9 +46,8 @@ static int max_rules;
#define RULEINCR 32 #define RULEINCR 32
/* Here are defined : */ /* Here are defined : */
STATIC newnorder(); STATIC newnorder();
STATIC newtorder(); STATIC newtorder();
STATIC copyact();
STATIC mkalt(); STATIC mkalt();
STATIC mkterm(); STATIC mkterm();
STATIC p_gram copyrule(); STATIC p_gram copyrule();
@ -169,13 +166,20 @@ def { register string p; }
} }
';' ';'
| C_ONERROR C_IDENT | C_ONERROR C_IDENT
{ if (! onerror) { {
#ifdef NON_CORRECTING
if (non_corr) {
warning(linecount, "%%onerror conflicts with -n option");
}
else
#endif
if (! onerror) {
onerror = store(lextoken.t_string); onerror = store(lextoken.t_string);
} }
else error(linecount,"Duplicate %%onerror"); else error(linecount,"Duplicate %%onerror");
} }
';' ';'
| action(0) { acount++; } | C_ACTION { acount++; }
/* /*
* A global C-declaration * A global C-declaration
*/ */
@ -216,18 +220,20 @@ rule { register p_nont p;
p->n_lineno = linecount; p->n_lineno = linecount;
p->n_off = ftell(fact); p->n_off = ftell(fact);
} }
[ params(1) { if (nparams > 0) { [ C_PARAMS { if (lextoken.t_num > 0) {
p->n_flags |= PARAMS; p->n_flags |= PARAMS;
if (nparams > 15) { if (lextoken.t_num > 15) {
error(linecount,"Too many parameters"); error(linecount,"Too many parameters");
} }
else setntparams(p,nparams); else setntparams(p,lextoken.t_num);
} }
} }
]? ]?
[ action(0) { p->n_flags |= LOCALS; } [ C_ACTION { p->n_flags |= LOCALS; }
]? ]?
':' productions(&rr) ';' ':' { in_production = 1; }
productions(&rr) ';'
{ in_production = 0; }
/* /*
* Do not use p->n_rule now! The nonterms array * Do not use p->n_rule now! The nonterms array
* might have been re-allocated. * might have been re-allocated.
@ -235,15 +241,6 @@ rule { register p_nont p;
{ nonterms[g_getcont(temp)].n_rule = rr;} { nonterms[g_getcont(temp)].n_rule = rr;}
; ;
action(int n;)
/*
* The parameter n is non-zero when the opening and closing
* bracket must be copied along with the action
*/
: '{' { copyact('{','}',n,0); }
'}'
;
productions(p_gram *p;) productions(p_gram *p;)
/* /*
* One or more alternatives * One or more alternatives
@ -280,7 +277,7 @@ productions(p_gram *p;)
t = 0; t = 0;
*p = prod; *p = prod;
} }
]+ { if (conflres & ~DEF) { ]+ { if (conflres & (COND|PREFERING|AVOIDING)) {
error(n_lc, error(n_lc,
"Resolver on last alternative not allowed"); "Resolver on last alternative not allowed");
} }
@ -290,7 +287,7 @@ productions(p_gram *p;)
*p = copyrule(&alt_table[n_alts-altcnt],altcnt+1); *p = copyrule(&alt_table[n_alts-altcnt],altcnt+1);
} }
| |
{ if (conflres & ~DEF) { { if (conflres & (COND|PREFERING|AVOIDING)) {
error(o_lc, error(o_lc,
"No alternation conflict resolver allowed here"); "No alternation conflict resolver allowed here");
} }
@ -336,16 +333,32 @@ simpleproduction(p_gram *p; register int *conflres;)
int cnt, kind; int cnt, kind;
int termdeleted = 0; int termdeleted = 0;
} : } :
[ C_DEFAULT { *conflres = DEF; } [ C_DEFAULT { *conflres |= DEF; }
]? ]?
[ [
/* /*
* Optional conflict resolver * Optional conflict resolver
*/ */
C_IF expr { *conflres |= COND; } C_IF C_EXPR { *conflres |= COND; }
| C_PREFER { *conflres |= PREFERING; } | C_PREFER { *conflres |= PREFERING; }
| C_AVOID { *conflres |= AVOIDING; } | C_AVOID { *conflres |= AVOIDING; }
]? ]?
[ C_ILLEGAL {
#ifdef NON_CORRECTING
if (n_rules >= max_rules-2) {
rule_table = (p_gram) ralloc(
(p_mem) rule_table,
(unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
}
elmcnt++;
rule_table[n_rules++] =
*search(TERMINAL, "LLILLEGAL", BOTH);
if (*conflres & DEF) {
error(linecount, "%%illegal not allowed in %%default rule");
}
#endif
}
]?
[ %persistent elem(&elem) [ %persistent elem(&elem)
{ if (n_rules >= max_rules-2) { { if (n_rules >= max_rules-2) {
rule_table = (p_gram) ralloc( rule_table = (p_gram) ralloc(
@ -467,9 +480,12 @@ elem (register p_gram pres;)
p_gram p1; p_gram p1;
int ln; int ln;
p_gram pe; p_gram pe;
#ifdef NON_CORRECTING
int erroneous = 0;
#endif
} : } :
'[' { ln = linecount; } '[' { ln = linecount; }
[ C_WHILE expr { t |= RESOLVER; } [ C_WHILE C_EXPR { t |= RESOLVER; }
]? ]?
[ C_PERSISTENT { t |= PERSISTENT; } [ C_PERSISTENT { t |= PERSISTENT; }
]? ]?
@ -478,12 +494,32 @@ elem (register p_gram pres;)
mkterm(p1,t,ln,pres); mkterm(p1,t,ln,pres);
} }
| |
[ C_ERRONEOUS {
#ifdef NON_CORRECTING
erroneous = 1;
#endif
}
]?
[
C_IDENT { pe = search(UNKNOWN,lextoken.t_string,BOTH); C_IDENT { pe = search(UNKNOWN,lextoken.t_string,BOTH);
*pres = *pe; *pres = *pe;
#ifdef NON_CORRECTING
if (erroneous) {
if (g_gettype(pres) != TERMINAL){
warning(linecount,
"Erroneous only allowed on terminal");
erroneous = 0;
}
else
pres->g_erroneous = 1;
}
#endif
} }
[ params(0) { if (nparams > 15) { [ C_PARAMS { if (lextoken.t_num > 15) {
error(linecount,"Too many parameters"); error(linecount,"Too many parameters");
} else g_setnpar(pres,nparams); } else g_setnpar(pres,lextoken.t_num);
if (g_gettype(pres) == TERMINAL) { if (g_gettype(pres) == TERMINAL) {
error(linecount, error(linecount,
"Terminal with parameters"); "Terminal with parameters");
@ -492,27 +528,73 @@ elem (register p_gram pres;)
]? ]?
| C_LITERAL { pe = search(LITERAL,lextoken.t_string,BOTH); | C_LITERAL { pe = search(LITERAL,lextoken.t_string,BOTH);
*pres = *pe; *pres = *pe;
#ifdef NON_CORRECTING
if (erroneous)
pres->g_erroneous = 1;
#endif
} }
]
| { g_settype(pres,ACTION); | { g_settype(pres,ACTION);
pres->g_lineno = linecount; pres->g_lineno = linecount;
#ifdef NON_CORRECTING
g_setsubparse(pres, (p_start) 0);
#endif
} }
action(1)
;
params(int formal) [ C_SUBSTART
{
long off = ftell(fact); {
} #ifdef NON_CORRECTING
: '(' { copyact('(', ')', formal ? 2 : 0, 0); } nsubstarts++;
')' #endif
{ if (nparams == 0) { }
fseek(fact, off, 0);
C_IDENT
{
#ifdef NON_CORRECTING
register p_gram temp;
register p_start subp;
temp = search(NONTERM,lextoken.t_string,BOTH);
subp = (p_start) alloc (sizeof(t_start));
subp->ff_nont = g_getcont(temp);
subp->ff_name = (string) 0;
subp->ff_next = (p_start) 0;
g_setsubparse(pres, subp);
#endif
}
[ ',' C_IDENT
{
#ifdef NON_CORRECTING
register p_gram temp;
register p_start ff;
temp = search(NONTERM,lextoken.t_string,BOTH);
ff = g_getsubparse(pres);
while (ff) {
if (ff->ff_nont == g_getcont(temp)) {
warning(linecount, "\"%s\" used twice in %%substart", lextoken.t_string);
break;
}
ff = ff->ff_next;
} }
}
;
expr : '(' { copyact('(',')',1,0); } ff = (p_start) alloc(sizeof(t_start));
')' ff->ff_nont = g_getcont(temp);
ff->ff_name = (string) 0;
ff->ff_next = g_getsubparse(pres);
g_setsubparse(pres, ff);
#endif
}
]* ';'
]?
C_ACTION
; ;
repeats(int *kind; int *cnt;) { int t1 = 0; } : repeats(int *kind; int *cnt;) { int t1 = 0; } :
@ -562,119 +644,6 @@ firsts { register string p; }
; ;
{ {
STATIC
copyact(ch1,ch2,flag,level) char ch1,ch2; {
/*
* Copy an action to file f. Opening bracket is ch1, closing bracket
* is ch2.
* If flag & 1, copy opening and closing parameters too.
* If flag & 2, don't allow ','.
*/
static int text_seen = 0;
register FILE *f;
register ch; /* Current char */
register match; /* used to read strings */
int saved; /* save linecount */
int sav_strip = strip_grammar;
f = fact;
if (ch1 == '{' || flag != 1) strip_grammar = 0;
if (!level) {
saved = linecount;
text_seen = 0;
nparams = 0; /* count comma's */
putc('\0',f);
fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
}
if (level || (flag & 1)) putc(ch1,f);
for (;;) {
ch = input();
if (ch == ch2) {
if (!level) {
unput(ch);
if (text_seen) nparams++;
}
if (level || (flag & 1)) putc(ch,f);
if (strip_grammar != sav_strip) {
if (ch1 == '{' || flag != 1) putchar(ch);
}
strip_grammar = sav_strip;
return;
}
switch(ch) {
case ')':
case '}':
case ']':
error(linecount,"Parentheses mismatch");
break;
case '(':
text_seen = 1;
copyact('(',')',flag,level+1);
continue;
case '{':
text_seen = 1;
copyact('{','}',flag,level+1);
continue;
case '[':
text_seen = 1;
copyact('[',']',flag,level+1);
continue;
case '/':
ch = input();
unput(ch);
if (ch == '*') {
putc('/', f);
skipcomment(1);
continue;
}
ch = '/';
text_seen = 1;
break;
case ';':
case ',':
if (! level && text_seen) {
text_seen = 0;
nparams++;
if (ch == ',' && (flag & 2)) {
warning(linecount, "Parameters may not be separated with a ','");
ch = ';';
}
}
break;
case '\'':
case '"' :
/*
* watch out for brackets in strings, they do not
* count !
*/
text_seen = 1;
match = ch;
putc(ch,f);
while((ch = input())) {
if (ch == match) break;
if (ch == '\\') {
putc(ch,f);
ch = input();
}
if (ch == '\n') {
error(linecount,"Newline in string");
unput(match);
}
putc(ch,f);
}
if (ch == match) break;
/* Fall through */
case EOF :
if (!level) error(saved,"Action does not terminate");
strip_grammar = sav_strip;
return;
default:
if (c_class[ch] != ISSPA) text_seen = 1;
}
putc(ch,f);
}
}
STATIC p_gram STATIC p_gram
copyrule(p,length) register p_gram p; { copyrule(p,length) register p_gram p; {
/* /*

View file

@ -80,7 +80,7 @@ new_mem(p) register p_info p; {
*/ */
p->i_size += p->i_incr * p->i_esize; p->i_size += p->i_incr * p->i_esize;
} }
p->i_ptr = !p->i_ptr ? p->i_ptr = !p->i_ptr ?
alloc(p->i_size) : alloc(p->i_size) :
ralloc(p->i_ptr, p->i_size); ralloc(p->i_ptr, p->i_size);
p->i_max = p->i_ptr + sz; p->i_max = p->i_ptr + sz;

View file

@ -47,8 +47,8 @@ char c_class[] = {
ISKEY, /* '%' */ ISKEY, /* '%' */
0, /* '&' */ 0, /* '&' */
ISLIT, /* ''' */ ISLIT, /* ''' */
ISTOK, /* '(' */ ISACT, /* '(' */
ISTOK, /* ')' */ 0, /* ')' */
ISTOK, /* '*' */ ISTOK, /* '*' */
ISTOK, /* '+' */ ISTOK, /* '+' */
ISTOK, /* ',' */ ISTOK, /* ',' */
@ -130,9 +130,9 @@ char c_class[] = {
ISLET, /* 'x' */ ISLET, /* 'x' */
ISLET, /* 'y' */ ISLET, /* 'y' */
ISLET, /* 'z' */ ISLET, /* 'z' */
ISTOK, /* '{' */ ISACT, /* '{' */
ISTOK, /* '|' */ ISTOK, /* '|' */
ISTOK, /* '}' */ 0, /* '}' */
0, /* '~' */ 0, /* '~' */
0 /* 0177 */ 0 /* 0177 */
}; };

View file

@ -14,3 +14,4 @@ extern char c_class[];
#define ISTOK 5 #define ISTOK 5
#define ISCOM 6 #define ISCOM 6
#define ISLIT 7 #define ISLIT 7
#define ISACT 8

View file

@ -26,7 +26,6 @@
static string rcsid1 = "$Id$"; static string rcsid1 = "$Id$";
# endif # endif
static string c_first = "> firstset "; static string c_first = "> firstset ";
static string c_contains = "> containset "; static string c_contains = "> containset ";
static string c_follow = "> followset "; static string c_follow = "> followset ";
@ -72,7 +71,7 @@ conflchecks() {
f_input = x->f_name; f_input = x->f_name;
for (s = x->f_nonterminals; s != -1; s = p->n_next) { for (s = x->f_nonterminals; s != -1; s = p->n_next) {
p = &nonterms[s]; p = &nonterms[s];
if (check(p->n_rule)) p->n_flags |= VERBOSE; if (check(p->n_rule)) p->n_flags |= VERBOSE;
} }
} }
for (x = files; x < maxfiles; x++) { for (x = files; x < maxfiles; x++) {
@ -188,7 +187,7 @@ check(p) register p_gram p; {
n = &nonterms[g_getcont(p)]; n = &nonterms[g_getcont(p)];
if (g_getnpar(p) != getntparams(n)) { if (g_getnpar(p) != getntparams(n)) {
error(p->g_lineno, error(p->g_lineno,
"Call of %s: parameter count mismatch", "Call of %s: parameter count mismatch",
n->n_name); n->n_name);
} }
break; } break; }
@ -211,13 +210,13 @@ check(p) register p_gram p; {
temp = setalloc(); temp = setalloc();
setunion(temp,q->t_first); setunion(temp,q->t_first);
if (!setintersect(temp,q->t_follow)) { if (!setintersect(temp,q->t_follow)) {
/* /*
* q->t_first * q->t_follow != EMPTY * q->t_first * q->t_follow != EMPTY
*/ */
if (!(q->t_flags & RESOLVER)) { if (!(q->t_flags & RESOLVER)) {
/* /*
* No conflict resolver * No conflict resolver
*/ */
error(p->g_lineno, error(p->g_lineno,
"Repetition conflict"); "Repetition conflict");
retval = 1; retval = 1;
@ -249,7 +248,7 @@ check(p) register p_gram p; {
"Alternation conflict"); "Alternation conflict");
retval = 1; retval = 1;
moreverbose(temp); moreverbose(temp);
} }
} else { } else {
if (l->l_flag & (COND|PREFERING|AVOIDING)) { if (l->l_flag & (COND|PREFERING|AVOIDING)) {
l->l_flag |= NOCONF; l->l_flag |= NOCONF;
@ -257,8 +256,8 @@ check(p) register p_gram p; {
"Conflict resolver without conflict"); "Conflict resolver without conflict");
} }
} }
if (l->l_flag & PREFERING) propagate(l->l_symbs,p+1);
free( (p_mem) temp); free( (p_mem) temp);
if (l->l_flag & PREFERING) propagate(l->l_symbs,p+1);
retval |= check(l->l_rule); retval |= check(l->l_rule);
break; } break; }
} }
@ -378,7 +377,7 @@ prrule(p) register p_gram p; {
spaces(); spaces();
p++; continue; } p++; continue; }
case LITERAL : case LITERAL :
case TERMINAL : { case TERMINAL : {
register p_token pt = &tokens[g_getcont(p)]; register p_token pt = &tokens[g_getcont(p)];
fprintf(f,pt->t_tokno<0400 ? fprintf(f,pt->t_tokno<0400 ?
@ -463,7 +462,7 @@ propagate(set,p) p_set set; register p_gram p; {
while (g_gettype(p) != EORULE) { while (g_gettype(p) != EORULE) {
setminus(g_getlink(p)->l_symbs,set); setminus(g_getlink(p)->l_symbs,set);
p++; p++;
} }
} }
STATIC STATIC

View file

@ -59,6 +59,12 @@ STATIC do_contains();
STATIC contains(); STATIC contains();
STATIC int nsafes(); STATIC int nsafes();
STATIC int do_safes(); STATIC int do_safes();
#ifdef NON_CORRECTING
STATIC int nc_nfirst();
STATIC nc_first();
STATIC int nc_nfollow();
STATIC nc_follow();
#endif
do_compute() { do_compute() {
/* /*
@ -116,6 +122,31 @@ do_compute() {
setntsafe(p,SCANDONE); setntsafe(p,SCANDONE);
} }
co_trans(nsafes); co_trans(nsafes);
#ifdef NON_CORRECTING
if (subpars_sim) {
int s;
/* compute the union of the first sets of all start symbols
Used to compute the nc-first-sets when -s option is given */
start_firsts = get_set();
for (st = start; st; st = st->ff_next) {
s = setunion(start_firsts, (&nonterms[st->ff_nont])->n_first);
}
}
if (non_corr) {
/* compute the non_corr first sets for all nonterminals and terms */
co_trans(nc_nfirst);
for (st = start; st; st = st->ff_next) {
p = &nonterms[st->ff_nont];
PUTIN(p->n_nc_follow,0);
}
co_trans(nc_nfollow);
}
#endif
# ifndef NDEBUG # ifndef NDEBUG
if (debug) { if (debug) {
fputs("Safeties:\n", stderr); fputs("Safeties:\n", stderr);
@ -151,6 +182,10 @@ createsets() {
p = &nonterms[i]; p = &nonterms[i];
p->n_flags |= GENSTATIC; p->n_flags |= GENSTATIC;
p->n_first = get_set(); p->n_first = get_set();
#ifdef NON_CORRECTING
p->n_nc_first = get_set();
p->n_nc_follow = get_set();
#endif
p->n_follow = get_set(); p->n_follow = get_set();
walk(f->f_used, p->n_rule); walk(f->f_used, p->n_rule);
} }
@ -185,6 +220,10 @@ walk(u, p) p_set u; register p_gram p; {
q = g_getterm(p); q = g_getterm(p);
q->t_first = get_set(); q->t_first = get_set();
#ifdef NON_CORRECTING
q->t_nc_first = get_set();
q->t_nc_follow = get_set();
#endif
q->t_follow = get_set(); q->t_follow = get_set();
walk(u, q->t_rule); walk(u, q->t_rule);
break; } break; }
@ -193,6 +232,9 @@ walk(u, p) p_set u; register p_gram p; {
l = g_getlink(p); l = g_getlink(p);
l->l_symbs = get_set(); l->l_symbs = get_set();
#ifdef NON_CORRECTING
l->l_nc_symbs = get_set();
#endif
l->l_others = get_set(); l->l_others = get_set();
walk(u, l->l_rule); walk(u, l->l_rule);
break; } break; }
@ -237,7 +279,7 @@ empty(p) register p_gram p; {
for (;;) { for (;;) {
switch (g_gettype(p)) { switch (g_gettype(p)) {
case EORULE : case EORULE :
return 1; return 1;
case TERM : { case TERM : {
register p_term q; register p_term q;
@ -271,6 +313,12 @@ nfirst(p) register p_nont p; {
return first(p->n_first, p->n_rule, 0); return first(p->n_first, p->n_rule, 0);
} }
#ifdef NON_CORRECTING
STATIC int
nc_nfirst(p) register p_nont p; {
	/*
	 * Compute the non-correcting FIRST set of nonterminal p from its
	 * rule, including the sets of the terms and alternations in that
	 * rule (flag 0). Passes on the result of nc_first().
	 */
	int changed;

	changed = nc_first(p->n_nc_first, p->n_rule, 0);
	return changed;
}
#endif
STATIC STATIC
first(setp,p,flag) p_set setp; register p_gram p; { first(setp,p,flag) p_set setp; register p_gram p; {
/* /*
@ -282,8 +330,8 @@ first(setp,p,flag) p_set setp; register p_gram p; {
*/ */
register s; /* Will gather return value */ register s; /* Will gather return value */
int noenter;/* when set, unables entering of elements into int noenter;/* when set, unables entering of elements into
* setp. This is necessary to walk through the * setp. This is necessary to walk through the
* rest of rule p. * rest of rule p.
*/ */
s = 0; s = 0;
@ -349,6 +397,108 @@ first(setp,p,flag) p_set setp; register p_gram p; {
} }
} }
#ifdef NON_CORRECTING
STATIC
nc_first(setp,p,flag) p_set setp; register p_gram p; {
	/*
	 * Compute the non-correcting FIRST set of rule p and put it
	 * in setp.
	 * If flag == 0, the non-correcting first sets of the terms and
	 * alternations in rule p are computed as well, and the rest of
	 * the rule is still walked once setp is complete.
	 * If flag != 0, the walk ends as soon as an element is passed
	 * that is not skipped.
	 * The result is nonzero if the set referred to by "setp" changed.
	 * When the -s flag was given (subpars_sim), the union of the first
	 * sets of all start symbols is used whenever an action occurs.
	 * Otherwise only the first sets of the start symbols in its
	 * %substart are used.
	 */
	register changed;	/* Will gather return value */
	int frozen;		/* When set, no more elements are entered
				 * into setp. This is necessary to walk
				 * through the rest of rule p.
				 */

	changed = 0;
	frozen = 0;
	for (;;) {
		switch (g_gettype(p)) {
		case EORULE :
			return changed;
		case TERM : {
			register p_term term;

			term = g_getterm(p);
			if (flag == 0) {
				/* Only the side effect on the term's set is wanted */
				nc_first(term->t_nc_first,term->t_rule,0);
			}
			if (!frozen) {
				changed |= setunion(setp,term->t_nc_first);
			}
			p++;
			/* A term that may be empty does not end the first set */
			if (r_getkind(term) == STAR ||
			    r_getkind(term) == OPT ||
			    empty(term->t_rule)) continue;
			break; }
		case ALTERNATION : {
			register p_link alt;

			alt = g_getlink(p);
			if (flag == 0) {
				/* Only the side effect on the alternative's set is wanted */
				nc_first(alt->l_nc_symbs,alt->l_rule,0);
			}
			if (!frozen) {
				changed |= setunion(setp,alt->l_nc_symbs);
			}
			if (g_gettype(p+1) == EORULE) return changed;
			p++;
			continue; }
		case ACTION : {
			register p_start sub;

			if (!frozen) {
				if (!subpars_sim) {
					for (sub = g_getsubparse(p); sub;
					     sub = sub->ff_next) {
						changed |= setunion(setp,
						    (&nonterms[sub->ff_nont])->n_nc_first);
					}
				}
				else {
					changed |= setunion(setp, start_firsts);
				}
			}
			p++;
			continue; }
		case LITERAL :
		case TERMINAL :
			if (g_getcont(p) != g_getcont(illegal_gram)) {
				if (!frozen && !IN(setp,g_getcont(p))) {
					changed = 1;
					PUTIN(setp,g_getcont(p));
				}
				p++;
				break;
			}
			/* The illegal symbol is ignored for this set. */
			p++;
			continue;
		case NONTERM : {
			register p_nont nt;

			nt = &nonterms[g_getcont(p)];
			if (!frozen) {
				changed |= setunion(setp,nt->n_nc_first);
				if (ntneeded) NTPUTIN(setp,g_getcont(p));
			}
			p++;
			if (nt->n_flags & EMPTY) continue;
			break; }
		}
		/* An element that is not skipped has been passed */
		if (flag != 0) return changed;
		frozen = 1;
	}
}
#endif
STATIC int STATIC int
nfollow(p) register p_nont p; { nfollow(p) register p_nont p; {
return follow(p->n_follow, p->n_rule); return follow(p->n_follow, p->n_rule);
@ -426,6 +576,87 @@ follow(setp,p) p_set setp; register p_gram p; {
} }
} }
#ifdef NON_CORRECTING
STATIC int
nc_nfollow(p) register p_nont p; {
	/*
	 * Propagate the non-correcting FOLLOW set of nonterminal p
	 * through its rule. Passes on the result of nc_follow().
	 * This has to call nc_follow(), the worker that fills the
	 * n_nc_follow and t_nc_follow sets; follow() is the worker that
	 * nfollow() uses for the ordinary follow sets, so calling it
	 * here would leave nc_follow() unused.
	 */
	return nc_follow(p->n_nc_follow, p->n_rule);
}
STATIC
nc_follow(setp,p) p_set setp; register p_gram p; {
	/*
	 * setp is the non-correcting follow set for the rule p.
	 * Compute the non-correcting follow sets of the terms and
	 * nonterminals in rule p from this set.
	 * The result is nonzero if one of these follow sets changed.
	 */
	register changed;	/* Will gather return value */
	register int type;	/* Type of the current element */

	changed = 0;
	while ((type = g_gettype(p)) != EORULE) {
		if (type == TERM) {
			register p_term term;

			term = g_getterm(p);
			if (empty(p+1)) {
				/*
				 * If what follows the term can be empty,
				 * everything that can follow the whole
				 * rule can also follow the term
				 */
				changed |= setunion(term->t_nc_follow,setp);
			}
			/*
			 * Everything that can start the rest of the rule
			 * can follow the term
			 */
			changed |= nc_first(term->t_nc_follow,p+1,1);
			if (r_getkind(term) == STAR ||
			    r_getkind(term) == PLUS ||
			    r_getnum(term) ) {
				/*
				 * If the term involves a repetition
				 * of possibly more than one,
				 * everything that can start the term
				 * can also follow it.
				 */
				changed |= nc_follow(term->t_nc_first,term->t_rule);
			}
			/*
			 * Now propagate the set computed so far
			 */
			changed |= nc_follow(term->t_nc_follow, term->t_rule);
		}
		else if (type == ALTERNATION) {
			/*
			 * Just propagate setp
			 */
			changed |= nc_follow(setp,g_getlink(p)->l_rule);
		}
		else if (type == NONTERM) {
			register p_nont nt;

			nt = &nonterms[g_getcont(p)];
			changed |= nc_first(nt->n_nc_follow,p+1,1);
			if (empty(p+1)) {
				/*
				 * If the rest of p can produce empty,
				 * everything that follows p can follow
				 * the nonterminal
				 */
				changed |= setunion(nt->n_nc_follow,setp);
			}
		}
		p++;
	}
	return changed;
}
#endif
STATIC STATIC
co_dirsymb(setp,p) p_set setp; register p_gram p; { co_dirsymb(setp,p) p_set setp; register p_gram p; {
/* /*
@ -519,7 +750,7 @@ do_lengthcomp() {
* Compute the minimum length of a terminal production for each * Compute the minimum length of a terminal production for each
* nonterminal. * nonterminal.
* This length consists of two fields: the number of terminals, * This length consists of two fields: the number of terminals,
* and a number that is composed of * and a number that is composed of
* - the number of this alternative * - the number of this alternative
* - a crude measure of the number of terms and nonterminals in the * - a crude measure of the number of terms and nonterminals in the
* production of this shortest string. * production of this shortest string.
@ -562,6 +793,12 @@ complength(p,le) register p_gram p; p_length le; {
switch (g_gettype(p)) { switch (g_gettype(p)) {
case LITERAL : case LITERAL :
case TERMINAL : case TERMINAL :
#ifdef NON_CORRECTING
if (g_getcont(p) == g_getcont(illegal_gram)) {
add(&X, INFINITY, 0);
break;
}
#endif
add(&X, 1, 0); add(&X, 1, 0);
break; break;
case ALTERNATION : case ALTERNATION :
@ -571,6 +808,7 @@ complength(p,le) register p_gram p; p_length le; {
while (g_gettype(p) != EORULE) { while (g_gettype(p) != EORULE) {
cnt++; cnt++;
l = g_getlink(p); l = g_getlink(p);
p++;
complength(l->l_rule,&i); complength(l->l_rule,&i);
i.val += cnt; i.val += cnt;
if (l->l_flag & DEF) { if (l->l_flag & DEF) {
@ -580,7 +818,6 @@ complength(p,le) register p_gram p; p_length le; {
if (compare(&i, &X) < 0) { if (compare(&i, &X) < 0) {
X = i; X = i;
} }
p++;
} }
/* Fall through */ /* Fall through */
case EORULE : case EORULE :
@ -593,7 +830,7 @@ complength(p,le) register p_gram p; p_length le; {
q = g_getterm(p); q = g_getterm(p);
rep = r_getkind(q); rep = r_getkind(q);
X.val += 1; X.val += 1;
if ((q->t_flags&PERSISTENT) || if ((q->t_flags&PERSISTENT) ||
rep==FIXED || rep==PLUS) { rep==FIXED || rep==PLUS) {
complength(q->t_rule,&i); complength(q->t_rule,&i);
add(&X, i.cnt, i.val); add(&X, i.cnt, i.val);
@ -661,6 +898,7 @@ setdefaults(p) register p_gram p; {
do { do {
cnt++; cnt++;
l = g_getlink(p); l = g_getlink(p);
p++;
complength(l->l_rule,&i); complength(l->l_rule,&i);
i.val += cnt; i.val += cnt;
if (l->l_flag & DEF) temp = 1; if (l->l_flag & DEF) temp = 1;
@ -671,7 +909,6 @@ setdefaults(p) register p_gram p; {
count = i; count = i;
} }
setdefaults(l->l_rule); setdefaults(l->l_rule);
p++;
} while (g_gettype(p) != EORULE); } while (g_gettype(p) != EORULE);
if (!temp) { if (!temp) {
/* No user specified default */ /* No user specified default */
@ -687,7 +924,7 @@ STATIC
do_contains(n) register p_nont n; { do_contains(n) register p_nont n; {
/* /*
* Compute the total set of symbols that nonterminal n can * Compute the total set of symbols that nonterminal n can
* produce * produce
*/ */
if (n->n_contains == 0) { if (n->n_contains == 0) {
@ -811,7 +1048,7 @@ do_safes(p,safe,ch) register p_gram p; register int *ch; {
for (;;) { for (;;) {
switch (g_gettype(p)) { switch (g_gettype(p)) {
case ACTION: case ACTION:
p++; p++;
continue; continue;
case LITERAL: case LITERAL:
case TERMINAL: case TERMINAL:
@ -830,12 +1067,13 @@ do_safes(p,safe,ch) register p_gram p; register int *ch; {
safe = t_after(rep, i, retval); safe = t_after(rep, i, retval);
break; } break; }
case ALTERNATION : { case ALTERNATION : {
register p_link l; register p_link l;
register int i; register int i;
retval = -1; retval = -1;
while (g_gettype(p) == ALTERNATION) { while (g_gettype(p) == ALTERNATION) {
l = g_getlink(p); l = g_getlink(p);
p++;
if (safe > SAFE && (l->l_flag & DEF)) { if (safe > SAFE && (l->l_flag & DEF)) {
i = do_safes(l->l_rule,SAFESCANDONE,ch); i = do_safes(l->l_rule,SAFESCANDONE,ch);
} }
@ -848,7 +1086,6 @@ do_safes(p,safe,ch) register p_gram p; register int *ch; {
} }
else if (i > retval) retval = i; else if (i > retval) retval = i;
} }
p++;
} }
return retval; } return retval; }
case NONTERM : { case NONTERM : {

View file

@ -36,6 +36,11 @@ extern int ntokens; /* number of terminals */
extern int nterms; /* number of terms */ extern int nterms; /* number of terms */
extern int nalts; /* number of alternatives */ extern int nalts; /* number of alternatives */
extern p_start start; /* will contain startsymbols */ extern p_start start; /* will contain startsymbols */
#ifdef NON_CORRECTING
extern int nsubstarts; /* number of subparserstarts */
extern p_set start_firsts; /* Will contain the union of first sets of
startsymbols when -n -s option is on */
#endif
extern int linecount; /* line number */ extern int linecount; /* line number */
extern int assval; /* to create difference between literals extern int assval; /* to create difference between literals
* and other terminals * and other terminals
@ -73,8 +78,17 @@ extern string LLgenid; /* LLgen identification string */
extern t_token lextoken; /* the current token */ extern t_token lextoken; /* the current token */
extern int nerrors; extern int nerrors;
extern string rec_file, incl_file; extern string rec_file, incl_file;
#ifdef NON_CORRECTING
extern string nc_rec_file, nc_incl_file;
#endif
extern int low_percentage, high_percentage; extern int low_percentage, high_percentage;
extern int min_cases_for_jmptable; extern int min_cases_for_jmptable;
extern int jmptable_option; extern int jmptable_option;
extern int ansi_c; extern int ansi_c;
#ifdef NON_CORRECTING
extern int non_corr;
extern int subpars_sim;
extern p_gram illegal_gram;
#endif
extern int strip_grammar; extern int strip_grammar;
extern int in_production;

View file

@ -51,6 +51,9 @@ extern gencode();
STATIC opentemp(); STATIC opentemp();
STATIC geninclude(); STATIC geninclude();
STATIC genrecovery(); STATIC genrecovery();
#ifdef NON_CORRECTING
STATIC genncrecovery();
#endif
STATIC string genname(); STATIC string genname();
STATIC generate(); STATIC generate();
STATIC prset(); STATIC prset();
@ -109,18 +112,31 @@ genhdr()
else { else {
fputs("#if __STDC__ || __cplusplus\n#define LL_ANSI_C 1\n#endif\n", fpars); fputs("#if __STDC__ || __cplusplus\n#define LL_ANSI_C 1\n#endif\n", fpars);
} }
#ifdef NON_CORRECTING
if (non_corr) fputs("#define LL_NON_CORR 1\n", fpars);
#endif
fprintf(fpars, "#define LL_LEXI %s\n", lexical); fprintf(fpars, "#define LL_LEXI %s\n", lexical);
copyfile(incl_file); copyfile(incl_file);
} }
gencode(argc) { gencode(argc) {
register p_file p = files; register p_file p = files;
/* Set up for code generation */ /* Set up for code generation */
if ((fact = fopen(f_temp,"r")) == NULL) { if ((fact = fopen(f_temp,"r")) == NULL) {
fatal(0,e_noopen,f_temp); fatal(0,e_noopen,f_temp);
} }
#ifdef NON_CORRECTING
/* The non-correcting error recovery must be generated BEFORE
parser code is generated!!!! In case of conflict resolvers,
the code-generation process will delete conflicting symbols
from first and followsets, making them UNUSABLE for the
non-correcting error recovery code.
*/
if (non_corr)
genncrecovery();
#endif
/* For every source file .... */ /* For every source file .... */
while (argc--) { while (argc--) {
/* Open temporary */ /* Open temporary */
@ -138,6 +154,7 @@ gencode(argc) {
} }
geninclude(); geninclude();
genrecovery(); genrecovery();
fclose(fact); fclose(fact);
} }
@ -167,13 +184,18 @@ geninclude() {
} }
fprintf(fpars, "#define %s_MAXTOKNO %d\n", prefix ? prefix : "LL", fprintf(fpars, "#define %s_MAXTOKNO %d\n", prefix ? prefix : "LL",
maxno); maxno);
#ifdef NON_CORRECTING
if (non_corr) {
fprintf(fpars, "#define %sNONCORR\n", prefix ? prefix : "LL");
}
#endif
doclose(fpars); doclose(fpars);
install(f_include, "."); install(f_include, ".");
} }
STATIC STATIC
genrecovery() { genrecovery() {
register FILE *f; register FILE *f;
register p_token t; register p_token t;
register int *q; register int *q;
register p_nont p; register p_nont p;
@ -202,6 +224,12 @@ genrecovery() {
i > 0 ? i : 1, i > 0 ? i : 1,
ntokens); ntokens);
if (onerror) fprintf(f,"#define LL_USERHOOK %s\n", onerror); if (onerror) fprintf(f,"#define LL_USERHOOK %s\n", onerror);
#ifdef NON_CORRECTING
if (non_corr) {
fputs("static int nc_done = 0;\n", f);
fputs("static int err_seen = 0;\n", f);
}
#endif
/* Now generate the routines that call the startsymbols */ /* Now generate the routines that call the startsymbols */
fputs("#if LL_ANSI_C\n", f); fputs("#if LL_ANSI_C\n", f);
for (st = start; st; st = st->ff_next) { for (st = start; st; st = st->ff_next) {
@ -214,7 +242,18 @@ genrecovery() {
for (st = start; st; st = st->ff_next) { for (st = start; st; st = st->ff_next) {
fprintf(f, "#if LL_ANSI_C\nvoid %s(void)\n#else\n%s()\n#endif\n", st->ff_name, st->ff_name); fprintf(f, "#if LL_ANSI_C\nvoid %s(void)\n#else\n%s()\n#endif\n", st->ff_name, st->ff_name);
p = &nonterms[st->ff_nont]; p = &nonterms[st->ff_nont];
fputs(" {\n\tunsigned int s[LL_NTERMINALS+LL_NSETS+2];\n\tLLnewlevel(s);\n\tLLread();\n", f); fputs(" {\n\tunsigned int s[LL_NTERMINALS+LL_NSETS+2];", f);
#ifdef NON_CORRECTING
if (non_corr) {
fputs(" \n\tint oldstartsymb;", f);
fputs(" \n\tint oldncflag;", f);
fputs(" \n\toldstartsymb = LLstartsymb;", f);
fputs(" \n\toldncflag = nc_done;", f);
fputs(" \n\tnc_done = 0;", f);
fprintf(f, "\n\tLLstartsymb = %d;", st->ff_nont + assval);
}
#endif
fputs("\n\tLLnewlevel(s);\n\tLLread();\n", f);
if (g_gettype(p->n_rule) == ALTERNATION) { if (g_gettype(p->n_rule) == ALTERNATION) {
genpush(findindex(p->n_contains)); genpush(findindex(p->n_contains));
} }
@ -224,7 +263,18 @@ genrecovery() {
fputs("\tLL_NOSCANDONE(EOFILE);\n",f); fputs("\tLL_NOSCANDONE(EOFILE);\n",f);
} }
else fputs("\tLL_SCANDONE(EOFILE);\n",f); else fputs("\tLL_SCANDONE(EOFILE);\n",f);
fputs("\tLLoldlevel(s);\n}\n",f); fputs("\tLLoldlevel(s);\n",f);
#ifdef NON_CORRECTING
if (non_corr) {
fputs("\tLLstartsymb = oldstartsymb;\n", f);
fputs("\tif (nc_done == 1) { \n", f);
fputs("\t\terr_seen = 1;\n", f);
fputs("\tnc_done = oldncflag;\n", f);
fputs("\t}\n", f);
}
#endif
fputs("}\n", f);
} }
/* Now generate the sets */ /* Now generate the sets */
fputs("static char LLsets[] = {\n",f); fputs("static char LLsets[] = {\n",f);
@ -254,6 +304,46 @@ genrecovery() {
install(f_rec, "."); install(f_rec, ".");
} }
#ifdef NON_CORRECTING
STATIC
genncrecovery() {
register FILE *f;
register p_token t;
register int *q;
int *index;
/* Generate the non-correcting error recovery file */
opentemp((string) 0);
f = fpars;
genhdr();
correct_prefix();
save_grammar(f);
fprintf(f, "#define LLFIRST_NT %d\n", assval);
fprintf(f, "#define LLSETSIZE %d\n", nbytes);
index = (int *) alloc((unsigned) (assval * sizeof(int)));
for (q = index; q < &index[assval];) *q++ = -1;
for (t = tokens; t < maxt; t++) {
index[t->t_tokno] = t - tokens;
}
fputs("#define LLindex (LL_index+1)\nstatic short LL_index[] = {0,0,\n",f);
for (q = index+1; q < &index[assval]; q++) {
fprintf(f, "%d,\n", *q);
}
fputs(c_arrend, f);
free((p_mem) index);
copyfile(nc_incl_file);
copyfile(nc_rec_file);
doclose(f);
install(f_nc, ".");
}
#endif
STATIC STATIC
generate(f) p_file f; { generate(f) p_file f; {
/* /*
@ -272,7 +362,7 @@ generate(f) p_file f; {
for (ff = f->f_firsts; ff; ff = ff->ff_next) { for (ff = f->f_firsts; ff; ff = ff->ff_next) {
macro(ff->ff_name,&nonterms[ff->ff_nont]); macro(ff->ff_name,&nonterms[ff->ff_nont]);
} }
/* For every nonterminal generate a function */ /* For every nonterminal generate a function */
for (s = f->f_nonterminals; s != -1; s = p->n_next) { for (s = f->f_nonterminals; s != -1; s = p->n_next) {
p = &nonterms[s]; p = &nonterms[s];
@ -378,7 +468,7 @@ STATIC
getparams() { getparams() {
/* getparams is called if a nonterminal has parameters. The names /* getparams is called if a nonterminal has parameters. The names
* of the parameters have to be found, and they should be declared * of the parameters have to be found, and they should be declared
*/ */
long off; long off;
register int l; register int l;
long ftell(); long ftell();
@ -407,7 +497,7 @@ getparams() {
} }
fputs(") ",fpars); fputs(") ",fpars);
/* /*
* Now copy the declarations * Now copy the declarations
*/ */
l = getc(fact); /* patch: some implementations of fseek l = getc(fact); /* patch: some implementations of fseek
do not work properly after "ungetc" do not work properly after "ungetc"
@ -469,7 +559,7 @@ getansiparams(mkdef) {
/* getansiparams is called if a nonterminal has parameters /* getansiparams is called if a nonterminal has parameters
* and an ANSI C function definition/declaration has to be produced. * and an ANSI C function definition/declaration has to be produced.
* If a definition has to be produced, "mkdef" is set to 1. * If a definition has to be produced, "mkdef" is set to 1.
*/ */
register int l; register int l;
int delayed = 0; int delayed = 0;
@ -911,7 +1001,7 @@ codeforterm(q,safety,toplevel) register p_term q; {
int term_is_persistent = (q->t_flags & PERSISTENT); int term_is_persistent = (q->t_flags & PERSISTENT);
int ispushed = NOPOP; int ispushed = NOPOP;
if (!(toplevel > 0 && if (!(toplevel > 0 &&
(safety == 0 || (!onerror && safety <= SAFESCANDONE)) && (safety == 0 || (!onerror && safety <= SAFESCANDONE)) &&
(rep_kind == OPT || (rep_kind == FIXED && rep_count == 0)))) { (rep_kind == OPT || (rep_kind == FIXED && rep_count == 0)))) {
ispushed = findindex(q->t_contains); ispushed = findindex(q->t_contains);
@ -1091,21 +1181,21 @@ genswhead(q, rep_kind, rep_count, safety, ispushed) register p_term q; {
STATIC STATIC
gencases(tokenlist, caseno, compacted) gencases(tokenlist, caseno, compacted)
int *tokenlist; int *tokenlist;
{ {
/* /*
* setp points to a bitset indicating which cases must * setp points to a bitset indicating which cases must
* be generated. * be generated.
* YECH, the PCC compiler does not accept many cases without statements * YECH, the PCC compiler does not accept many cases without
* inbetween, so after every case label an empty statement is * statements in between, so after every case label an empty
* generated. * statement is generated.
* The C-grammar used by PCC is really stupid on this point : * The C-grammar used by PCC is really stupid on this point :
* it contains the rule * it contains the rule
* statement : label statement * statement : label statement
* which is right-recursive, and as is well known, LALR parsers don't * which is right-recursive, and as is well known, LALR parsers don't
* handle these things very good. * handle these things very well.
* The grammar should have been written : * The grammar should have been written :
* labeledstatement : labels statement ; * labeledstatement : labels statement ;
* labels : labels label | ; * labels : labels label | ;
*/ */
register p_token p; register p_token p;
@ -1119,7 +1209,7 @@ gencases(tokenlist, caseno, compacted)
(p->t_tokno < 0400 ? "/* case '%s' */\n" : (p->t_tokno < 0400 ? "/* case '%s' */\n" :
"/* case %s */\n") : "/* case %s */\n") :
p->t_tokno<0400 ? "case /* '%s' */ %d : ;\n" p->t_tokno<0400 ? "case /* '%s' */ %d : ;\n"
: "case /* %s */ %d : ;\n", : "case /* %s */ %d : ;\n",
p->t_string, i); p->t_string, i);
} }
} }
@ -1220,10 +1310,10 @@ out_list(tokenlist, listno, casecnt)
register int i; register int i;
register FILE *f = fpars; register FILE *f = fpars;
fprintf(f, "static %s LL%d_tklist[] = {", fprintf(f, "static %s LL%d_tklist[] = {",
casecnt <= 127 ? "char" : "short", casecnt <= 127 ? "char" : "short",
listno); listno);
for (i = 0; i < ntokens; i++) { for (i = 0; i < ntokens; i++) {
fprintf(f, "%c%d,", i % 10 == 0 ? '\n': ' ', tokenlist[i]); fprintf(f, "%c%d,", i % 10 == 0 ? '\n': ' ', tokenlist[i]);
} }
@ -1260,6 +1350,10 @@ correct_prefix()
fprintf(f, "#define LLnewlevel %snewlevel\n", s); fprintf(f, "#define LLnewlevel %snewlevel\n", s);
fprintf(f, "#define LLoldlevel %soldlevel\n", s); fprintf(f, "#define LLoldlevel %soldlevel\n", s);
fprintf(f, "#define LLmessage %smessage\n", s); fprintf(f, "#define LLmessage %smessage\n", s);
#ifdef NON_CORRECTING
fprintf(f, "#define LLnc_recovery %sncrecovery\n", s);
fprintf(f, "#define LLstartsymb %sstartsymb\n", s);
#endif
} }
fprintf(f, "#include \"%s\"\n", f_include); fprintf(f, "#include \"%s\"\n", f_include);
} }

View file

@ -33,6 +33,10 @@ p_token maxt;
int ntokens; int ntokens;
int nterms, nalts; int nterms, nalts;
int norder, torder; int norder, torder;
#ifdef NON_CORRECTING
int nsubstarts;
p_set start_firsts;
#endif
p_start start; p_start start;
int linecount; int linecount;
int assval; int assval;
@ -42,6 +46,9 @@ FILE *finput;
FILE *fact; FILE *fact;
char f_pars[] = PARSERFILE; char f_pars[] = PARSERFILE;
char f_temp[] = ACTFILE; char f_temp[] = ACTFILE;
#ifdef NON_CORRECTING
char f_nc[20];
#endif
char f_out[20]; char f_out[20];
string f_input; string f_input;
char f_include[20]; char f_include[20];
@ -64,8 +71,19 @@ string LLgenid = "/* LLgen generated code from source %s */\n";
t_token lextoken; t_token lextoken;
int nerrors; int nerrors;
string rec_file, incl_file; string rec_file, incl_file;
#ifdef NON_CORRECTING
string nc_rec_file, nc_incl_file;
#endif
int low_percentage = 10, high_percentage = 30; int low_percentage = 10, high_percentage = 30;
int min_cases_for_jmptable = 8; int min_cases_for_jmptable = 8;
int jmptable_option; int jmptable_option;
int ansi_c = 0; int ansi_c = 0;
#ifdef NON_CORRECTING
int non_corr = 0;
int subpars_sim = 0;
p_gram illegal_gram;
#endif
int strip_grammar = 0; int strip_grammar = 0;
int in_production; /* set when the parser is reading a production
rule.
*/

View file

@ -25,7 +25,9 @@
# define ACTFILE "tempXXXXXX" /* temporary file to save actions */ # define ACTFILE "tempXXXXXX" /* temporary file to save actions */
# define HFILE "%spars.h" /* file for "#define's " */ # define HFILE "%spars.h" /* file for "#define's " */
# define RFILE "%spars.c" /* Error recovery */ # define RFILE "%spars.c" /* Error recovery */
#ifdef NON_CORRECTING
# define NCFILE "%sncor.c" /* Non-correcting error recovery */
#endif
extern FILE *finput; extern FILE *finput;
extern FILE *fpars; extern FILE *fpars;
extern FILE *fact; extern FILE *fact;
@ -36,3 +38,6 @@ extern char f_out[];
extern string f_input; extern string f_input;
extern char f_include[]; extern char f_include[];
extern char f_rec[]; extern char f_rec[];
#ifdef NON_CORRECTING
extern char f_nc[];
#endif

View file

@ -41,13 +41,13 @@ extern char *sbrk();
main(argc,argv) register string argv[]; { main(argc,argv) register string argv[]; {
register string arg; register string arg;
string libpath(); string libpath();
char *beg_sbrk; char *beg_sbrk = 0;
/* Initialize */ /* Initialize */
assval = 0400; assval = 0400;
/* read options */ /* read options */
while (argc >= 2 && (arg = argv[1], *arg == '-')) { while (argc >= 2 && (arg = argv[1], *arg == '-')) {
while (*++arg) { while (*++arg) {
switch(*arg) { switch(*arg) {
@ -84,7 +84,7 @@ main(argc,argv) register string argv[]; {
fprintf(stderr,"duplicate -r flag\n"); fprintf(stderr,"duplicate -r flag\n");
exit(1); exit(1);
} }
rec_file = ++arg; rec_file = ++arg;
break; break;
case 'i': case 'i':
case 'I': case 'I':
@ -92,7 +92,7 @@ main(argc,argv) register string argv[]; {
fprintf(stderr,"duplicate -i flag\n"); fprintf(stderr,"duplicate -i flag\n");
exit(1); exit(1);
} }
incl_file = ++arg; incl_file = ++arg;
break; break;
#endif /* not NDEBUG */ #endif /* not NDEBUG */
case 'x': case 'x':
@ -104,8 +104,18 @@ main(argc,argv) register string argv[]; {
case 'A': case 'A':
ansi_c = 1; ansi_c = 1;
continue; continue;
#ifdef NON_CORRECTING
case 'n':
case 'N':
non_corr = 1;
continue;
case 's': case 's':
case 'S': case 'S':
subpars_sim = 1;
continue;
#endif
case 'g':
case 'G':
strip_grammar = 1; strip_grammar = 1;
continue; continue;
default: default:
@ -120,6 +130,13 @@ main(argc,argv) register string argv[]; {
if (verbose) beg_sbrk = sbrk(0); if (verbose) beg_sbrk = sbrk(0);
#ifdef NON_CORRECTING
if ((subpars_sim) && (!non_corr)) {
fprintf(stderr,"option -s illegal without -n, turned off\n");
subpars_sim = 0;
}
#endif
/* /*
* Now check whether the sets should include nonterminals * Now check whether the sets should include nonterminals
*/ */
@ -139,6 +156,12 @@ main(argc,argv) register string argv[]; {
# ifndef NDEBUG # ifndef NDEBUG
} }
# endif # endif
#ifdef NON_CORRECTING
if (non_corr) {
nc_incl_file = libpath("nc_incl");
nc_rec_file = libpath ("nc_rec");
}
#endif
mktemp(f_temp); mktemp(f_temp);
mktemp(f_pars); mktemp(f_pars);
if ((fact = fopen(f_temp,"w")) == NULL) { if ((fact = fopen(f_temp,"w")) == NULL) {
@ -154,6 +177,10 @@ main(argc,argv) register string argv[]; {
*/ */
sprintf(f_include, HFILE, prefix ? prefix : "L"); sprintf(f_include, HFILE, prefix ? prefix : "L");
sprintf(f_rec, RFILE, prefix ? prefix : "L"); sprintf(f_rec, RFILE, prefix ? prefix : "L");
#ifdef NON_CORRECTING
if (non_corr)
sprintf(f_nc, NCFILE, prefix ? prefix : "L");
#endif
setinit(ntneeded); setinit(ntneeded);
maxnt = &nonterms[nnonterms]; maxnt = &nonterms[nnonterms];
maxt = &tokens[ntokens]; maxt = &tokens[ntokens];
@ -216,7 +243,7 @@ readgrammar(argc,argv) char *argv[]; {
/* /*
* There must be a start symbol! * There must be a start symbol!
*/ */
if (start == 0) { if (! nerrors && start == 0) {
fatal(linecount,"Missing %%start"); fatal(linecount,"Missing %%start");
} }
if (nerrors) comfatal(); if (nerrors) comfatal();
@ -237,7 +264,7 @@ doparse(p) register p_file p; {
} }
/* VARARGS1 */ /* VARARGS1 */
error(lineno,s,t,u) string s,t,u; { error(lineno,s,t,u) string s,t,u; {
/* /*
* Just an error message * Just an error message
*/ */
@ -250,7 +277,7 @@ error(lineno,s,t,u) string s,t,u; {
} }
/* VARARGS1 */ /* VARARGS1 */
warning(lineno,s,t,u) string s,t,u; { warning(lineno,s,t,u) string s,t,u; {
/* /*
* Just a warning * Just a warning
*/ */
@ -292,7 +319,7 @@ copyfile(file) string file; {
register FILE *f; register FILE *f;
if ((f = fopen(file,"r")) == NULL) { if ((f = fopen(file,"r")) == NULL) {
fatal(0,"Cannot open libraryfile, call an expert"); fatal(0,"Cannot open library file %s, call an expert",file);
} }
while ((c = getc(f)) != EOF) putc(c,fpars); while ((c = getc(f)) != EOF) putc(c,fpars);
fclose(f); fclose(f);

View file

@ -51,6 +51,9 @@ name_init() {
nont_info.i_esize = sizeof (t_nont); nont_info.i_esize = sizeof (t_nont);
nont_info.i_incr = 50; nont_info.i_incr = 50;
search(TERMINAL,"EOFILE",ENTERING); search(TERMINAL,"EOFILE",ENTERING);
#ifdef NON_CORRECTING
illegal_gram = search(TERMINAL,"LLILLEGAL",ENTERING);
#endif
} }
STATIC p_entry STATIC p_entry
@ -65,10 +68,13 @@ newentry(str, next) string str; p_entry next; {
p->h_name = str; p->h_name = str;
p->h_next = next; p->h_next = next;
p->h_type.g_lineno = linecount; p->h_type.g_lineno = linecount;
#ifdef NON_CORRECTING
p->h_type.g_erroneous = 0;
#endif
return p; return p;
} }
string string
store(s) string s; { store(s) string s; {
/* /*
* Store a string s in the name table * Store a string s in the name table
@ -147,14 +153,14 @@ search(type,str,option) register string str; {
"%s : is already defined",str); "%s : is already defined",str);
} }
p->h_type.g_lineno = linecount; p->h_type.g_lineno = linecount;
return &(p->h_type); return &(p->h_type);
} }
} }
p = newentry(store(str), h_root[i]); p = newentry(store(str), h_root[i]);
h_root[i] = p; h_root[i] = p;
if (type == TERMINAL || type == LITERAL) { if (type == TERMINAL || type == LITERAL) {
register p_token pt; register p_token pt;
pt = (p_token) new_mem(&token_info); pt = (p_token) new_mem(&token_info);
tokens = (p_token) token_info.i_ptr; tokens = (p_token) token_info.i_ptr;
pt->t_string = p->h_name; pt->t_string = p->h_name;
@ -166,7 +172,7 @@ search(type,str,option) register string str; {
if (str[2] == '\0') { if (str[2] == '\0') {
switch(str[1]) { switch(str[1]) {
case 'n' : case 'n' :
val = '\n'; val = '\n';
break; break;
case 'r' : case 'r' :
val = '\r'; val = '\r';
@ -175,19 +181,19 @@ search(type,str,option) register string str; {
val = '\b'; val = '\b';
break; break;
case 'f' : case 'f' :
val = '\f'; val = '\f';
break; break;
case 't' : case 't' :
val = '\t'; val = '\t';
break; break;
case '\'': case '\'':
val = '\''; val = '\'';
break; break;
case '\\': case '\\':
val = '\\'; val = '\\';
break; break;
default : default :
error(linecount,e_literal); error(linecount,e_literal);
} }
} else { } else {
/* /*
@ -200,7 +206,7 @@ search(type,str,option) register string str; {
val = 64*str[1] - 73*'0' + val = 64*str[1] - 73*'0' +
8*str[2] + str[3]; 8*str[2] + str[3];
} }
} else { } else {
/* /*
* No escape in literal * No escape in literal
*/ */
@ -221,7 +227,7 @@ search(type,str,option) register string str; {
return &(p->h_type); return &(p->h_type);
} }
/* /*
* type == NONTERM || type == UNKNOWN * type == NONTERM || type == UNKNOWN
* UNKNOWN and not yet declared means : NONTERM * UNKNOWN and not yet declared means : NONTERM
*/ */
{ {

View file

@ -15,11 +15,11 @@ LLOPT= # -vvv -x
OBJECTS = main.$(SUF) gencode.$(SUF) compute.$(SUF) LLgen.$(SUF) tokens.$(SUF) \ OBJECTS = main.$(SUF) gencode.$(SUF) compute.$(SUF) LLgen.$(SUF) tokens.$(SUF) \
check.$(SUF) reach.$(SUF) global.$(SUF) name.$(SUF) sets.$(SUF) \ check.$(SUF) reach.$(SUF) global.$(SUF) name.$(SUF) sets.$(SUF) \
Lpars.$(SUF) alloc.$(SUF) machdep.$(SUF) cclass.$(SUF) Lpars.$(SUF) alloc.$(SUF) machdep.$(SUF) cclass.$(SUF) savegram.$(SUF)
CSRC = $(SRC_DIR)/main.c $(SRC_DIR)/gencode.c $(SRC_DIR)/compute.c \ CSRC = $(SRC_DIR)/main.c $(SRC_DIR)/gencode.c $(SRC_DIR)/compute.c \
$(SRC_DIR)/check.c $(SRC_DIR)/reach.c $(SRC_DIR)/global.c \ $(SRC_DIR)/check.c $(SRC_DIR)/reach.c $(SRC_DIR)/global.c \
$(SRC_DIR)/name.c $(SRC_DIR)/sets.c $(SRC_DIR)/alloc.c \ $(SRC_DIR)/name.c $(SRC_DIR)/sets.c $(SRC_DIR)/alloc.c \
$(SRC_DIR)/machdep.c $(SRC_DIR)/cclass.c $(SRC_DIR)/machdep.c $(SRC_DIR)/cclass.c $(SRC_DIR)/savegram.c
CFILES = LLgen.c tokens.c Lpars.c $(CSRC) CFILES = LLgen.c tokens.c Lpars.c $(CSRC)
GFILES = $(SRC_DIR)/tokens.g $(SRC_DIR)/LLgen.g GFILES = $(SRC_DIR)/tokens.g $(SRC_DIR)/LLgen.g
FILES = $(SRC_DIR)/types.h $(SRC_DIR)/extern.h \ FILES = $(SRC_DIR)/types.h $(SRC_DIR)/extern.h \

385
util/LLgen/src/savegram.c Normal file
View file

@ -0,0 +1,385 @@
/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
* All rights reserved.
*/
#ifdef NON_CORRECTING
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* savegram.c
* Save the input grammar for non-correcting error recovery
*
* Grammar rules are `flattened' by introducing anonymous nonterminals.
* [B]? becomes X; X: B | {empty}
* [B]+ becomes X; X: B Y; Y: X | {empty}
* [B]* becomes X; X: B X | {empty}
* [B | C] becomes X; X: B | C
* [B | C]* becomes X; X: B X | C X | {empty} etc.
*/
# include "types.h"
# include "extern.h"
# include "io.h"
# include "assert.h"
# include "sets.h"
#define LLALT 9999
static int nt_highest;
extern int nbytes;
extern p_mem alloc();
extern p_set start_firsts;
extern p_set setalloc();
extern p_gram search();
STATIC save_rule();
STATIC save_set();
/* t_list will contain the terms that still have to be `flattened';
 * save_rule() appends to it, save_grammar() resolves the entries.
 */
static struct t_list {
	p_term term;		/* the term; null for [] without specifier */
	int t_nt_num;		/* fresh nonterminal number standing for it */
} *t_list;

/* sub_list will contain the actions that carry a %substart */
static struct subparse_list {
	p_gram sub_action;	/* the action */
	int sub_nt_num;		/* fresh nonterminal simulating the subparser */
} *sub_list;

/* Index of the first free entry in t_list */
static int t_list_index;

/* Index of the first free entry in sub_list */
static int sub_list_index;

/* File to save grammar to */
static FILE *fgram;

/* Nonterminal number to simulate parsers that get called in actions;
 * used when LLgen is called with the -n -s options
 */
int act_nt;
save_grammar(f) FILE *f; {
/*
* Save the grammar on file f, as the initializer of a static array
* called <prefix>grammar ("LLgrammar" when no prefix is set).
*
* Written in this order:
*  - if subpars_sim is set, a simulation rule for the fresh
*    nonterminal act_nt, with one alternative per start symbol;
*  - one record per nonterminal in nonterms;
*  - one record per term that save_rule() put on t_list;
*  - if subpars_sim is not set, one record per entry on sub_list;
*  - a closing 0 and the define LLNNONTERMINALS.
* A record consists of the nonterminal number, a first set, a follow
* set, a 1/0 flag telling whether it produces empty, the rule itself
* (alternatives separated by LLALT) and a terminating 0.
*/
register p_nont p;
register p_start st;
register int nt_nr;
fgram = f;
/* Compute highest nonterminal nr. */
nt_highest = nnonterms + assval - 1;
/* Generate some constants in the grammar file */
/* Allocate terms list */
t_list = (struct t_list *) alloc((unsigned) nterms * sizeof(struct t_list));
t_list_index = 0;
/* Allocate list for actions with a %substart */
/* NOTE(review): sub_list_index is not reset here, unlike t_list_index;
   it relies on its static zero initialization -- fine as long as
   save_grammar is called only once, confirm */
sub_list = (struct subparse_list *) alloc(nsubstarts * sizeof(struct subparse_list));
fputs("static ", fgram);
fputs((prefix ? prefix : "LL"), fgram);
fputs("grammar[] = {\n", fgram);
/* Check if -n -s option is on */
if (subpars_sim) {
/* Allocate action simulation nt */
act_nt = ++nt_highest;
/* write simulation rule */
fprintf(fgram, "/* Simulation rule */\n");
fprintf(fgram, "%d,\n", act_nt);
/* Put a firstset and a fake followset */
/* Followset optimization is not implemented for
-s because it would be hard, and does not
bring enough improvement to justify the effort
*/
save_set(start_firsts);
save_set(start_firsts);
/* Simulation rule produces empty */
fprintf(fgram, "%d,\n", 1);
/* One alternative per start symbol: the start symbol followed
   by the simulation nonterminal itself */
for (st = start; st; st = st->ff_next)
{
fprintf(fgram, "%d, %d, %d, \n", st->ff_nont + assval,
act_nt, LLALT);
}
fprintf(fgram, "%d, \n", 0);
}
/* Now process all rules */
for (p = nonterms, nt_nr = assval; p < maxnt; p++, nt_nr++) {
fprintf(fgram, "/* nr. %d %s */\n", nt_nr, p->n_name);
fprintf(fgram, "%d, ",nt_nr);
if (! p->n_rule) { /* undefined */
f_input = p->n_string;
error(p->n_lineno,"Nonterminal %s not defined",
p->n_name);
/* NOTE(review): execution continues after error(), so
   save_rule() below is handed the null p->n_rule --
   confirm undefined nonterminals cannot get this far */
}
/* Save the first_set and follow set */
save_set(p->n_nc_first);
save_set(p->n_nc_follow);
/* Flag: does this nonterminal produce empty? */
if (p->n_flags & EMPTY)
fprintf(fgram, "%d,\n", 1);
else
fprintf(fgram, "%d,\n", 0);
save_rule(p->n_rule, 0);
fprintf(fgram, "%d,\n", 0);
}
/* Resolve terms, they are on t_list */
fprintf(fgram, "/* Fresh nonterminals */\n");
{ int i;
/* t_list_index can grow while this loop runs: save_rule() appends
   the terms nested in the rule it is saving */
for (i = 0; i < t_list_index; i++)
{
/* Terms of the form [] without + ? * or number produce
a NIL pointer in the term-list */
if ((t_list + i)->term == (struct term *) 0) {
continue;
}
fprintf(fgram, "%d, ", (t_list + i)->t_nt_num);
/* Save the first and follow sets */
save_set((t_list + i)->term->t_nc_first);
save_set((t_list + i)->term->t_nc_follow);
/* NOTE: A VARIABLE REPETITION COUNT TERMS RULE IS NOT
ALLOWED TO PRODUCE EMPTY IN LLGEN
*/
switch(r_getkind((t_list + i)->term)) {
case FIXED:
/* Already done by repeating new nonterminal */
/* FIXED term-rule may produce empty */
if (empty((t_list +i)->term->t_rule))
fprintf(fgram, "%d,\n", 1);
else
fprintf(fgram, "%d,\n", 0);
save_rule((t_list + i)->term->t_rule, 0);
fprintf(fgram, "%d,\n", 0);
break;
case STAR:
/* Save the rule, appending the new lhs for this rule */
/* Star rules always produce empty */
fprintf(fgram, "1,\n");
save_rule((t_list + i)->term->t_rule,
(t_list + i)->t_nt_num);
fprintf(fgram, "%d,\n%d,\n", LLALT, 0);
/* ALT EMPTY*/
break;
case PLUS:
/* Save the rule appending a fresh nonterminal */
fprintf(fgram, "%d,\n", 0);
save_rule((t_list + i)->term->t_rule, ++nt_highest);
fprintf(fgram, "%d,\n", 0); /* EOR */
/* Now the record of the fresh nonterminal itself */
fprintf(fgram, "%d, ", nt_highest);
/* First set of the extra nonterm is same as
for the term */
/* Except that the new nonterm also produces empty ! */
save_set((t_list + i)->term->t_nc_first);
save_set((t_list + i)->term->t_nc_follow);
fprintf(fgram, "1,\n");
/* Its rule: the nonterminal of the term, or empty */
fprintf(fgram, "%d, ", (t_list+i)->t_nt_num);
fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */
break;
case OPT:
/* Optional term: the rule itself, or empty */
fprintf(fgram, "1,\n");
save_rule((t_list + i)->term->t_rule, 0);
fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */
break;
}
}
}
/* Resolve %substarts */
if (!subpars_sim) {
int i,s,check;
p_start ff, gg;
p_set temp_set;
for (i = 0; i < sub_list_index; i++) {
fprintf(fgram, "%d, ", (sub_list + i)->sub_nt_num);
/* Compute the first set */
temp_set = setalloc();
for (ff = g_getsubparse((sub_list + i)->sub_action);
ff; ff = ff->ff_next){
/* The result of setunion is stored in s but not used */
s = setunion(temp_set,
(&nonterms[ff->ff_nont])->n_first);
/* Warn when the nonterminal is not among the start symbols */
check = 0;
for (gg =start; gg; gg = gg->ff_next)
if (ff->ff_nont == gg->ff_nont)
check = 1;
if (check == 0)
warning((sub_list + i)->sub_action->g_lineno,
"\"%s\" is not a startsymbol",
(&nonterms[ff->ff_nont])->n_name);
}
/* The same set is written as first set and as follow set */
save_set(temp_set);
save_set(temp_set);
free(temp_set);
/* Produces empty */
fprintf(fgram, "1,\n");
/* One alternative per nonterminal in the %substart: that
   nonterminal followed by the simulating nonterminal itself */
ff = g_getsubparse((sub_list + i)->sub_action);
for (; ff; ff = ff->ff_next)
fprintf(fgram, "%d, %d, %d, \n", ff->ff_nont + assval,
(sub_list + i)->sub_nt_num,
LLALT);
fprintf(fgram, "%d, \n", 0);
}
}
/* Close the array and tell how many nonterminals it describes,
   the fresh ones included */
fprintf(fgram, "%d\n};\n", 0);
fprintf(fgram, "#define LLNNONTERMINALS %d\n", nt_highest - assval + 1);
}
STATIC
save_rule(p, tail) register p_gram p; int tail; {
/*
Walk through rule p, saving it. The non-terminal tail is
appended to the rule. It needs to be appended in this function
to process alt-rules correctly. Tail == 0 means don't append.
*/
int in_alt;
int illegal_num;
/* Processing an alt needs some special care. When processing the
first alternative, we don't want to write the alt-code;
When appending something to the alt, it needs to be appended to
every alternative and not at the end of the rule.
*/
/* Look up the ILLEGAL token number */
illegal_num = tokens[g_getcont(illegal_gram)].t_tokno;
in_alt = 0;
for (;;) {
switch(g_gettype(p)) {
case ALTERNATION :
if (in_alt)
fprintf(fgram, "%d,\n", LLALT);
else
in_alt = 1;
save_rule(g_getlink(p)->l_rule, tail);
break;
case TERM :
/* Make entry in term list */
(t_list + t_list_index)->term = g_getterm(p);
/* Test for [] without specifier */
if (g_getterm(p) == (struct term *) 0) {
t_list_index++;
break;
}
(t_list + t_list_index++)->t_nt_num = ++nt_highest;
fprintf(fgram, "%d, ", nt_highest);
/* Check if repetition, if so handle here */
if (r_getkind(g_getterm(p)) == FIXED)
{
int k;
for (k = 1; k < r_getnum(g_getterm(p)); k++)
fprintf(fgram, "%d, ", nt_highest);
}
break;
case NONTERM :
fprintf(fgram, "%d, ", g_getcont(p) + assval);
break;
case TERMINAL:
if (g_getcont(p) == g_getcont(illegal_gram)) {
/* %illegal. Ignore. */
break;
}
if (p->g_erroneous)
fprintf(fgram, "%d, ", illegal_num);
else
fprintf(fgram, "%d, ",
tokens[g_getcont(p)].t_tokno);
break;
case LITERAL:
if (p->g_erroneous)
fprintf(fgram, "%d, ", illegal_num);
else
fprintf(fgram, "%d, ",
tokens[g_getcont(p)].t_tokno);
break;
case ACTION:
if (subpars_sim) {
fprintf(fgram, "%d, ", act_nt);
}
else if (g_getsubparse(p)) {
/* Allocate nonterminal that will simulate
subparser
*/
(sub_list + sub_list_index)->sub_nt_num =
++nt_highest;
(sub_list + sub_list_index++)->sub_action = p;
fprintf(fgram, "%d, ", nt_highest);
}
break;
case EORULE :
if ((! in_alt) && tail )
/* If this rule is not an alt, append tail now.
If it is an alt, the recursive call of this function
has appended tail to each alternative
*/
fprintf(fgram, "%d, ", tail);
return;
}
p++;
}
}
STATIC
save_set(p) p_set p; {
	/*
	 * Write the first nbytes bytes of set p to the grammar file as
	 * octal constants, each followed by a comma, and end the line.
	 * Every element of the set is split into sizeof(int) bytes,
	 * least significant byte first.
	 */
	register unsigned word;
	register int nth;
	int remaining = nbytes;

	for (;;) {
		word = (unsigned) *p++;
		nth = 0;
		while (nth < sizeof(int)) {
			fprintf(fgram,"0%o,",(int)(word & 0377));
			word >>= 8;
			remaining--;
			if (remaining == 0) {
				fputs("\n",fgram);
				return;
			}
			nth++;
		}
	}
	/* NOTREACHED */
}
#endif

View file

@ -31,7 +31,7 @@ extern p_set setalloc();
extern p_set get_set(); extern p_set get_set();
extern int setunion(); extern int setunion();
extern int setintersect(); extern int setintersect();
extern setminus(); extern setminus();
extern int setempty(); extern int setempty();
extern int findindex(); extern int findindex();
extern int setcount(); extern int setcount();

View file

@ -14,7 +14,7 @@
/* /*
* tokens.g * tokens.g
* Defines the tokens for the grammar of LLgen. * Defines the tokens for the grammar of LLgen.
* The lexical analyser and LLmessage are also included here. * The lexical analyser and LLmessage are also included here.
*/ */
{ {
@ -30,7 +30,7 @@ static string rcsidc = "$Id$";
/* Here are defined : */ /* Here are defined : */
extern int scanner(); extern int scanner();
extern LLmessage(); extern LLmessage();
extern int input(); extern int input();
extern unput(); extern unput();
extern skipcomment(); extern skipcomment();
@ -39,12 +39,18 @@ STATIC linedirective();
# endif # endif
STATIC string cpy(); STATIC string cpy();
STATIC string vallookup(); STATIC string vallookup();
STATIC copyact();
static int nparams;
} }
/* Classes */ /* Classes */
%token C_IDENT ; /* lextoken.t_string contains the identifier read */ %token C_IDENT ; /* lextoken.t_string contains the identifier read */
%token C_NUMBER ; /* lextoken.t_num contains the number read */ %token C_NUMBER ; /* lextoken.t_num contains the number read */
%token C_LITERAL ; /* lextoken.t_string contains the literal read */ %token C_LITERAL ; /* lextoken.t_string contains the literal read */
%token C_EXPR ; /* A C expression (%if or %while) */
%token C_PARAMS ; /* formal or actual parameters */
%token C_ACTION ; /* a C action */
/* Keywords */ /* Keywords */
@ -60,6 +66,9 @@ STATIC string vallookup();
%token C_AVOID ; %token C_AVOID ;
%token C_PREFER ; %token C_PREFER ;
%token C_DEFAULT ; %token C_DEFAULT ;
%token C_SUBSTART ;
%token C_ERRONEOUS ;
%token C_ILLEGAL ;
%lexical scanner ; %lexical scanner ;
@ -80,26 +89,143 @@ typedef struct keyword {
*/ */
static t_keyw resword[] = { static t_keyw resword[] = {
{ "token", C_TOKEN }, { "token", C_TOKEN },
{ "avoid", C_AVOID }, { "avoid", C_AVOID },
{ "prefer", C_PREFER }, { "prefer", C_PREFER },
{ "persistent", C_PERSISTENT }, { "persistent", C_PERSISTENT },
{ "default", C_DEFAULT }, { "default", C_DEFAULT },
{ "if", C_IF }, { "if", C_IF },
{ "while", C_WHILE }, { "while", C_WHILE },
{ "first", C_FIRST }, { "first", C_FIRST },
{ "start", C_START }, { "start", C_START },
{ "lexical", C_LEXICAL }, { "lexical", C_LEXICAL },
{ "onerror", C_ONERROR }, { "onerror", C_ONERROR },
{ "prefix", C_PREFIX }, { "prefix", C_PREFIX },
{ 0, 0 } #ifdef NON_CORRECTING
{ "substart", C_SUBSTART },
{ "erroneous", C_ERRONEOUS },
{ "illegal", C_ILLEGAL },
#endif
{ 0, 0 }
}; };
static t_token savedtok; /* to save lextoken in case of an insertion */ static t_token savedtok; /* to save lextoken in case of an insertion */
# ifdef LINE_DIRECTIVE # ifdef LINE_DIRECTIVE
static int nostartline; /* = 0 if at the start of a line */ static int nostartline; /* = 0 if at the start of a line */
# endif # endif
STATIC
copyact(ch1,ch2,flag,level) char ch1,ch2; {
/*
* Copy an action to file f. Opening bracket is ch1, closing bracket
* is ch2. The opening bracket has already been read by the caller;
* this routine reads up to and including the matching ch2.
* If flag & 1, copy the outermost opening and closing brackets too
* (nested brackets are always copied).
* If flag & 2, don't allow ',' as a separator at the outermost level:
* a warning is given and ';' is written instead.
* level is the nesting depth: 0 for the outermost call, and level+1
* for each recursive call made for a nested bracket pair.
* Side effect: the file-level counter nparams is reset at level 0 and
* incremented for each piece of text delimited by ';' or ',' (or by
* the closing bracket) at level 0.
*/
/* text_seen is static so that nested (recursive) calls share it:
* it records whether any non-space text was seen since the last
* separator at the outermost level.
*/
static int text_seen = 0;
register FILE *f;
register ch; /* Current char */
register match; /* used to read strings */
int saved = linecount;
/* save linecount */
int sav_strip = strip_grammar;
f = fact;
/* strip_grammar is cleared while copying, except for the case
* ch1 != '{' with flag == 1; the saved value is restored on every
* return path.
* NOTE(review): the meaning of strip_grammar is not visible here;
* presumably it controls echoing of the grammar text - confirm.
*/
if (ch1 == '{' || flag != 1) strip_grammar = 0;
if (!level) {
/* Outermost call: start a new entry in the action file,
* introduced by a '\0' byte and a line directive.
*/
text_seen = 0;
nparams = 0; /* count commas */
putc('\0',f);
fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
}
if (level || (flag & 1)) putc(ch1,f);
for (;;) {
ch = input();
if (ch == ch2) {
/* Found the closing bracket for this level */
if (!level) {
/* Text before the closing bracket counts as
* one more parameter
*/
if (text_seen) nparams++;
}
if (level || (flag & 1)) putc(ch,f);
/* If strip_grammar was cleared above, also write the
* closing bracket to standard output.
* NOTE(review): presumably needed because the stripped
* grammar output misses this character - confirm.
*/
if (strip_grammar != sav_strip) {
if (ch1 == '{' || flag != 1) putchar(ch);
}
strip_grammar = sav_strip;
return;
}
switch(ch) {
case ')':
case '}':
case ']':
/* A closing bracket that is not ch2: report it, but
* still copy it (the break leads to the putc below)
*/
error(linecount,"Parentheses mismatch");
break;
case '(':
/* Nested brackets: the recursive call copies everything
* including both brackets, so skip the putc below
*/
text_seen = 1;
copyact('(',')',flag,level+1);
continue;
case '{':
text_seen = 1;
copyact('{','}',flag,level+1);
continue;
case '[':
text_seen = 1;
copyact('[',']',flag,level+1);
continue;
case '/':
/* Peek at the next character to recognize the start of
* a C comment (slash followed by star)
*/
ch = input();
unput(ch);
if (ch == '*') {
/* The '/' is written here; the rest is left to
* skipcomment.
* NOTE(review): presumably skipcomment(1) copies
* the comment to the action file - confirm.
*/
putc('/', f);
skipcomment(1);
continue;
}
/* Just a '/': restore it so that it is copied below */
ch = '/';
text_seen = 1;
break;
case ';':
case ',':
/* A separator only counts at the outermost level, and
* only when it follows some text
*/
if (! level && text_seen) {
text_seen = 0;
nparams++;
if (ch == ',' && (flag & 2)) {
warning(linecount, "Parameters may not be separated with a ','");
ch = ';';
}
}
break;
case '\'':
case '"' :
/*
* watch out for brackets in strings, they do not
* count !
*/
text_seen = 1;
match = ch;
putc(ch,f);
/* Copy up to the matching quote. The loop also stops when
* input() returns 0.
* NOTE(review): confirm how input() signals end of input.
*/
while((ch = input())) {
if (ch == match) break;
if (ch == '\\') {
/* Copy the backslash and fetch the escaped
* character, so an escaped quote does not
* end the string
*/
putc(ch,f);
ch = input();
}
if (ch == '\n') {
error(linecount,"Newline in string");
/* Push back a closing quote.
* NOTE(review): presumably the next input()
* returns it, ending the string - confirm.
*/
unput(match);
}
putc(ch,f);
}
/* Normal end of string: the closing quote is copied by the
* putc below
*/
if (ch == match) break;
/* Fall through */
case EOF :
/* Input ended inside the action; only the outermost call
* reports it, using the line where the action started
*/
if (!level) error(saved,"Action does not terminate");
strip_grammar = sav_strip;
return;
default:
/* Any character that is not of class ISSPA counts as text */
if (c_class[ch] != ISSPA) text_seen = 1;
}
/* Copy the (possibly replaced) character to the action file */
putc(ch,f);
}
}
scanner() { scanner() {
/* /*
* Lexical analyser, what else * Lexical analyser, what else
@ -108,7 +234,11 @@ scanner() {
register char *p = ltext; register char *p = ltext;
int reserved = 0; /* reserved word? */ int reserved = 0; /* reserved word? */
char *max = &ltext[LTEXTSZ - 1]; char *max = &ltext[LTEXTSZ - 1];
static int nextexpr;
int expect_expr = nextexpr;
long off;
nextexpr = 0;
if (savedtok.t_tokno) { if (savedtok.t_tokno) {
/* A token has been inserted. /* A token has been inserted.
* Now deliver the last lextoken again * Now deliver the last lextoken again
@ -127,6 +257,21 @@ scanner() {
} }
# endif # endif
switch(c_class[ch]) { switch(c_class[ch]) {
case ISACT :
if (ch == '{') {
copyact('{', '}', in_production, 0);
return C_ACTION;
}
assert(ch == '(');
if (expect_expr) {
copyact('(', ')', 1, 0);
return C_EXPR;
}
off = ftell(fact);
copyact('(', ')', in_production != 0 ? 0 : 2, 0);
if (nparams == 0) fseek(fact, off, 0);
lextoken.t_num = nparams;
return C_PARAMS;
case ISLIT : case ISLIT :
for (;;) { for (;;) {
ch = input(); ch = input();
@ -177,7 +322,7 @@ scanner() {
unput(ch); unput(ch);
*p = '\0'; *p = '\0';
if (reserved) { /* if (reserved) { /*
* Now search for the keyword * Now search for the keyword
*/ */
register p_keyw w; register p_keyw w;
@ -187,6 +332,10 @@ scanner() {
/* /*
* Return token number. * Return token number.
*/ */
if (w->w_value == C_IF ||
w->w_value == C_WHILE) {
nextexpr = 1;
}
return w->w_value; return w->w_value;
} }
w++; w++;
@ -208,11 +357,11 @@ input() {
*/ */
register c; register c;
if (c = backupc) { if (c = backupc) {
/* Last char was "unput()". Deliver it again /* Last char was "unput()". Deliver it again
*/ */
backupc = 0; backupc = 0;
return c; return c;
} }
if ((c = getc(finput)) == EOF) { if ((c = getc(finput)) == EOF) {
nonline = 0; nonline = 0;
@ -337,7 +486,7 @@ cpy(s,p,inserted) register string p; {
register string t = 0; register string t = 0;
switch(s) { switch(s) {
case C_IDENT : case C_IDENT :
if (!inserted) t = lextoken.t_string; if (!inserted) t = lextoken.t_string;
else t = "identifier"; else t = "identifier";
break; break;
@ -353,7 +502,7 @@ cpy(s,p,inserted) register string p; {
t = "literal"; t = "literal";
break; break;
case EOFILE : case EOFILE :
t = "endoffile"; t = "end-of-file";
break; break;
} }
if (!t && (t = vallookup(s))) { if (!t && (t = vallookup(s))) {
@ -382,13 +531,15 @@ cpy(s,p,inserted) register string p; {
case '\r' : *p++ = 'r'; break; case '\r' : *p++ = 'r'; break;
case '\t' : *p++ = 't'; break; case '\t' : *p++ = 't'; break;
default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07);
*p++='0'+(s&07); *p++='0'+(s&07);
} }
} }
*p++ = '\''; *p++ = '\'';
return p; return p;
} }
string strcpy();
LLmessage(d) { LLmessage(d) {
/* /*
* d is either 0, in which case the current token has been deleted, * d is either 0, in which case the current token has been deleted,
@ -400,9 +551,16 @@ LLmessage(d) {
nerrors++; nerrors++;
s = buf; s = buf;
if (d == 0) { if (d < 0) {
s = cpy(LLsymb,s,0); strcpy(buf, "end-of-file expected");
}
else if (d == 0) {
#ifdef LLNONCORR
t = " unexpected";
#else
t = " deleted"; t = " deleted";
#endif
s = cpy(LLsymb,s,0);
do *s++ = *t; while (*t++); do *s++ = *t; while (*t++);
} else { } else {
s = cpy(d,s,1); s = cpy(d,s,1);
@ -411,12 +569,7 @@ LLmessage(d) {
s = cpy(LLsymb,s,0); s = cpy(LLsymb,s,0);
*s = '\0'; *s = '\0';
} }
error(linecount, "%s", buf); if (d > 0) { /*
/* Don't change this line to
* error(linecount, buf).
* The string in "buf" might contain '%' ...
*/
if (d) { /*
* Save the current token and make up some * Save the current token and make up some
* attributes for the inserted token * attributes for the inserted token
*/ */
@ -426,5 +579,17 @@ LLmessage(d) {
else if (d == C_LITERAL) lextoken.t_string = "dummy_literal"; else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
else if (d == C_NUMBER) lextoken.t_num = 1; else if (d == C_NUMBER) lextoken.t_num = 1;
} }
#ifdef LLNONCORR
else
#endif
error(linecount, "%s", buf);
/* Don't change this line to
* error(linecount, buf).
* The string in "buf" might contain '%' ...
*/
#ifdef LLNONCORR
in_production = 1;
/* To prevent warnings from copyact */
#endif
} }
} }

View file

@ -40,12 +40,20 @@ typedef struct token {
* structure for the grammar elements * structure for the grammar elements
*/ */
typedef struct gram { typedef struct gram {
short x; /* for lay-out see comment below */ int x; /* for lay-out see comment below */
short g_lineno; /* element found on this line number */ int g_lineno; /* element found on this line number */
#ifdef NON_CORRECTING
int g_erroneous; /* 1 if element declared erroneous */
#endif
union { union {
int g_index; int g_index;
struct term * g_term; struct term * g_term;
struct link * g_link; struct link * g_link;
#ifdef NON_CORRECTING
/* If this is an action with a %substart g_subparse
points to the list of startsymbols of the subparser */
struct ff_firsts *g_subparse;
#endif
} g_i; } g_i;
} t_gram,*p_gram; } t_gram,*p_gram;
@ -78,7 +86,10 @@ typedef struct gram {
# define g_setterm(p,s) ((p)->g_i.g_term = (s)) # define g_setterm(p,s) ((p)->g_i.g_term = (s))
# define g_setlink(p,s) ((p)->g_i.g_link = (s)) # define g_setlink(p,s) ((p)->g_i.g_link = (s))
# define g_setnpar(p,s) { assert(((unsigned)(s))<=017);(p)->x=((p)->x&~0170)|((s)<<3);} # define g_setnpar(p,s) { assert(((unsigned)(s))<=017);(p)->x=((p)->x&~0170)|((s)<<3);}
#ifdef NON_CORRECTING
# define g_getsubparse(p) ((p)->g_i.g_subparse)
# define g_setsubparse(p,s) ((p)->g_i.g_subparse = (s))
#endif
/* /*
* Some constants to communicate with the symbol table search routine * Some constants to communicate with the symbol table search routine
*/ */
@ -101,7 +112,7 @@ typedef struct gram {
* nonterminal structure * nonterminal structure
*/ */
typedef struct { typedef struct {
short n_flags; /* low order four bits are reserved int n_flags; /* low order four bits are reserved
* the parameter count * the parameter count
*/ */
# define getntparams(p) ((p)->n_flags&017) # define getntparams(p) ((p)->n_flags&017)
@ -110,7 +121,7 @@ typedef struct {
# define RECURSIVE 02000 /* Set if the default rule is recursive */ # define RECURSIVE 02000 /* Set if the default rule is recursive */
# define PARAMS 04000 /* tells if a nonterminal has parameters */ # define PARAMS 04000 /* tells if a nonterminal has parameters */
# define EMPTY 010000 /* tells if a nonterminal produces empty */ # define EMPTY 010000 /* tells if a nonterminal produces empty */
# define LOCALS 020000 /* local declarations ? */ # define LOCALS 020000 /* local declarations ? */
# define REACHABLE 040000 /* can this nonterminal be reached ? */ # define REACHABLE 040000 /* can this nonterminal be reached ? */
# define VERBOSE 0100000 /* Set if in LL.output file */ # define VERBOSE 0100000 /* Set if in LL.output file */
char n_insafety; char n_insafety;
@ -119,8 +130,8 @@ typedef struct {
# define setntsafe(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_insafety=(i);} # define setntsafe(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_insafety=(i);}
# define getntout(p) ((p)->n_outsafety) # define getntout(p) ((p)->n_outsafety)
# define setntout(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_outsafety=(i);} # define setntout(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_outsafety=(i);}
short n_count; /* pieces of code before this rule */ int n_count; /* pieces of code before this rule */
short n_lineno; /* declared on line ... */ int n_lineno; /* declared on line ... */
p_gram n_rule; /* pointer to right hand side of rule */ p_gram n_rule; /* pointer to right hand side of rule */
union { union {
p_set n_f; /* ptr to "first" set */ p_set n_f; /* ptr to "first" set */
@ -131,6 +142,10 @@ typedef struct {
} n_x; } n_x;
# define n_first n_x.n_f # define n_first n_x.n_f
# define n_string n_x.n_s # define n_string n_x.n_s
#ifdef NON_CORRECTING
p_set n_nc_first; /* Pointer to non-corr first set */
p_set n_nc_follow; /* Pointer to non-corr follow set */
#endif
p_set n_follow; /* pointer to the "follow" set */ p_set n_follow; /* pointer to the "follow" set */
p_set n_contains; /* pointer to symbols that can be produced */ p_set n_contains; /* pointer to symbols that can be produced */
string n_name; /* name of nonterminal */ string n_name; /* name of nonterminal */
@ -138,7 +153,7 @@ typedef struct {
long n_off; /* index of parameters in action file */ long n_off; /* index of parameters in action file */
} t_nont, *p_nont; } t_nont, *p_nont;
/* /*
* hash table structure * hash table structure
*/ */
typedef struct h_entry { typedef struct h_entry {
@ -161,13 +176,16 @@ typedef struct link {
*/ */
p_gram l_rule; /* pointer to this rule */ p_gram l_rule; /* pointer to this rule */
p_set l_symbs; /* set, when to take this rule */ p_set l_symbs; /* set, when to take this rule */
#ifdef NON_CORRECTING
p_set l_nc_symbs;
#endif
p_set l_others; /* set, when to take another rule */ p_set l_others; /* set, when to take another rule */
} t_link, *p_link; } t_link, *p_link;
/* /*
* Structure for a repetition specification * Structure for a repetition specification
*/ */
typedef short t_reps,*p_reps; typedef int t_reps,*p_reps;
# define FIXED 00 /* a fixed number */ # define FIXED 00 /* a fixed number */
# define STAR 01 /* 0 or more times */ # define STAR 01 /* 0 or more times */
@ -187,7 +205,7 @@ typedef short t_reps,*p_reps;
*/ */
typedef struct term { typedef struct term {
t_reps t_repeats; t_reps t_repeats;
short t_flags; /* Low order three bits for safety */ int t_flags; /* Low order three bits for safety */
# define gettout(q) ((q)->t_flags&07) # define gettout(q) ((q)->t_flags&07)
# define settout(q,i) {assert(((unsigned)(i))<=NOSAFETY);(q)->t_flags&=~07;(q)->t_flags|=i;} # define settout(q,i) {assert(((unsigned)(i))<=NOSAFETY);(q)->t_flags&=~07;(q)->t_flags|=i;}
# define PERSISTENT 010 /* Set if this term has %persistent */ # define PERSISTENT 010 /* Set if this term has %persistent */
@ -199,6 +217,10 @@ typedef struct term {
p_gram t_rule; /* pointer to this term */ p_gram t_rule; /* pointer to this term */
p_set t_follow; /* set of followers */ p_set t_follow; /* set of followers */
p_set t_first; /* set of firsts */ p_set t_first; /* set of firsts */
#ifdef NON_CORRECTING
p_set t_nc_first; /* set of non corr firsts */
p_set t_nc_follow; /* set of non corr followers */
#endif
p_set t_contains; /* contains set */ p_set t_contains; /* contains set */
} t_term, *p_term; } t_term, *p_term;