/*
 * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
 *
 *          This product is part of the Amsterdam Compiler Kit.
 *
 * Permission to use, sell, duplicate or disclose this software must be
 * obtained in writing. Requests for such permissions may be sent to
 *
 *      Dr. Andrew S. Tanenbaum
 *      Wiskundig Seminarium
 *      Vrije Universiteit
 *      Postbox 7161
 *      1007 MC Amsterdam
 *      The Netherlands
 *
 */

/*
 *  L L G E N
 *
 *  An Extended LL(1) Parser Generator
 *
 *  Author : Ceriel J.H. Jacobs
 */

/*
 * gencode.c
 * Defines the routine "gencode", which generates the parser
 * we wanted so badly.
 * This file is a mess, it should be cleaned up some time.
 */

# include "types.h"
# include "io.h"
# include "tunable.h"
# include "extern.h"
# include "sets.h"
# include "assert.h"

# ifndef NORCSID
static string rcsid3 = "$Header$";
# endif /* NORCSID */

/*
 * Some codestrings used more than once
 */

static string	c_arrend =	"0 };\n";
static string	c_close =	"}\n";
static string	c_LLptrmin =	"LLptr++;\n";
static string	c_break =	"break;\n";
static string	c_read =	"LLread();\n";

/* Some constants used for reading from the action file */
# define ENDDECL	0400
# define IDENT		0401

/* Capacity of the scratch list filled by dopush() for a single rule */
# define PUSHLISTSZ	100

static int nlabel;		/* count for the generation of labels */
static int nvar;		/* count for generation of variables */
static int pushlist[PUSHLISTSZ];	/* entries collected by dopush() */
static int *ppushlist;		/* next free slot in pushlist */
/*
 * Growing array that ends up in the generated "LLlists" table:
 * lists is its start, plists the next free slot, maxlists its last slot.
 */
static int *lists,*maxlists,*plists;
p_mem ralloc(),alloc();

/* In this file the following routines are defined: */
extern		gencode();
STATIC		opentemp();
STATIC		geninclude();
STATIC		genrecovery();
STATIC string	genname();
STATIC		generate();
STATIC		prset();
STATIC		macro();
STATIC		controlline();
STATIC		getparams();
STATIC		gettok();
STATIC		rulecode();
extern		alternation();
STATIC int	dopush();
STATIC		addpush();
STATIC		pushcode();
STATIC		getaction();
STATIC		codeforterm();
STATIC		genifhead();
STATIC		gencases();
STATIC		genpush();

/* Macro to print a terminal: quoted when its number is below 0400 */
# define PTERM(f,p)	fprintf(f,(p)->h_num<0400?"'%s'":"%s",(p)->h_name)

/*
 * Entry point of the code generator.
 * Generates the include file, then for each of the "argc" entries of
 * "files" a parser source (installed under the name computed by
 * genname()), and finally the recovery file.
 */
gencode(argc) int argc; {
	register p_file p = files;

	/* Generate include file Lpars.h */
	geninclude();

	/* Set up for code generation */
	if ((fact = fopen(f_temp,"r")) == NULL) {
		fatal(0,e_noopen,f_temp);
	}
	lists = (int *) alloc(50 * sizeof(int));
	plists = lists;
	maxlists = lists+49;

	/* For every source file .... */
	while (argc--) {
		/* Open temporary */
		f_input = p->f_name;
		opentemp(f_input);
		/* generate code ... */
		copyfile(0);
		generate(p);
		getaction(2);
		if (ferror(fpars) != 0) {
			fatal(0,"Write error on temporary");
		}
		/* fclose flushes; a failure here is a write error too */
		if (fclose(fpars) != 0) {
			fatal(0,"Write error on temporary");
		}
		/* And install */
		install(genname(p->f_name),p->f_name);
		p++;
	}
	genrecovery();
}

/*
 * Open the temporary output file f_pars for writing (as fpars) and write
 * the LLgenid header line to it; "str" is the name put in that line, or
 * "." when str is a null pointer.
 */
STATIC
opentemp(str) string str; {

	if ((fpars = fopen(f_pars,"w")) == NULL) {
		fatal(0,e_noopen,f_pars);
	}
	fprintf(fpars,LLgenid,str ? str : ".");
}

/*
 * Generate the include file: a "# define" for every terminal entry
 * whose number is at least 0400. The result is installed as HFILE.
 */
STATIC
geninclude() {
	register p_entry p;
	register FILE *f;

	opentemp((string) 0);
	f = fpars;
	for (p = h_entry; p < max_t_ent; p++) {
		if (p->h_num >= 0400) {
			fprintf(f,"# define %s %d\n", p->h_name,p->h_num);
		}
	}
	if (ferror(f) != 0) {
		fatal(0,"write error on temporary");
	}
	/* fclose flushes; a failure here is a write error too */
	if (fclose(f) != 0) {
		fatal(0,"write error on temporary");
	}
	install(HFILE, (string) 0);
}

/*
 * Generate the recovery file: one routine per start symbol, followed by
 * the tables LLlists, LLsets and LLindex. The result is installed as
 * RFILE.
 */
STATIC
genrecovery() {
	register FILE	*f;
	register p_entry t;
	register int	i;
	register int	*q;
	int		index[256+NTERMINALS];
	register p_start st;

	opentemp((string) 0);
	f = fpars;
	copyfile(0);
	/* Now generate the routines that call the startsymbols */
	fputs("#define LLSTSIZ 1024\n",f);
	for (i = 1, st = start; st; st = st->ff_next) {
		i++;
		fputs(st->ff_name, f);
		fputs("() {\n\tint LLstack[LLSTSIZ];\n\tLLnewlevel(LLstack);\n\tLLread();\n", f);
		if (g_gettype(st->ff_nont->n_rule) == ALTERNATION) {
			fprintf(f, "\tLLxx.LLx_p--; *LLxx.LLx_p = %d;\n",
				findindex(&(st->ff_nont->n_contains)));
		}
		/* pointer difference: convert explicitly for %d */
		fprintf(f, "\tL%d_%s();\n",
			(int) (st->ff_nont-nonterms),
			(min_nt_ent+(st->ff_nont-nonterms))->h_name);
		if (st->ff_nont->n_flags & NNOSCAN) {
			fputs("\tLLscan(EOFILE);\n",f);
		}
		else	fputs("\tif (LLsymb != EOFILE) LLerror(EOFILE);\n",f);
		fputs("\tLLoldlevel();\n}\n",f);
	}
	fprintf(f,"#define LL_MAX %d\n#define LL_LEXI %s\n", i, lexical);
	fputs("static short LLlists[] = {\n", f);
	/* Now generate lists */
	q = lists;
	while (q < plists) {
		fprintf(f,"%d,\n",*q++);
	}
	fputs(c_arrend, f);
	/* Now generate the sets */
	fputs("char LLsets[] = {\n",f);
	for (i = 0; i < maxptr-setptr; i++) prset(setptr[i]);
	fputs(c_arrend, f);
	/* Map terminal numbers to their position in h_entry; -1 if unused */
	for (q = index; q <= &index[255 + NTERMINALS];) *q++ = -1;
	for (t = h_entry; t < max_t_ent; t++) {
		index[t->h_num] = t - h_entry;
	}
	fputs("short LLindex[] = {\n",f);
	for (q = index; q <= &index[assval-1]; q++) {
		fprintf(f, "%d,\n", *q);
	}
	fputs(c_arrend, f);
	copyfile(1);
	if (ferror(f) != 0) {
		fatal(0,"write error on temporary");
	}
	/* fclose flushes; a failure here is a write error too */
	if (fclose(f) != 0) {
		fatal(0,"write error on temporary");
	}
	install(RFILE, (string) 0);
}

STATIC
generate(f) p_file f; {
	/*
	 * Generates a parsing routine for every nonterminal
	 */
	register short	*s;
	register p_nont	p;
	register FILE	*fd;
	int		i;
	p_first		ff;
	int		mustpop;

	/* Generate first sets */
	for (ff = f->f_firsts; ff; ff = ff->ff_next) {
		macro(ff->ff_name,ff->ff_nont);
	}

	/* For every nonterminal generate a function */
	fd = fpars;
	for (s = f->f_start; s <= f->f_end; s++) {
		p = &nonterms[*s];
		/* Generate functions in the order in which the nonterminals
		 * were defined in the grammar. This is important, because
		 * of synchronisation with the action file
		 */
		while (p->n_count--) getaction(1);
		if (p->n_flags & PARAMS) controlline();
		fprintf(fd,"L%d_%s (",*s,(min_nt_ent + *s)->h_name);
		if (p->n_flags & PARAMS) getparams();
		else fputs(") {\n", fd);
		fputs("register struct LLxx *LLx = &LLxx;\n#ifdef lint\nLLx=LLx;\n#endif\n", fd);
		if (p->n_flags & LOCALS) getaction(1);
		i = getntsafe(p->n_flags);
		mustpop = 0;
		if (g_gettype(p->n_rule) == ALTERNATION) {
			mustpop = 1;
			if (i == NOSCANDONE) {
				fputs(c_read, fd);
				i = SCANDONE;
			}
		}
		/* labels are numbered per generated routine */
		nlabel = 1;
		rulecode(p->n_rule,
			 i,
			 !(p->n_flags & NNOSCAN), mustpop);
		fputs(c_close, fd);
	}
}

/*
 * Print the set p as a row of octal byte constants, low-order byte of
 * each set word first, NBYTES(nterminals) bytes in total.
 */
STATIC
prset(p) p_set p; {
	register int		k;
	register unsigned	i;
	int			j;

	j = NBYTES(nterminals);
	for (;;) {
		i = (unsigned) *p++;
		for (k = 0; k < sizeof(int); k++) {
			fprintf(fpars,"0%o,",(i & 0377));
			i >>= 8;
			if (--j == 0) {
				fputs("\n",fpars);
				return;
			}
		}
	}
	/* NOTREACHED */
}

/*
 * Generate a "#define s(x) ..." macro that tests x against the first
 * set of nonterminal n. A negative result of findindex() is emitted as
 * a direct comparison with its negation, anything else as a call of
 * LLfirst with that index.
 */
STATIC
macro(s,n) string s; p_nont n; {
	register FILE	*f;
	int		i;

	f = fpars;
	i = findindex(&(n->n_first));
	fprintf(f,"#define %s(x) ", s);
	if (i < 0) {
		fprintf(f, "((x) == %d)\n", -i);
		return;
	}
	fprintf(f,"LLfirst((x), %d)\n", i);
}

STATIC
controlline() {
	/* Copy a compiler control line from the action file to the
	 * output. The line is expected to be preceded by a '\0' byte
	 * and to start with '#'.
	 */
	register int	l;
	register FILE	*f1,*f2;

	f1 = fact; f2 = fpars;
	l = getc(f1);
	assert(l == '\0');
	l = getc(f1); putc(l,f2);
	assert( l == '#' ) ;
	for (;;) {
		l = getc(f1);
		/* A truncated action file must not make us loop forever */
		if (l == EOF) break;
		putc(l,f2);
		if (l == '\n') break;
	}
}

STATIC
getparams() {
	/* getparams is called if a nonterminal has parameters. The names
	 * of the parameters have to be found, and they should be declared
	 */
	long		off;
	register int	l;
	register FILE	*f;
	long		ftell();
	char		first;

	first = ' ';
	f = fpars;
	/* save offset in file to be able to copy the declaration later */
	off = ftell(fact);
	/* First pass through declaration, find the parameter names */
	while ((l = gettok()) != ENDDECL) {
		if (l == ';' || l == ',') {
			/*
			 * The last identifier found before a ';' or a ','
			 * must be a parameter
			 */
			fprintf(f,"%c%s", first, ltext);
			first = ',';
		}
	}
	fputs(") ",f);
	/*
	 * Now copy the declarations
	 */
	fseek(fact,off,0);
	getaction(0);
	fputs(" {\n",f);
}

STATIC
gettok() {
	/* Read a token from the action file.
	 * Returns IDENT (text in ltext) for an identifier, ENDDECL at
	 * end of file, at a '\0' byte (which is pushed back) or at a
	 * newline directly followed by either of those, and the
	 * character itself otherwise.
	 */
	register int	ch;
	register string	c;
	register FILE	*f;

	f = fact;
	ch = getc(f);
	switch(ch) {
	  case '\n':
		ch = getc(f);
		if (ch != EOF) {
			ungetc(ch,f);
			if (ch != '\0') return '\n';
		}
		return ENDDECL;
	  case '\0':
		ungetc(ch,f);
		/* Fall through */
	  case EOF :
		return ENDDECL;
	  default :
		if (isalpha(ch) || ch == '_') {
			c = ltext;
			while (isalnum(ch) || ch == '_') {
				*c++ = ch;
				/* too long: keep overwriting the last slot */
				if (c-ltext >= LTEXTSZ) --c;
				ch = getc(f);
			}
			if (ch != EOF) ungetc(ch,f);
			*c = '\0';
			return IDENT;
		}
		return ch;
	}
}

STATIC
rulecode(p,safety,mustscan,mustpop) register p_gram p;
	int safety, mustscan, mustpop; {
	/*
	 * Code for a production rule.
	 */
	register int	toplevel = 1;
	register FILE	*f;

	/*
	 * First generate code to push the contains sets of this rule
	 * on a stack
	 */
	ppushlist = pushlist;
	if (dopush(p,safety,1) > 0) pushcode();
	f = fpars;
	for (;;) {
		switch (g_gettype(p)) {
		  case EORULE :
			if (mustscan && safety == NOSCANDONE) {
				fputs(c_read,f);
			}
			return;
		  case TERMINAL : {
			register p_entry t;

			t = &h_entry[g_getcont(p)];
			if (toplevel == 0 && safety != NOSCANDONE) {
				fputs(c_LLptrmin,f);
			}
			if (safety == SAFE) {
				fputs("LL_SAFE(",f);
			}
			else if (safety <= SCANDONE) {
				fputs("LL_SCANDONE(",f);
			}
			else if (safety == NOSCANDONE) {
				if (toplevel != 0) {
					fputs("LL_T_NOSCANDONE(", f);
				}
				else	fputs("LL_N_NOSCANDONE(", f);
			}
			PTERM(f,t);
			fputs(");\n", f);
			if (safety == SAFE && toplevel > 0) {
				/* matches the toplevel -1 case in dopush() */
				safety = NOSCANDONE;
				toplevel = -1;
				p++;
				continue;
			}
			safety = NOSCANDONE;
			break; }
		  case NONTERM : {
			p_entry		t;
			register p_nont	n;
			int		params;

			n = &nonterms[g_getnont(p)];
			t= min_nt_ent+(n-nonterms);
			if (safety == NOSCANDONE &&
			    getntsafe(n->n_flags) < NOSCANDONE) fputs(c_read, f);
			if (toplevel == 0 &&
			    g_gettype(n->n_rule) != ALTERNATION) {
				fputs(c_LLptrmin, f);
			}
			params = g_getnpar(p);
			if (params) controlline();
			/* pointer difference: convert explicitly for %d */
			fprintf(f,"L%d_%s(",(int) (n-nonterms), t->h_name);
			if (params) getaction(0);
			fputs(");\n",f);
			safety = NOSCANDONE;
			if (!(n->n_flags & NNOSCAN)) safety = SCANDONE;
			break; }
		  case TERM :
			safety = codeforterm((p_term) pentry[g_getcont(p)],
						safety,
						toplevel);
			break;
		  case ACTION :
			getaction(1);
			p++;
			continue;
		  case ALTERNATION :
			alternation(p, safety, mustscan,mustpop, 0);
			return;
		}
		p++;
		toplevel = 0;
	}
}

/*
 * Generate a switch on LLcsymb for the alternation starting at p.
 * "lb" is the label to jump back to after error recovery; 0 means
 * that this call has to generate that label itself.
 */
alternation(p, safety, mustscan, mustpop, lb) register p_gram p;
	int safety, mustscan, mustpop, lb; {
	register FILE	*f = fpars;
	register p_link	l;
	int		hulp, hulp1,hulp2;
	int		var;
	int		haddefault = 0;
	int		unsafe = 1;
	p_set		set;
	p_set		setalloc();

	assert(safety < NOSCANDONE);
	l = (p_link) pentry[g_getcont(p)];
	hulp = nlabel++;
	hulp1 = nlabel++;
	hulp2 = nlabel++;
	var = nvar++;
	if (!lb) lb = hulp1;
	if (safety <= SAFESCANDONE) unsafe = 0;
	if (unsafe && hulp1 == lb) fprintf(f,"L_%d: ", hulp1);
	else if (mustpop) {
		mustpop = 0;
		fputs(c_LLptrmin, f);
	}
	if (unsafe) {
		fprintf(f,"{ int LL_%d = 0;\n", var);
	}
	fputs("switch(LLcsymb) {\n", f);
	while (g_gettype(p) != EORULE) {
		l = (p_link) pentry[g_getcont(p)];
		if (unsafe && (l->l_flag & DEF)) {
			haddefault = 1;
			fprintf(f,
"default: if (!LL_%d && LLskip()) {LL_%d = 1; goto L_%d;}\ngoto L_%d;\n",
			var, var, lb, hulp2);
		}
		if (l->l_flag & COND) {
			/*
			 * The symbols that are both in l_symbs and in
			 * l_others get cases of their own, guarded by
			 * the condition read from the action file.
			 */
			set = setalloc(tsetsize);
			setunion(set, l->l_others, tsetsize);
			setintersect(set, l->l_symbs, tsetsize);
			setminus(l->l_symbs, set, tsetsize);
			setminus(l->l_others, set, tsetsize);
			gencases(set);
			free((p_mem) set);
			controlline();
			fputs("if (!",f);
			getaction(0);
			fprintf(f,") goto L_%d;\n", hulp);
		}
		if (!unsafe && (l->l_flag & DEF)) {
			fputs("default:\n", f);
			haddefault = 1;
		}
		else	gencases(l->l_symbs);
		if (l->l_flag & DEF) {
			if (unsafe) {
				fprintf(f,"L_%d: ;\n", hulp2);
			}
			if (mustpop) fputs(c_LLptrmin, f);
			rulecode(l->l_rule, SAFESCANDONE, mustscan, 0);
		}
		else {
			if (mustpop) fputs(c_LLptrmin, f);
			rulecode(l->l_rule, SAFE, mustscan, 0);
		}
		fputs(c_break,f);
		if (l->l_flag & COND) {
			if (!haddefault) {
				fputs("default:\n", f);
			}
			else {
				gencases(l->l_others);
				safety = SAFE;
			}
			fprintf(f,"L_%d : ;\n",hulp);
			p++;
			if (!unsafe && g_gettype(p+1) == EORULE) {
				if (mustpop) fputs(c_LLptrmin, f);
				rulecode(((p_link)pentry[g_getcont(p)])->l_rule,
					safety,mustscan,0);
			}
			else alternation(p,safety,mustscan,mustpop,lb);
			break;
		}
		p++;
	}
	fputs(c_close, f);
	if (unsafe) fputs(c_close, f);
	return;
}

STATIC int
dopush(p,safety,toplevel) register p_gram p; int safety, toplevel; {
	/*
	 * Collect in pushlist the entries to be pushed for rule p and
	 * return their number. The part of the rule that follows an
	 * element is handled first, so the entries end up in reverse
	 * order.
	 * The safety only matters if toplevel != 0
	 */
	register int count = 0;

	for (;;) {
		switch(g_gettype(p)) {
		  case EORULE :
		  case ALTERNATION :
			return count;
		  case TERM : {
			register p_term q;

			q = (p_term) pentry[g_getcont(p)];
			count += dopush(p+1,SCANDONE,0);
			addpush(findindex(&(q->t_contains)));
			return count+1; }
		  case TERMINAL :
			if (toplevel > 0 && safety == SAFE) {
				count += dopush(p+1,NOSCANDONE,-1);
			}
			else	count += dopush(p+1, NOSCANDONE, 0);
			if (toplevel != 0) {
				return count;
			}
			/* a single terminal is pushed as a negative number */
			addpush(-h_entry[g_getcont(p)].h_num);
			return count + 1;
		  case NONTERM : {
			register p_nont n;

			n = &nonterms[g_getnont(p)];
			count += dopush(p+1, SCANDONE, 0);
			if (toplevel == 0 ||
			    g_gettype(n->n_rule) == ALTERNATION) {
				addpush(findindex(&n->n_contains));
				count++;
			}
			return count; }
		}
		p++;
	}
}

STATIC
addpush(val) int val; {
	/*
	 * Append val to pushlist, refusing to write past its end.
	 * NOTE(review): fatal() is assumed not to return; the "return"
	 * only keeps us in bounds in case it does.
	 */
	if (ppushlist >= &pushlist[PUSHLISTSZ]) {
		fatal(0,"Too many sets to push in one rule");
		return;
	}
	*ppushlist++ = val;
}

# define max(a,b) ((a) < (b) ? (b) : (a))

STATIC
pushcode() {
	/*
	 * Generate the code that pushes the entries collected in
	 * pushlist: separate pushes for one or two entries, otherwise
	 * one LLlpush call referring to a new stretch of the lists
	 * array, to which the entries are then appended.
	 */
	register int i,j,k;
	register int *p = pushlist;

	if ((i = ppushlist - p) == 0) return;
	if (i <= 2) {
		genpush(p[0]);
		if (i == 2) genpush(p[1]);
		return;
	}
	/* pointer difference: convert explicitly for %d */
	fprintf(fpars,"LLlpush(%d, %d);\n",(int) (plists-lists), i);
	if (maxlists - plists < i) {
		/* not enough room left: grow the lists array */
		j = plists - lists;
		k = maxlists-lists+max(i+1,50);
		lists = (int *) ralloc( (p_mem)lists,
				(unsigned)(k+1)*sizeof(int));
		plists = lists+j;
		maxlists = lists+k;
	}
	while (i--) {
		*plists++ = *p++;
	}
}

STATIC
getaction(flag) int flag; {
	/* Read an action from the action file and copy it to the output.
	 * flag = 1 if it is an action (it starts with a control line,
	 *	    and a newline is written after it),
	 *	  0 when reading parameters (nothing is added),
	 *	  2 to keep copying, consuming the '\0' behind every
	 *	    ENDDECL, until two ENDDECLs are seen with no other
	 *	    token than identifiers in between.
	 */
	register int	match,ch;
	register FILE	*f;
	register int	newline;	/* tracked, but has no effect on the output */
	int		mark = 0;

	if (flag == 1) {
		controlline();
	}
	f = fpars;
	newline = 0;
	for (;;) {
		ch = gettok();
		switch(ch) {
		  case ENDDECL:
			if (flag != 2) break;
			ch = getc(fact);
			assert(ch == '\0');
			fputs("\n",f);
			if (mark) return;
			mark = 1;
			continue;
		  case '\n':
			newline = 1;
			break;
		  case '\'' :
		  case '"' :
			/* Copy a character or string constant verbatim */
			if (newline) {
				newline = 0;
			}
			match = ch;
			putc(ch,f);
			while ((ch = getc(fact)) != EOF && ch != '\0') {
				if (ch == match) break;
				if (ch == '\\') {
					putc(ch,f);
					ch = getc(fact);
					if (ch == EOF) break;
				}
				putc(ch,f);
			}
			/*
			 * Truncated action file: do not spin on EOF;
			 * the next gettok() will report ENDDECL.
			 */
			if (ch == EOF) continue;
			break;
		  case IDENT :
			if (newline) {
				newline = 0;
			}
			fputs(ltext,f);
			continue;
		}
		mark = 0;
		if (ch == ENDDECL) break;
		if (newline && ch != '\n') {
			newline = 0;
		}
		putc(ch,f);
	}
	if (flag) fputs("\n",f);
}

STATIC
codeforterm(q,safety,toplevel) register p_term q; int safety, toplevel; {
	/*
	 * Generate code for a term.
	 * Returns the safety that holds after the term: SCANDONE or
	 * NOSCANDONE.
	 */
	register FILE	*f;
	register int	i;
	register int	rep;
	int		persistent;
	int		noscan;

	f = fpars;
	i = r_getnum(&(q->t_reps));
	rep = r_getkind(&(q->t_reps));
	persistent = (q->t_flags & PERSISTENT);
	noscan = (q->t_flags & TNOSCAN);
	if (safety == NOSCANDONE && (rep != FIXED || i == 0)) {
		fputs(c_read, f);
		if (rep == FIXED && g_gettype(q->t_rule) != ALTERNATION) {
			fputs(c_LLptrmin, f);
		}
	}
	if (i) {
		/* N > 0, so generate fixed forloop */
		fprintf(f,"{\nregister LL_i = %d;\n",i);
		fputs("for (;;) {\nif (!LL_i--) {\nLLptr++;\n", f);
		fputs("break;\n}\n", f);
		if (rep == FIXED) {
			if (noscan && safety == NOSCANDONE) {
				fputs(c_read,f);
			}
		}
	}
	else if (rep != OPT && rep != FIXED) {
		/* '+' or '*', so generate infinite loop */
		fputs("for (;;) {\n",f);
	}
	if (rep == STAR || rep == OPT) {
		genifhead(q,rep);
	}
	rulecode(q->t_rule,t_safety(rep,i,persistent,safety),
		 rep != FIXED || !noscan,
		 rep == FIXED && i == 0 && g_gettype(q->t_rule) == ALTERNATION);
	/* in the case of '+', the if is after the code for the rule */
	if (rep == PLUS) {
		if (!persistent) {
			fprintf(f, "*LLptr = %d;\n", findindex(&(q->t_first)));
		}
		genifhead(q,rep);
	}
	if (rep != OPT && rep != FIXED) fputs("continue;\n", f);
	if (rep != FIXED) {
		fputs(c_close, f); /* Close switch */
		if (rep != OPT) {
			fputs("break;\n", f);
		}
	}
	if (rep != OPT && (rep != FIXED || i > 0)) {
		fputs(c_close, f);	/* Close for */
		if (i > 0) {
			fputs(c_close, f);/* Close Register ... */
		}
	}
	if (rep != FIXED || !noscan) return SCANDONE;
	return NOSCANDONE;
}

STATIC
genifhead(q,rep) register p_term q; int rep; {
	/*
	 * Generate if statement for term q
	 */
	register FILE	*f;
	p_set		p1;
	p_set		setalloc();
	int		hulp, hulp1;

	f = fpars;
	hulp = nlabel++;
	hulp1 = nlabel++;
	fprintf(f, "L_%d : switch(LLcsymb) {\n", hulp);
	if (q->t_flags & RESOLVER) {
		p1 = setalloc(tsetsize);
		setunion(p1,q->t_first,tsetsize);
		setintersect(p1,q->t_follow,tsetsize);
		/*
		 * p1 now points to a set containing the conflicting
		 * symbols
		 */
		setminus(q->t_first, p1, tsetsize);
		setminus(q->t_follow, p1, tsetsize);
		gencases(p1);
		free((p_mem) p1);
		controlline();
		fputs("if (", f);
		getaction(0);
		fprintf(f, ") goto L_%d;\n", hulp1);
	}
	gencases(q->t_follow);
	fputs("LLptr++; break;\n", f);
	fprintf(f, "default: if (!LLnext()) goto L_%d;\n", hulp);
	gencases(q->t_first);
	if (q->t_flags & RESOLVER) {
		fprintf(f, "L_%d : ;\n", hulp1);
	}
	if (rep == OPT) {
		fputs(c_LLptrmin,f);
	}
}

STATIC
gencases(setp) register p_set setp; {
	/*
	 * setp points to a bitset indicating which cases must
	 * be generated.
	 * YECH, the PCC compiler does not accept many cases without statements
	 * inbetween, so after every case label an empty statement is
	 * generated.
	 * The C-grammar used by PCC is really stupid on this point :
	 * it contains the rule
	 *	statement : label statement
	 * which is right-recursive, and as is well known, LALR parsers don't
	 * handle these things very well.
	 * The grammar should have been written :
	 *	labeledstatement : labels statement ;
	 *	labels : labels label | ;
	 */
	register p_entry p;
	register int i;

	p = h_entry;
	for (i=0; i < nterminals; i++) {
		if (IN(setp,i)) {
			fprintf(fpars,
				p->h_num<0400 ? "case /* '%s' */ %d : ;\n"
					      : "case /* %s */ %d : ;\n",
				p->h_name, i);
		}
		p++;
	}
}

/*
 * Generate the code that pushes the single value d.
 */
STATIC
genpush(d) int d; {

	fprintf(fpars, "LLptr--;\n*LLptr = %d;\n",d);
}

static char namebuf[20];

STATIC string
genname(s) string s; {
	/*
	 * Generate a target file name from the
	 * source file name s: the last non-empty path component of s,
	 * with the part from its last '.' on (a '.' in first position
	 * does not count) replaced by ".c". The result is in the static
	 * buffer namebuf, so it is overwritten by the next call.
	 * The name is measured before anything is copied, so that an
	 * over-long name cannot overrun namebuf.
	 */
	register string c,d;
	string base;		/* start of the last path component */
	string end;		/* one past its last character */
	string dot;		/* where the ".c" suffix has to go */

	base = end = s;
	while (*s) {
		if (*s == '/') {
			while (*s == '/') s++;
			if (!*s) break;		/* trailing slashes */
			base = s;
		}
		s++;
		end = s;
	}
	dot = end;
	for (d = end; d > base && --d > base;) {
		if (*d == '.') {
			dot = d;
			break;
		}
	}
	if (dot - base >= 12) {
		/* Put as much of the name as fits in the message */
		c = namebuf;
		for (d = base; d < end && c < &namebuf[sizeof(namebuf)-1];) {
			*c++ = *d++;
		}
		*c = '\0';
		fatal(0,"%s : filename too long",namebuf);
		/*
		 * NOTE(review): fatal() is assumed not to return; if it
		 * does, still leave room for ".c" and the terminator.
		 */
		if (dot - base > (int) sizeof(namebuf) - 3) {
			dot = base + (sizeof(namebuf) - 3);
		}
	}
	c = namebuf;
	for (d = base; d < dot;) *c++ = *d++;
	*c++ = '.';
	*c++ = 'c';
	*c = '\0';
	return namebuf;
}