From 3b2d51a96b59b81e86ce2118d8110b12ba3a980f Mon Sep 17 00:00:00 2001 From: ceriel Date: Mon, 24 Nov 1986 20:42:13 +0000 Subject: [PATCH] Initial revision --- util/topgen/LLlex.c | 129 ++++++++++++++++++ util/topgen/Makefile | 40 ++++++ util/topgen/hash.c | 83 ++++++++++++ util/topgen/main.c | 56 ++++++++ util/topgen/misc.h | 10 ++ util/topgen/pattern.c | 129 ++++++++++++++++++ util/topgen/symtab.c | 48 +++++++ util/topgen/symtab.h | 18 +++ util/topgen/token.h | 10 ++ util/topgen/topgen.g | 307 ++++++++++++++++++++++++++++++++++++++++++ util/topgen/tunable.h | 11 ++ 11 files changed, 841 insertions(+) create mode 100644 util/topgen/LLlex.c create mode 100644 util/topgen/Makefile create mode 100644 util/topgen/hash.c create mode 100644 util/topgen/main.c create mode 100644 util/topgen/misc.h create mode 100644 util/topgen/pattern.c create mode 100644 util/topgen/symtab.c create mode 100644 util/topgen/symtab.h create mode 100644 util/topgen/token.h create mode 100644 util/topgen/topgen.g create mode 100644 util/topgen/tunable.h diff --git a/util/topgen/LLlex.c b/util/topgen/LLlex.c new file mode 100644 index 000000000..3b4b0f534 --- /dev/null +++ b/util/topgen/LLlex.c @@ -0,0 +1,129 @@ +/* L L l e x . c + * + * Very simple lexical analyzer. + * Also contains LLmessage(). 
+ */
+# include <stdio.h>
+# include <ctype.h>
+# include "tunable.h"
+# include "token.h"
+# include "Lpars.h"
+
+struct token dot;		/* current token */
+static struct token aside;	/* to put current token aside, when a token
+				 * is inserted
+				 */
+int	newline, lineno;	/* To keep track of linenumbers */
+extern FILE *input;		/* file descriptor of machine table */
+
+LLlex() {
+	/*
+	 * Deliver the next token from "input" and return its token number.
+	 * The token is also left in "dot": t_tokno is the token number and
+	 * t_attrib the (first) character that was read for it.
+	 * Every character is a token of its own, except that
+	 * comments are skipped and "->" is one PATTERN_SEPARATOR.
+	 * '%' and EOF are returned as themselves.
+	 * A token that was put aside by LLmessage() is delivered first.
+	 */
+	register c;
+
+	if (aside.t_tokno) {	/* A token was pushed aside, return it now */
+		dot = aside;
+		aside.t_tokno = 0;
+		return dot.t_tokno;
+	}
+	if (newline) {	/* delayed increment of lineno, needed to give
+			 * correct line numbers in error messages
+			 */
+		lineno++;
+		newline = 0;
+	}
+	c = getc(input);
+	while (c == '/') {	/* Could be a comment */
+		if ((c = getc(input)) == '*') {
+			/* Yes, it is a comment */
+			int l;		/* line on which the comment started */
+			int here;	/* line on which EOF was hit */
+
+			l = lineno;
+			do {
+				/* c is the opening '*' the first time, so
+				 * that one can never close the comment
+				 */
+				do {
+					if (c == '\n') lineno++;
+					c = getc(input);
+				} while (c != '*' && c != EOF);
+				/* Skip the whole run of '*'s, so that a
+				 * comment ending in "**" followed by '/'
+				 * is closed too
+				 */
+				while (c == '*') c = getc(input);
+			} while (c != '/' && c != EOF);
+			if (c == EOF) {
+				/* Report the error on the line where the
+				 * comment started
+				 */
+				here = lineno;
+				lineno = l;
+				error("Unterminated comment");
+				lineno = here;
+			}
+			else c = getc(input);
+		}
+		else {
+			/* Not a comment: give the character back to the
+			 * stream it was read from
+			 */
+			ungetc(c, input);
+			c = '/';
+			break;
+		}
+	}
+	dot.t_tokno = c;
+	dot.t_attrib = c;
+	if (isupper(c) || islower(c) || c == '_') {
+		dot.t_tokno = LETTER;
+		return LETTER;
+	}
+	if (isdigit(c)) {
+		dot.t_tokno = DIGIT;
+		return DIGIT;
+	}
+	switch(c) {
+	case line_term :
+		dot.t_tokno = LINE_TERMINATOR;
+		return LINE_TERMINATOR;
+	case operand_sep :
+		dot.t_tokno = OPERAND_SEPARATOR;
+		return OPERAND_SEPARATOR;
+	case instruction_sep :
+		dot.t_tokno = INSTRUCTION_SEPARATOR;
+		return INSTRUCTION_SEPARATOR;
+	case '-' :
+		c = getc(input);
+		if (c == '>') {
+			/* t_attrib still holds the '-'; the '>' is gone */
+			dot.t_tokno = PATTERN_SEPARATOR;
+			return PATTERN_SEPARATOR;
+		}
+		ungetc(c, input);
+		dot.t_tokno = OTHER;
+		return OTHER;
+	case open_bracket :
+		dot.t_tokno = OPEN_BRACKET;
+		return OPEN_BRACKET;
+	case close_bracket :
+		dot.t_tokno = CLOSE_BRACKET;
+		return CLOSE_BRACKET;
+	case '\n' :
+		newline = 1;
+		/* Fall through */
+	case ' ' :
+	case '\t' :
+		dot.t_tokno = SPACE;
+		return SPACE;
+	case '%' :
+	case EOF :
+		return c;
+	default :
+		/* Let the C-compiler find out what is illegal! */
+		dot.t_tokno = OTHER;
+		return OTHER;
+	}
+}
+
+LLmessage(d) {
+	/*
+	 * Called by the parser on a syntax error.
+	 * At most one "Syntax error" message is given per input line.
+	 * If "d" is positive, the current token is put aside, so that
+	 * the next call of LLlex() delivers it again.
+	 */
+	static int savlineno;
+
+	if (savlineno != lineno) {
+		/* Only an error message if on another line number */
+		savlineno = lineno;
+		error("Syntax error");
+	}
+	if (d > 0) {
+		/* "d" is the token to be inserted.
+		 * This is the place to put the current token aside and
+		 * give the inserted token an attribute ... but who cares
+		 */
+		aside = dot;
+	}
+}
diff --git a/util/topgen/Makefile b/util/topgen/Makefile
new file mode 100644
index 000000000..2feeeae72
--- /dev/null
+++ b/util/topgen/Makefile
@@ -0,0 +1,40 @@
+EM = ../..
+CFLAGS = -O
+SOURCE = token.h symtab.h misc.h tunable.h main.c topgen.g LLlex.c symtab.c pattern.c hash.c
+CFILES = main.c topgen.c Lpars.c LLlex.c symtab.c pattern.c hash.c
+OFILES = main.o topgen.o Lpars.o LLlex.o symtab.o pattern.o hash.o
+
+all:	parser
+	@make topgen
+
+cmp:	all
+	cmp topgen $(EM)/lib/topgen
+
+install:	all
+	cp topgen $(EM)/lib/topgen
+
+clean:
+	rm -f topgen *.o Lpars.c Lpars.h topgen.c parser
+
+parser:	topgen.g
+	$(EM)/bin/LLgen topgen.g
+	touch parser
+
+topgen.o:	token.h Lpars.h symtab.h misc.h
+Lpars.o:	Lpars.h
+LLlex.o:	token.h Lpars.h tunable.h
+symtab.o:	symtab.h
+hash.o:	misc.h
+pattern.o:	misc.h symtab.h
+
+topgen:	$(OFILES)
+	cc $(OFILES) -o topgen
+
+lint:	parser
+	lint $(CFILES)
+
+pr:
+	@pr $(SOURCE) Makefile
+
+opr:
+	make pr ^ opr
diff --git a/util/topgen/hash.c b/util/topgen/hash.c
new file mode 100644
index 000000000..c8c458269
--- /dev/null
+++ b/util/topgen/hash.c
@@ -0,0 +1,83 @@
+/* h a s h . 
c
+ *
+ * maintains the lists of hashed patterns
+ * Functions : addtohashtable() and printhashtable()
+ */
+# include <stdio.h>
+# include "misc.h"
+
+struct hlist {			/* linear list of pattern numbers */
+	int h_patno;
+	struct hlist *h_next;
+};
+
+static struct hlist *hashtable[129];	/* an array of ptr's to these lists,
+					 * the index in the array is the
+					 * result of hashing
+					 */
+
+static unsigned
+hash(string) char *string; {
+	/*
+	 * Compute the index in the hashtable for "string".
+	 * "ANY" always gets the last slot, 128; every other string
+	 * ends up somewhere in 0 .. 126.
+	 * NOTE(review): whoever reads the generated "hashtab" presumably
+	 * hashes in exactly this way - confirm before changing anything.
+	 */
+	register char *p;
+	register unsigned i,sum;
+
+	if (strcmp(string,"ANY") == 0) return 128;
+	for (sum=i=0,p=string;*p;i += 3)
+		sum += (*p++)<<(i&07);
+	return sum % 127;
+}
+
+
+addtohashtable(s,n) char *s; {
+	/*
+	 * Add a new pattern number to the hashtable.
+	 * s is the key, n the pattern number.
+	 * Running out of memory is fatal.
+	 */
+	unsigned hval;
+	register struct hlist *p;
+	char *malloc();
+
+	hval = hash(s);
+	p = (struct hlist *) malloc(sizeof *p);
+	if (!p) {
+		error("Out of memory");
+		exit(1);
+	}
+	p->h_patno = n;
+	/*
+	 * Now insert in front of the list
+	 * This way, the list will be sorted from high to low, which is the
+	 * wrong way around, but easy
+	 */
+	p->h_next = hashtable[hval];
+	hashtable[hval] = p;
+}
+
+static
+prhlist(p) struct hlist *p; {
+	/*
+	 * Print a list in reversed order (see comment above).
+	 * The numbers are written to "genc" one lower than they
+	 * were entered, each followed by ", ".
+	 */
+
+	if (p) {
+		prhlist(p->h_next);
+		fprintf(genc,"%d, ",p->h_patno - 1);
+	}
+}
+
+printhashtable() {
+	/*
+	 * Print the linear lists, and also output an array of
+	 * pointers to them.
+	 * Slot i-1 of the hashtable becomes the array "hash<i>",
+	 * slot 128 becomes "hashany"; every array ends with -1.
+	 */
+	register i;
+	register struct hlist *p;
+
+	for (i = 1; i <= 128; i++) {
+		fprintf(genc,"int hash%d[] = { ",i);
+		prhlist(hashtable[i-1]);
+		fputs("-1};\n",genc);
+	}
+	fputs("int hashany[] = { ", genc);
+	prhlist(hashtable[128]);
+	fputs("-1 };\n",genc);
+	fputs("int *hashtab[] = {",genc);
+	for (i = 1; i <= 128; i++) fprintf(genc,"hash%d,",i);
+	fputs("hashany};\n",genc);
+}
diff --git a/util/topgen/main.c b/util/topgen/main.c
new file mode 100644
index 000000000..b82b4afca
--- /dev/null
+++ b/util/topgen/main.c
@@ -0,0 +1,56 @@
+/* m a i n . 
c
+ *
+ * Contains the main program, the error reporting routine, and a routine
+ * to check whether a constraint consists only of space
+ */
+# include <stdio.h>
+
+extern int lineno, newline;
+
+FILE *genc, *genh, *input;
+static int nerrors;
+char *linedir = "#line %d \"%s\"\n";	/* format of line directive */
+char *inpfile;
+
+main(argc,argv) char *argv[]; {
+	/*
+	 * Open the description given as the only argument, create
+	 * "gen.c" and "gen.h", and run the parser.
+	 * The result is 1 on a usage error, a file that could not be
+	 * opened or written, or any reported error; 0 otherwise.
+	 */
+
+	newline = 1;
+	if (argc != 2) {
+		fprintf(stderr,"Usage : %s targetoptimizerdescription\n",argv[0]);
+		return 1;
+	}
+	if ((input = fopen(argv[1],"r")) == NULL) {
+		fprintf(stderr,"Fatal error : couldn't open %s\n",argv[1]);
+		return 1;
+	}
+	if ((genc = fopen("gen.c","w")) == NULL) {
+		fputs("Fatal error : couldn't open gen.c\n",stderr);
+		return 1;
+	}
+	if ((genh = fopen("gen.h","w")) == NULL) {
+		fputs("Fatal error : couldn't open gen.h\n",stderr);
+		return 1;
+	}
+	inpfile = argv[1];	/* needed for line directives and errors */
+	LLparse();
+	/* Do not report success on a truncated gen.c */
+	if (ferror(genc) || fclose(genc) == EOF) {
+		fputs("Fatal error : couldn't write gen.c\n",stderr);
+		return 1;
+	}
+	if (nerrors) return 1;
+	return 0;
+}
+
+/* VARARGS1 */
+error(s, s1) char *s, *s1; {
+	/*
+	 * Count an error and report it on stderr, preceded by the
+	 * name of the input file and the current line number.
+	 * "s" is a printf format that may use one string argument, "s1".
+	 */
+
+	nerrors++;
+	fprintf(stderr,"\"%s\", line %d: ",inpfile,lineno);
+	fprintf(stderr,s,s1);
+	putc('\n',stderr);
+}
+
+onlyspace(s) register char *s; {
+	/*
+	 * Return 1 if "s" has nothing but blanks, tabs and newlines
+	 * in it (so also if it is empty), 0 otherwise
+	 */
+
+	while (*s) {
+		if (*s != ' ' && *s != '\t' && *s != '\n') return 0;
+		s++;
+	}
+	return 1;
+}
diff --git a/util/topgen/misc.h b/util/topgen/misc.h
new file mode 100644
index 000000000..dcd406d1d
--- /dev/null
+++ b/util/topgen/misc.h
@@ -0,0 +1,10 @@
+/* m i s c . h
+ *
+ * Some external declarations
+ */
+extern int countid;	/* # of variables */
+extern int countpat;	/* # of patterns */
+extern int lineno;	/* line number */
+extern FILE *genc, *genh;	/* Output files */
+extern char *inpfile;	/* input file name */
+extern char *linedir;	/* line directive format */
diff --git a/util/topgen/pattern.c b/util/topgen/pattern.c
new file mode 100644
index 000000000..d53f73252
--- /dev/null
+++ b/util/topgen/pattern.c
@@ -0,0 +1,129 @@
+/* p a t t e r n . c
+ *
+ * Deals with the pattern stuff. 
+ * it maintains a table of information about the patterns
+ * Functions : addpattern() and printpatterns()
+ */
+# include <stdio.h>
+# include <ctype.h>
+# include "misc.h"
+# include "symtab.h"
+
+struct pattern {
+	char *p_constraint;	/* constraint of this pattern */
+	int p_lineno,		/* line number of constraint */
+	    p_npat,		/* # of instructions in pattern */
+	    p_nrepl;		/* # of instructions in replacement */
+};
+
+static struct pattern	*pattable,	/* ptr to pattern array */
+			*current,	/* ptr to first unoccupied el of
+					 * pattern array
+					 */
+			*maxpat;	/* if beyond this, new space must
+					 * be allocated
+					 */
+
+addpattern(str,l,np,nr) char *str; {
+	/*
+	 * Just add a pattern to the list.
+	 * "str" is the constraint, "l" is the line number,
+	 * "np" is the number of instructions in the pattern,
+	 * "nr" is the number of instructions in the replacement
+	 * Space is allocated in chunks of 50
+	 * Running out of memory is fatal.
+	 */
+	char *malloc(), *realloc();
+	register struct pattern *p;
+	unsigned used, size;
+
+	if (!pattable) {		/* No space allocated yet */
+		pattable = (struct pattern *) malloc(50 * sizeof *pattable);
+		if (!pattable) {
+			error("Out of memory");
+			exit(1);
+		}
+		current = pattable;
+		maxpat = pattable + 50;
+	}
+	if (current >= maxpat) {	/* Allocate some new space */
+		/* Remember the positions as counts: the array may move,
+		 * and the old pointer is of no use after that
+		 */
+		used = current - pattable;
+		size = maxpat - pattable;
+		p = (struct pattern *) realloc(
+			(char *) pattable,
+			(unsigned) (sizeof *pattable * (50 + size)));
+		if (!p) {
+			error("Out of memory");
+			exit(1);
+		}
+		pattable = p;
+		current = pattable + used;
+		maxpat = pattable + size + 50;
+	}
+	p = current++;
+	p->p_constraint = str;
+	p->p_lineno = l;
+	p->p_npat = np;
+	p->p_nrepl = nr;
+}
+
+static
+prconstraint(str) char *str; {
+	/*
+	 * prints a constraint, with variable names replaced:
+	 * an identifier that is found in "idtable" is written as
+	 * "var[<number>].value", all other text is copied as it is.
+	 * "str" is changed while busy, but restored afterwards.
+	 */
+	char c;
+	register char *p, *q;
+	struct symtab *name;
+
+	p = str;
+	while (*p) {
+		if (isupper(*p) || islower(*p) || *p == '_') {
+			/*
+			 * Start of identifier
+			 */
+			q = p + 1;
+			while (*q && (
+			    isupper(*q) || islower(*q) || isdigit(*q) || *q == '_')) {
+				q++;
+			}
+			c = *q;
+			*q = '\0';
+			/* Temporarily let it end with null byte */
+			name = findident(p,LOOKING,&idtable);
+			if (name) {	/* yeah, it was a variable */
+				fprintf(genc,"var[%d].value", name->s_num);
+			}
+			else	fputs(p,genc);
+			/* Now replace null byte with whatever used to be there */
+			*q = c;
+			p = q;
+		}
+		else {
+			putc(*p,genc);
+			p++;
+		}
+	}
+}
+
+printpatterns() {
+	/*
+	 * Prints the pattern_descr table and generates the routine
+	 * "check_constraint".
+	 * The entries of the table refer to "pat<i>" and "rep<i>",
+	 * counting from 1; the cases of "check_constraint" count from 0.
+	 * Patterns without a constraint get no case and so end up in
+	 * the default, which yields 1.
+	 */
+	register struct pattern *p;
+	register i;
+
+	p = pattable;
+	i = 1;
+	fputs("struct pattern_descr patterns[] = {\n", genc);
+	while (p != current) {
+		fprintf(genc," {%d,pat%d,%d,rep%d,},\n",
+			p->p_npat, i, p->p_nrepl, i);
+		p++;
+		i++;
+	}
+	fputs("};\n", genc);
+	fputs("int\ncheck_constraint(patno){\n\tint r;\n\tswitch(patno){\n",genc);
+	p = pattable;
+	while (p < current) {
+		if (p->p_constraint) {
+			/* The pattern has a constraint */
+			/* A pointer difference need not be an int,
+			 * but %d wants one
+			 */
+			fprintf(genc,"\tcase %d :\n",(int) (p - pattable));
+			fprintf(genc,linedir,p->p_lineno,inpfile); /* linedirective */
+			fputs("\tr = (",genc);
+			prconstraint(p->p_constraint);
+			fputs("); break;\n",genc);
+		}
+		p++;
+	}
+	fputs("\tdefault :\n\t\tr = 1;\n\t}\n\treturn r;\n}\n\n",genc);
+}
diff --git a/util/topgen/symtab.c b/util/topgen/symtab.c
new file mode 100644
index 000000000..1d1792bcc
--- /dev/null
+++ b/util/topgen/symtab.c
@@ -0,0 +1,48 @@
+/* s y m t a b . c
+ *
+ * Contains the routine findident, which builds the symbol table and
+ * searches identifiers
+ */
+# include "symtab.h"
+
+struct symtab *idtable, *deftable;
+
+struct symtab *
+findident(s, mode, table) char *s; struct symtab **table; {
+	/*
+	 * Look for identifier s in the symboltable referred to by *table.
+	 * If mode = LOOKING, no new entries will be made, and a null
+	 * pointer is returned if s is not in the table.
+	 * If mode = ENTERING, a new entry will be made if s is not in the
+	 * table yet, otherwise an error results (and the entry that was
+	 * already there is returned).
+	 * A new entry gets a copy of s and number 0.
+	 * Running out of memory is fatal.
+	 */
+	char *malloc();
+	char *strcpy();
+	register struct symtab *p;
+	register n;
+
+	if (!*table) {	/* No entry for this symbol */
+		if (mode == LOOKING) return (struct symtab *) 0;
+		/*
+		 * Make new entry
+		 */
+		p = (struct symtab *) malloc(sizeof *p);
+		if (!p) {
+			error("Out of memory");
+			exit(1);
+		}
+		p->s_left = p->s_right = (struct symtab *) 0;
+		p->s_num = 0;
+		p->s_name = malloc( (unsigned) (strlen(s) + 1));
+		if (!p->s_name) {
+			error("Out of memory");
+			exit(1);
+		}
+		strcpy(p->s_name,s);
+		*table = p;
+		return p;
+	}
+	else {
+		p = *table;
+		if ((n = strcmp(p->s_name,s)) == 0) {	/* This is it! */
+			if (mode == ENTERING) {
+				error("Identifier %s redefined",s);
+			}
+			return p;
+		}
+		/* Binary tree ..... */
+		if (n < 0) return findident(s,mode,&(p->s_left));
+		return findident(s,mode,&(p->s_right));
+	}
+	/* NOTREACHED */
+}
diff --git a/util/topgen/symtab.h b/util/topgen/symtab.h
new file mode 100644
index 000000000..db61fe059
--- /dev/null
+++ b/util/topgen/symtab.h
@@ -0,0 +1,18 @@
+/* s y m t a b . h
+ *
+ * Contains a structure declaration for a symbol table that is a
+ * binary tree
+ */
+struct symtab {
+	char *s_name;		/* The name of the symbol */
+	int s_num;		/* its number */
+	struct symtab *s_left, *s_right;
+};
+
+extern struct symtab *idtable,		/* table for variables */
+		     *deftable;		/* table for tunable defines */
+struct symtab *findident();
+
+/* Options to "findident" */
+#define LOOKING 1
+#define ENTERING 0
diff --git a/util/topgen/token.h b/util/topgen/token.h
new file mode 100644
index 000000000..3c5af9fcb
--- /dev/null
+++ b/util/topgen/token.h
@@ -0,0 +1,10 @@
+/* t o k e n . 
h
+ *
+ * Contains the structure declaration for a token
+ */
+struct token {
+	int t_tokno;		/* token number */
+	char t_attrib;		/* its attribute (always a character) */
+};
+
+extern struct token dot;	/* current token */
diff --git a/util/topgen/topgen.g b/util/topgen/topgen.g
new file mode 100644
index 000000000..d3413b6cd
--- /dev/null
+++ b/util/topgen/topgen.g
@@ -0,0 +1,307 @@
+/* t o p g e n . g
+ *
+ * Grammar of optimizer description, and some code generation
+ */
+
+%token	LETTER, DIGIT, OTHER, SPACE;
+%token	LINE_TERMINATOR, OPERAND_SEPARATOR, INSTRUCTION_SEPARATOR,
+	PATTERN_SEPARATOR, OPEN_BRACKET, CLOSE_BRACKET;
+%lexical LLlex;
+%start	LLparse, optim_description;
+
+{
+# include <stdio.h>
+# include "token.h"
+# include "symtab.h"
+# include "misc.h"
+
+char	idbuf[BUFSIZ], buf[BUFSIZ];
+			/* NOTE(review): nothing below checks whether the
+			 * text that is collected still fits in these
+			 */
+int	countid;		/* # of variables */
+int	countpat;		/* # of patterns */
+char	* malloc();
+char	* strcpy();
+static int patlen;		/* Maximum number of instructions in pattern */
+static int maxoperand;		/* Maximum number of operands of instruction */
+extern FILE *input;		/* file descriptor of inputfile */
+}
+
+/* The whole description: parameter lines, then mode definitions,
+ * patterns and user routines, each part preceded by a separator.
+ * "maxoperand" is taken from the parameter MAXOP, if that was given.
+ */
+optim_description
+	{ struct symtab *p; } :
+	SPACE* parameter_line*
+			{   p = findident("MAXOP",LOOKING,&deftable);
+			    if (p == 0) maxoperand = 2;	/* default */
+			    else maxoperand = p->s_num;
+			}
+	separator SPACE* mode_definitions
+	separator SPACE* patterns
+	separator user_routines
+;
+
+/* One parameter: it is entered in "deftable" and written to gen.h
+ * as a #define
+ */
+parameter_line
+	{ struct symtab *p;} :
+	identifier
+			{   p = findident(idbuf,ENTERING,&deftable);}
+	SPACE
+	value
+			{   p->s_num = atoi(buf);}
+			/* This action in fact only needed for MAXOP */
+	LINE_TERMINATOR
+	SPACE*
+			{   fprintf(genh,"#define %s %s\n",p->s_name,buf);}
+;
+
+/* Collects everything up to the line terminator in "buf" */
+value
+	{ char *p1 = buf;} :
+	[
+		[	OPEN_BRACKET
+		|	CLOSE_BRACKET
+		|	OPERAND_SEPARATOR
+		|	PATTERN_SEPARATOR
+		|	INSTRUCTION_SEPARATOR
+		|	SPACE
+		|	LETTER
+		|	DIGIT
+		|	OTHER
+		|	'%'
+		]
+			{   *p1++ = dot.t_attrib;
+			    /* A pattern separator is two characters, but
+			     * its attribute only has the '-'
+			     */
+			    if (dot.t_tokno == PATTERN_SEPARATOR) *p1++ = '>';
+			}
+	]*
+			{   *p1 = '\0';}
+;
+
+/* Generates the routine "tok_chk", with for every variable a case
+ * in which its constraint is evaluated
+ */
+mode_definitions
+	{ int lin; } :
+			{   fputs("tok_chk(varno) {\n\tint r;\n", genc);
+			    fputs("\tchar *VAL;\n\n",genc);
+			    fputs("\tVAL = var[varno].value;\n",genc);
+			    fputs("\tswitch(varno) {\n",genc);
+			}
+	[
+		token_list
+		constraint(&lin)
+			{   fprintf(genc,linedir,lin,inpfile);
+			    fprintf(genc,"\t\tr = (%s); break;\n",buf);
+			}
+		LINE_TERMINATOR
+		SPACE*
+	]*
+			{   fputs("\tdefault :\n\t\tassert(0);\n",genc);
+			    fputs("\t}\n\treturn r;\n}\n\n",genc);
+			}
+;
+
+token_list :
+	new_identifier
+	SPACE*
+	[
+		OPERAND_SEPARATOR
+		SPACE*
+		new_identifier
+		SPACE*
+	]*
+;
+
+/* A variable: it gets the next number, and a case label with that
+ * number is written to gen.c
+ */
+new_identifier
+	{ struct symtab *p;} :
+	identifier
+			{   p = findident(idbuf,ENTERING,&idtable);
+			    p->s_num = ++countid;
+			    fprintf(genc,"\tcase %d:\n", countid);
+			}
+;
+
+/* Collects the text between the brackets in "buf", or "TRUE" if
+ * there is only space between them.
+ * "*lin" becomes the line number of the opening bracket.
+ */
+constraint (int *lin;)
+	{ char *p = buf; } :
+	OPEN_BRACKET
+			{   *lin = lineno;}
+	[
+		[	LINE_TERMINATOR
+		|	OPERAND_SEPARATOR
+		|	PATTERN_SEPARATOR
+		|	INSTRUCTION_SEPARATOR
+		|	LETTER
+		|	DIGIT
+		|	SPACE
+		|	OTHER
+		|	'%'
+		]
+			{   *p++ = dot.t_attrib;
+			    /* A pattern separator is two characters, but
+			     * its attribute only has the '-'
+			     */
+			    if (dot.t_tokno == PATTERN_SEPARATOR) *p++ = '>';
+			}
+	]*
+			{   *p = '\0';
+			    if (onlyspace(buf)) strcpy(buf,"TRUE");
+			}
+	CLOSE_BRACKET
+	SPACE*
+;
+
+/* All patterns. Each one gives an array "pat<n>" and an array
+ * "rep<n>" in gen.c, and an entry in the pattern table.
+ * After the last one the tables and the remaining defines of gen.h
+ * are written, and gen.h is closed.
+ */
+patterns
+	{ int lin;
+	  char *constr;
+	  int np, nr;
+	} :
+[
+			{   countpat++;
+			    constr = (char *) 0;
+			    lin = 0;	/* there may be no constraint */
+			    fprintf(genc,"struct instr_descr pat%d[] = {\n",
+				    countpat);
+			}
+	instruction_list(&np)
+			{   if (np > patlen) patlen = np;
+			    fputs("\n};\n\n",genc);
+			}
+	[
+		constraint(&lin)
+			{   /* Save the constraint, we need it later on */
+			    constr = malloc((unsigned)(strlen(buf)+1));
+			    if (!constr) {
+				error("Out of memory");
+				exit(1);
+			    }
+			    strcpy(constr,buf);
+			}
+	]?
+	PATTERN_SEPARATOR
+			{   fprintf(genc,"struct instr_descr rep%d[] = {\n",
+				    countpat);
+			}
+	replacement(&nr)
+			{   fputs("\n};\n\n",genc);}
+	LINE_TERMINATOR
+	SPACE*
+			{   addpattern(constr,lin,np,nr);}
+]*
+			{   printhashtable();
+			    printpatterns();
+			    fprintf(genh,"#define NRVARS %d\n",countid);
+			    fprintf(genh,"#define NRPATTERNS %d\n",countpat);
+			    fprintf(genh,"#define MIN_WINDOW_SIZE %d\n",
+				    patlen+3);
+			    fclose(genh);
+			}
+;
+
+/* "*n" becomes the number of instructions in the list.
+ * Only the opcode of the first one is entered in the hashtable.
+ */
+instruction_list(int *n;) :
+	instruction(1)
+			{   *n = 1;}
+	[
+		INSTRUCTION_SEPARATOR
+			{   fputs(",\n",genc);}
+		SPACE*
+		instruction(0)
+			{   *n += 1;}
+	]*
+;
+
+/* Writes one initializer: the opcode (a null pointer for "ANY"),
+ * followed by the operands, filled up to "maxoperand" of them
+ */
+instruction(int opt;)
+	{ int count;} :
+	opcode(opt)
+			{   if (strcmp(buf,"ANY") != 0) {
+				fprintf(genc,"\t{\"%s\"",buf);
+			    }
+			    else fputs("\t{(char *) 0",genc);
+			    count = 0;
+			}
+	[
+		operand
+			{   count = 1;}
+		[
+			OPERAND_SEPARATOR
+			{   count++;}
+			SPACE*
+			operand
+		]*
+			{   if (count > maxoperand) {
+				error("Too many operands");
+			    }
+			}
+	]?
+			{   while (count++ < maxoperand) {
+				fputs(",{\"\",-1,\"\"}",genc);
+			    }
+			    putc('}',genc);
+			}
+;
+
+/* Collects the opcode in "buf"; if "opt" is set, it is also entered
+ * in the hashtable, with the number of the current pattern
+ */
+opcode(int opt;)
+	{ char *p = buf;} :
+	[
+		[	LETTER
+		|	DIGIT
+		|	OTHER
+		]
+			{   *p++ = dot.t_attrib;}
+	]+
+	SPACE+
+			{   *p = '\0';
+			    if (opt) addtohashtable(buf,countpat);
+			}
+;
+
+/* Writes one operand initializer. The first identifier in it that
+ * is a variable is replaced by its number, which splits the text in
+ * a part before and a part after it; without a variable the number
+ * is 0 and the second part is empty.
+ */
+operand
+	{ register struct symtab *p = 0;} :
+			{   fputs(",{\"",genc);}
+	[
+		identifier
+			{   if (!p) {
+				p = findident(idbuf,LOOKING,&idtable);
+				if (p) fprintf(genc,"\",%d,\"",p->s_num);
+				else fputs(idbuf,genc);
+			    }
+			    else fputs(idbuf,genc);
+			}
+	|	DIGIT
+			{   putc(dot.t_attrib,genc);}
+	|	OTHER
+			{   putc(dot.t_attrib,genc);}
+	]+
+			{   if (p) fputs("\"}",genc);
+			    else fputs("\",0,\"\"}",genc);
+			}
+	SPACE*
+;
+
+/* "*n" becomes the number of instructions in the replacement,
+ * which may be 0
+ */
+replacement (int *n;)
+	{register i;} :
+	SPACE*
+			{   *n = 0;}
+	[
+		instruction(0)
+			{   *n = 1;}
+		[
+			INSTRUCTION_SEPARATOR
+			{   fputs(",\n", genc);}
+			SPACE*
+			instruction(0)
+			{   *n += 1;}
+		]*
+	|		/* empty replacement, but there must be a
+			 * structure initializer anyway
+			 */
+			{   fputs("\t{\"\"",genc);
+			    for (i = 0; i < maxoperand; i++) {
+				fputs(",{\"\",-1,\"\"}",genc);
+			    }
+			    putc('}',genc);
+			}
+	]
+;
+
+/* The rest of the input is copied to gen.c, after a line directive */
+user_routines
+	{register c;} :
+			{   fprintf(genc, linedir, lineno, inpfile);
+			    while ((c = getc(input)) != EOF) {
+				putc(c,genc);
+			    }
+			}
+;
+
+/* Collects an identifier in "idbuf" */
+identifier
+	{ char *p = idbuf; } :
+	LETTER
+			{ *p++ = dot.t_attrib;}
+	[ %while (1)
+	  LETTER	{ *p++ = dot.t_attrib;}
+	| DIGIT		{ *p++ = dot.t_attrib;}
+	]*
+			{ *p = '\0';}
+;
+
+separator :
+	'%' '%' SPACE* LINE_TERMINATOR
+;
diff --git a/util/topgen/tunable.h b/util/topgen/tunable.h
new file mode 100644
index 000000000..e79f3820b
--- /dev/null
+++ b/util/topgen/tunable.h
@@ -0,0 +1,11 @@
+/* t u n a b l e . h
+ *
+ * Contains some tunable constants.
+ * They should all differ, not be a letter or digit or '_', not be a '%',
+ * and not be space (' ','\t','\n' and the like).
+ */
+# define line_term		';'
+# define operand_sep		','
+# define instruction_sep	':'
+# define open_bracket		'{'
+# define close_bracket		'}'