From 4546dd5f2251524f3df717018a97973ce0e13199 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 21 Sep 2016 00:43:10 +0200 Subject: [PATCH] Massive grammar overhaul and refactor. Hacked in support for predicates, where instructions can be turned on and off based on their parameters. New lexer using a lexer. Now quite a lot of the way towards being a real instruction selector. --- mach/proto/mcg/build.lua | 1 + util/mcgg/build.lua | 25 ++++ util/mcgg/gram.y | 186 ++++++++++++------------------ util/mcgg/iburg.c | 165 ++++++++++++++++++-------- util/mcgg/iburg.h | 39 +++++-- util/mcgg/mcgg_generated_footer.h | 43 +++++++ util/mcgg/scan.l | 69 +++++++++++ 7 files changed, 360 insertions(+), 168 deletions(-) create mode 100644 util/mcgg/build.lua create mode 100644 util/mcgg/mcgg_generated_footer.h create mode 100644 util/mcgg/scan.l diff --git a/mach/proto/mcg/build.lua b/mach/proto/mcg/build.lua index c94d44ece..46394d510 100644 --- a/mach/proto/mcg/build.lua +++ b/mach/proto/mcg/build.lua @@ -27,6 +27,7 @@ cprogram { "modules/src/read_em+lib_kv", "modules/src/system+lib", "./*.h", + "util/mcgg+mcgg", }, vars = { ["+cflags"] = { diff --git a/util/mcgg/build.lua b/util/mcgg/build.lua new file mode 100644 index 000000000..d0022baa9 --- /dev/null +++ b/util/mcgg/build.lua @@ -0,0 +1,25 @@ +include("first/yacc.lua") + +flex { + name = "flex", + srcs = { "./*.l" }, +} + +yacc { + name = "yacc", + srcs = { "./*.y" }, +} + +cprogram { + name = "mcgg", + srcs = { + "./*.c", + matching(filenamesof("+flex"), "%.c$"), + matching(filenamesof("+yacc"), "%.c$") + }, + deps = { + "./*.h", + "+yacc" + } +} + diff --git a/util/mcgg/gram.y b/util/mcgg/gram.y index 965c9e9df..ea0abbc75 100644 --- a/util/mcgg/gram.y +++ b/util/mcgg/gram.y @@ -3,65 +3,105 @@ #include #include #include "iburg.h" + +#define YYDEBUG 1 + static char rcsid[] = "$Id$"; -static int yylineno = 0; +static int nextesn = 0; +static int nextern = 0; + %} %union { int n; - char *string; + char* string; Tree tree; + Stringlist stringlist; } %term TERMINAL %term START %term PPERCENT -%token ID -%token INT -%type lhs -%type tree -%type cost +%term PATTERNS +%term PAT +%term WHEN +%term EMIT +%term COST + +%token ID +%token CFRAGMENT +%token INT + +%type lhs +%type rhs +%type cost +%type when +%type stringlist %% -spec : decls PPERCENT rules { yylineno = 0; } - | decls { yylineno = 0; } +spec + : decls PPERCENT patterns + | decls ; decls : /* lambda */ | decls decl ; -decl : TERMINAL blist '\n' - | START lhs '\n' { - if (nonterm($2)->number != 1) - yyerror("redeclaration of the start symbol\n"); - } - | '\n' - | error '\n' { yyerrok; } +decl + : TERMINAL blist ';' + | START lhs ';' + { + if (nonterm($2)->number != 1) + yyerror("redeclaration of the start symbol\n"); + } + | ';' + | error ';' { yyerrok; } ; -blist : /* lambda */ - | blist ID '=' INT { term($2, $4); } +blist + : /* nothing */ + | blist ID { term($2, nextesn++); } + ; + +patterns + : /* nothing */ + | patterns pattern ';' + | patterns ';' + | patterns error ';' { yyerrok; } ; -rules : /* lambda */ - | rules lhs ':' tree '=' INT cost ';' '\n' { rule($2, $4, $6, $7); } - | rules '\n' - | rules error '\n' { yyerrok; } +pattern + : lhs '=' rhs when cost { rule($1, $3, nextern++, $4, $5); } + ; + +lhs + : ID { $$ = $1; nonterm($$); } ; -lhs : ID { nonterm($$ = $1); } +rhs + : ID { $$ = tree($1, NULL, NULL); } + | ID '(' rhs ')' { $$ = tree($1, $3, NULL); } + | ID '(' rhs ',' rhs ')' { $$ = tree($1, $3, $5); } ; -tree : ID { $$ = tree($1, NULL, NULL); } - | ID '(' tree ')' { $$ = tree($1, $3, NULL); } - | ID '(' tree ',' tree ')' { $$ = tree($1, $3, $5); } - ; +when + : /* nothing */ { $$ = NULL; } + | WHEN stringlist { $$ = $2; } + ; -cost : /* lambda */ { $$ = 0; } - | '(' INT ')' { if ($2 > maxcost) { - yyerror("%d exceeds maximum cost of %d\n", $2, maxcost); - $$ = maxcost; - } else - $$ = $2; } +stringlist + : /* nothing */ { $$ = NULL; } + | CFRAGMENT stringlist { $$ = pushstring($1, $2); } + ; + +cost + : /* lambda */ { $$ = 0; } + | COST INT { + if ($2 > maxcost) { + yyerror("%d exceeds maximum cost of %d\n", $2, maxcost); + $$ = maxcost; + } else + $$ = $2; + } ; %% #include @@ -73,32 +113,6 @@ FILE *outfp = NULL; static char buf[BUFSIZ], *bp = buf; static int ppercent = 0; -static int get(void) { - if (*bp == 0) { - bp = buf; - *bp = 0; - if (fgets(buf, sizeof buf, infp) == NULL) - return EOF; - yylineno++; - while (buf[0] == '%' && buf[1] == '{' && (buf[2] == '\n' || buf[2] == '\r')) { - for (;;) { - if (fgets(buf, sizeof buf, infp) == NULL) { - yywarn("unterminated %{...%}\n"); - return EOF; - } - yylineno++; - if (strcmp(buf, "%}\n") == 0 || strcmp(buf, "%}\r\n") == 0) - break; - fputs(buf, outfp); - } - if (fgets(buf, sizeof buf, infp) == NULL) - return EOF; - yylineno++; - } - } - return *bp++; -} - void yyerror(char *fmt, ...) { va_list ap; @@ -111,58 +125,6 @@ void yyerror(char *fmt, ...) { errcnt++; } -int yylex(void) { - int c; - - while ((c = get()) != EOF) { - switch (c) { - case ' ': case '\f': case '\t': case '\r': - continue; - case '\n': - case '(': case ')': case ',': - case ';': case '=': case ':': - return c; - } - if (c == '%' && *bp == '%') { - bp++; - return ppercent++ ? 0 : PPERCENT; - } else if (c == '%' && strncmp(bp, "term", 4) == 0 - && isspace(bp[4])) { - bp += 4; - return TERMINAL; - } else if (c == '%' && strncmp(bp, "start", 5) == 0 - && isspace(bp[5])) { - bp += 5; - return START; - } else if (isdigit(c)) { - int n = 0; - do { - int d = c - '0'; - if (n > (INT_MAX - d)/10) - yyerror("integer greater than %d\n", INT_MAX); - else - n = 10*n + d; - c = get(); - } while (c != EOF && isdigit(c)); - bp--; - yylval.n = n; - return INT; - } else if (isalpha(c)) { - char *p = bp - 1; - while (isalpha(*bp) || isdigit(*bp) || *bp == '_') - bp++; - yylval.string = alloc(bp - p + 1); - strncpy(yylval.string, p, bp - p); - yylval.string[bp - p] = 0; - return ID; - } else if (isprint(c)) - yyerror("invalid character `%c'\n", c); - else - yyerror("invalid character `\\%03o'\n", (unsigned char)c); - } - return 0; -} - void yywarn(char *fmt, ...) { va_list ap; @@ -172,3 +134,5 @@ void yywarn(char *fmt, ...) { fprintf(stderr, "warning: "); vfprintf(stderr, fmt, ap); } + +/* vim: set sw=4 ts=4 expandtab : */ diff --git a/util/mcgg/iburg.c b/util/mcgg/iburg.c index 13c187d3b..833d3e0d4 100644 --- a/util/mcgg/iburg.c +++ b/util/mcgg/iburg.c @@ -20,7 +20,6 @@ static Nonterm nts; static Rule rules; static int nrules; -static char* stringf(char* fmt, ...); static void print(char* fmt, ...); static void ckreach(Nonterm p); static void emitclosure(Nonterm nts); @@ -34,6 +33,8 @@ static void emitleaf(Term p, int ntnumber); static void emitnts(Rule rules, int nrules); static void emitrecord(char* pre, Rule r, int cost); static void emitrule(Nonterm nts); +static void emitpredicatedefinitions(Rule rules); +static void emitpredicatecall(Rule rule); static void emitstate(Term terms, Nonterm start, int ntnumber); static void emitstring(Rule rules); static void emitstruct(Nonterm nts, int ntnumber); @@ -45,6 +46,11 @@ int main(int argc, char* argv[]) int c, i; Nonterm p; + #if 0 + extern int yydebug; + yydebug = 1; + #endif + if (sizeof(short) == sizeof(int)) maxcost = SHRT_MAX / 2; for (i = 1; i < argc; i++) @@ -88,7 +94,10 @@ int main(int argc, char* argv[]) infp = stdin; if (outfp == NULL) outfp = stdout; + + yyin = infp; yyparse(); + if (start) ckreach(start); for (p = nts; p; p = p->link) @@ -103,6 +112,7 @@ int main(int argc, char* argv[]) emitstring(rules); emitrule(nts); emitclosure(nts); + emitpredicatedefinitions(rules); if (start) emitstate(terms, start, ntnumber); print("#ifdef STATE_LABEL\n"); @@ -111,35 +121,30 @@ int main(int argc, char* argv[]) emitkids(rules, nrules); emitfuncs(); print("#endif\n"); - if (!feof(infp)) - while ((c = getc(infp)) != EOF) - putc(c, outfp); + print("#include \"mcgg_generated_footer.h\"\n"); return errcnt > 0; } -/* alloc - allocate nbytes or issue fatal error */ -void* alloc(int nbytes) -{ - void* p = calloc(1, nbytes); - - if (p == NULL) - { - yyerror("out of memory\n"); - exit(1); - } - return p; -} - /* stringf - format and save a string */ -static char* stringf(char* fmt, ...) +char* stringf(char* fmt, ...) { - va_list ap; - char* s, buf[512]; + int n; + char* p; + va_list ap; - va_start(ap, fmt); - vsprintf(buf, fmt, ap); - va_end(ap); - return strcpy(alloc(strlen(buf) + 1), buf); + va_start(ap, fmt); + n = vsnprintf(NULL, 0, fmt, ap) + 1; + va_end(ap); + + p = malloc(n); + if (!p) + return NULL; + + va_start(ap, fmt); + vsnprintf(p, n, fmt, ap); + va_end(ap); + + return p; } struct entry @@ -178,7 +183,7 @@ static void* lookup(char* name) /* install - install symbol name */ static void* install(char* name) { - struct entry* p = alloc(sizeof *p); + struct entry* p = calloc(1, sizeof *p); int i = hash(name) % HASHSIZE; p->sym.name = name; @@ -228,13 +233,16 @@ Term term(char* id, int esn) p->name, p->esn); p->link = *q; *q = p; + + if (esn != -1) + print("enum { %s = %d };\n", id, esn); return p; } /* tree - create & initialize a tree node with the given fields */ Tree tree(char* id, Tree left, Tree right) { - Tree t = alloc(sizeof *t); + Tree t = calloc(1, sizeof *t); Term p = lookup(id); int arity = 0; @@ -268,12 +276,17 @@ Tree tree(char* id, Tree left, Tree right) } /* rule - create & initialize a rule with the given fields */ -Rule rule(char* id, Tree pattern, int ern, int cost) +Rule rule(char* id, Tree pattern, int ern, Stringlist when, int cost) { - Rule r = alloc(sizeof *r), *q; + Rule r = calloc(1, sizeof *r); + Rule *q; Term p = pattern->op; + if (when && (p->arity == 0)) + yyerror("can't have a when clause on leaf nodes"); + nrules++; + r->when = when; r->lhs = nonterm(id); r->packed = ++r->lhs->lhscount; for (q = &r->lhs->rules; *q; q = &(*q)->decode) @@ -302,6 +315,14 @@ Rule rule(char* id, Tree pattern, int ern, int cost) return r; } +Stringlist pushstring(const char* data, Stringlist list) +{ + Stringlist sl = calloc(1, sizeof *sl); + sl->payload = data; + sl->next = list; + return sl; +} + /* print - formatted output */ static void print(char* fmt, ...) { @@ -360,6 +381,11 @@ static void print(char* fmt, ...) va_end(ap); } +void printlineno(void) +{ + print("#line %d\n", yylineno); +} + /* reach - mark all non-terminals in tree t as reachable */ static void reach(Tree t) { @@ -415,17 +441,26 @@ static void emitcase(Term p, int ntnumber) { case 0: case -1: - print("%2{%1/* %R */\n%3c = ", r); + print("%2if ("); + emitpredicatecall(r); + print(")\n%2{%1/* %R */\n%3c = ", r); break; case 1: if (r->pattern->nterms > 1) { print("%2if (%1/* %R */\n", r); emittest(r->pattern->left, "l", " "); + print("%3&& "); + emitpredicatecall(r); + print("\n"); print("%2) {\n%3c = "); } else - print("%2{%1/* %R */\n%3c = ", r); + { + print("%2if ("); + emitpredicatecall(r); + print(")\n%2{%1/* %R */\n%3c = ", r); + } emitcost(r->pattern->left, "l"); break; case 2: @@ -435,10 +470,17 @@ static void emitcase(Term p, int ntnumber) emittest(r->pattern->left, "l", r->pattern->right->nterms ? " && " : " "); emittest(r->pattern->right, "r", " "); + print("%3&& "); + emitpredicatecall(r); + print("\n"); print("%2) {\n%3c = "); } else - print("%2{%1/* %R */\n%3c = ", r); + { + print("%2if ("); + emitpredicatecall(r); + print(")\n%2{%1/* %R */\n%3c = ", r); + } emitcost(r->pattern->left, "l"); emitcost(r->pattern->right, "r"); break; @@ -555,8 +597,8 @@ static char* computekids(Tree t, char* v, char* bp, int* ip) static void emitkids(Rule rules, int nrules) { int i; - Rule r, * rc = alloc((nrules + 1) * sizeof *rc); - char** str = alloc((nrules + 1) * sizeof *str); + Rule r, * rc = calloc(nrules+1, sizeof *rc); + char** str = calloc(nrules+1, sizeof *str); for (i = 0, r = rules; r; r = r->link) { @@ -566,7 +608,7 @@ static void emitkids(Rule rules, int nrules) for (j = 0; str[j] && strcmp(str[j], buf); j++) ; if (str[j] == NULL) - str[j] = strcpy(alloc(strlen(buf) + 1), buf); + str[j] = strdup(buf); r->kids = rc[j]; rc[j] = r; } @@ -592,16 +634,16 @@ static void emitlabel(Nonterm start) "%1case 0:\n"); if (Tflag) print("%2%Pnp = p;\n"); - print("%2STATE_LABEL(p) = %Pstate(OP_LABEL(p), 0, 0);\n%2break;\n" + print("%2STATE_LABEL(p) = %Pstate(p, 0, 0);\n%2break;\n" "%1case 1:\n%2%Plabel1(LEFT_CHILD(p));\n"); if (Tflag) print("%2%Pnp = p;\n"); - print("%2STATE_LABEL(p) = %Pstate(OP_LABEL(p),\n" + print("%2STATE_LABEL(p) = %Pstate(p,\n" "%3STATE_LABEL(LEFT_CHILD(p)), 0);\n%2break;\n" "%1case 2:\n%2%Plabel1(LEFT_CHILD(p));\n%2%Plabel1(RIGHT_CHILD(p));\n"); if (Tflag) print("%2%Pnp = p;\n"); - print("%2STATE_LABEL(p) = %Pstate(OP_LABEL(p),\n" + print("%2STATE_LABEL(p) = %Pstate(p,\n" "%3STATE_LABEL(LEFT_CHILD(p)),\n%3STATE_LABEL(RIGHT_CHILD(p)));\n%2break;\n" "%1}\n}\n\n"); print( @@ -635,8 +677,8 @@ static void emitleaf(Term p, int ntnumber) if (cost == NULL) { - cost = alloc((ntnumber + 1) * sizeof *cost); - rule = alloc((ntnumber + 1) * sizeof *rule); + cost = calloc(ntnumber+1, sizeof *cost); + rule = calloc(ntnumber+1, sizeof *rule); } for (i = 0; i <= ntnumber; i++) { @@ -686,8 +728,8 @@ static char* computents(Tree t, char* bp) static void emitnts(Rule rules, int nrules) { Rule r; - int i, j, * nts = alloc(nrules * sizeof *nts); - char** str = alloc(nrules * sizeof *str); + int i, j, * nts = calloc(nrules, sizeof *nts); + char** str = calloc(nrules, sizeof *str); for (i = 0, r = rules; r; r = r->link) { @@ -698,7 +740,7 @@ static void emitnts(Rule rules, int nrules) if (str[j] == NULL) { print("static short %Pnts_%d[] = { %s0 };\n", j, buf); - str[j] = strcpy(alloc(strlen(buf) + 1), buf); + str[j] = strdup(buf); } nts[i++] = j; } @@ -752,15 +794,48 @@ static void emitrule(Nonterm nts) print("%1default:\n%2%Passert(0, PANIC(\"Bad goal nonterminal %%d in %Prule\\n\", goalnt));\n%1}\n%1return 0;\n}\n\n"); } +/* emitpredicates - emit predicates for rules */ +static void emitpredicatedefinitions(Rule r) +{ + while (r) + { + Stringlist s = r->when; + if (s) + { + print("static int %Ppredicate_%d(NODEPTR_TYPE n) {\n", r->ern); + while (s) + { + print("%s", s->payload); + s = s->next; + } + print("\n}\n\n"); + } + r = r->link; + } +} + +/* emitpredicatecall - emit a call to a predicate */ +static void emitpredicatecall(Rule r) +{ + if (r->when) + print("%Ppredicate_%d(node)", r->ern); + else + print("1"); +} + /* emitstate - emit state function */ static void emitstate(Term terms, Nonterm start, int ntnumber) { int i; Term p; - print("STATE_TYPE %Pstate(int op, STATE_TYPE left, STATE_TYPE right) {\n%1int c;\n" - "%1struct %Pstate *p, *l = (struct %Pstate *)left,\n" - "%2*r = (struct %Pstate *)right;\n\n%1assert(sizeof (STATE_TYPE) >= sizeof (void *));\n%1"); + print("STATE_TYPE %Pstate(NODEPTR_TYPE node, STATE_TYPE left, STATE_TYPE right) {\n%1int c;\n" + "%1int op = OP_LABEL(node);\n" + "%1struct %Pstate* p;\n" + "%1struct %Pstate* l = (struct %Pstate *)left;\n" + "%1struct %Pstate* r = (struct %Pstate *)right;\n" + "\n" + "%1assert(sizeof (STATE_TYPE) >= sizeof (void *));\n%1"); if (!Tflag) print("if (%Parity[op] > 0) "); print("{\n%2p = ALLOC(sizeof *p);\n" diff --git a/util/mcgg/iburg.h b/util/mcgg/iburg.h index cd3663dad..6b5f9affc 100644 --- a/util/mcgg/iburg.h +++ b/util/mcgg/iburg.h @@ -3,7 +3,14 @@ /* $Id$ */ /* iburg.c: */ -extern void* alloc(int nbytes); +extern char* stringf(char* fmt, ...); + +typedef struct stringlist* Stringlist; +struct stringlist { + const char* payload; + Stringlist next; +}; +extern Stringlist pushstring(const char* data, Stringlist list); typedef enum { @@ -48,18 +55,19 @@ extern Tree tree(char* op, Tree left, Tree right); struct rule { /* rules: */ - Nonterm lhs; /* lefthand side non-terminal */ - Tree pattern; /* rule pattern */ - int ern; /* external rule number */ - int packed; /* packed external rule number */ - int cost; /* associated cost */ - Rule link; /* next rule in ern order */ - Rule next; /* next rule with same pattern root */ - Rule chain; /* next chain rule with same rhs */ - Rule decode; /* next rule with same lhs */ - Rule kids; /* next rule with same burm_kids pattern */ + Nonterm lhs; /* lefthand side non-terminal */ + Tree pattern; /* rule pattern */ + int ern; /* external rule number */ + int packed; /* packed external rule number */ + int cost; /* associated cost */ + Rule link; /* next rule in ern order */ + Rule next; /* next rule with same pattern root */ + Rule chain; /* next chain rule with same rhs */ + Rule decode; /* next rule with same lhs */ + Rule kids; /* next rule with same burm_kids pattern */ + Stringlist when; /* C predicate string */ }; -extern Rule rule(char* id, Tree pattern, int ern, int cost); +extern Rule rule(char* id, Tree pattern, int ern, Stringlist when, int cost); extern int maxcost; /* maximum cost */ /* gram.y: */ @@ -70,4 +78,11 @@ extern int errcnt; extern FILE* infp; extern FILE* outfp; +/* Stupid flex imports --- why mo header file? */ + +extern FILE* yyin; +extern int yylineno; + +extern void printlineno(void); + #endif diff --git a/util/mcgg/mcgg_generated_footer.h b/util/mcgg/mcgg_generated_footer.h new file mode 100644 index 000000000..7b9bbdb46 --- /dev/null +++ b/util/mcgg/mcgg_generated_footer.h @@ -0,0 +1,43 @@ +static void dumpCover(NODEPTR_TYPE p, int goalnt, int indent) { +#ifdef TRACE + int eruleno = burm_rule(STATE_LABEL(p), goalnt); + short *nts = burm_nts[eruleno]; + NODEPTR_TYPE kids[10]; + int i; + + for (i = 0; i < indent; i++) + fprintf(stderr, " "); + fprintf(stderr, "%s\n", burm_string[eruleno]); + burm_kids(p, eruleno, kids); + for (i = 0; nts[i]; i++) + dumpCover(kids[i], nts[i], indent + 1); +#endif +} + +static NODEPTR_TYPE tree(int op, NODEPTR_TYPE l, NODEPTR_TYPE r) { + NODEPTR_TYPE p = malloc(sizeof *p); + + assert(p); + p->op = op; + p->kids[0] = l; p->kids[1] = r; + return p; +} + +int main(void) { + NODEPTR_TYPE p; + + p = tree(STORE, + tree(LABEL, 0, 0), + tree(ADD, + tree(LOAD, + tree(LABEL, 0, 0), + 0 + ), + tree(CONST, 0, 0) + ) + ); + burm_label(p); + dumpCover(p, 1, 0); + return 0; +} + diff --git a/util/mcgg/scan.l b/util/mcgg/scan.l new file mode 100644 index 000000000..3a067685b --- /dev/null +++ b/util/mcgg/scan.l @@ -0,0 +1,69 @@ +%{ +#include "iburg.h" +#include "y.tab.h" + +static int braces = 0; + +%} +%option warn +%option nodefault +%option noyywrap +%option yylineno + +%x CSTRING +%x ECHO + +%% + +"%{" { printlineno(); BEGIN(ECHO); } + +"%}" BEGIN(INITIAL); +[%\n] fputc(yytext[0], outfp); +[^%\n]* fputs(yytext, outfp); + +"{" { + yylval.string = stringf("#line %d\n", yylineno); + braces = 1; + BEGIN(CSTRING); + return CFRAGMENT; + } + +"{" { + braces++; + yylval.string = strdup(yytext); + return CFRAGMENT; + } + +"}" { + braces--; + if (braces == 0) + BEGIN(INITIAL); + else + { + yylval.string = strdup(yytext); + return CFRAGMENT; + } + } + +[^{}]+ { + yylval.string = strdup(yytext); + return CFRAGMENT; + } + +"%%" return PPERCENT; +"%term" return TERMINAL; +"%start" return START; + +"PATTERNS" return PATTERNS; +"pat" return PAT; +"when" return WHEN; +"emit" return EMIT; +"cost" return COST; + +[A-Za-z_][A-Za-z0-9_]* { yylval.string = strdup(yytext); return ID; } +[0-9]+ { yylval.n = atoi(yytext); return INT; } +[ \t\r\n]* ; +. return yytext[0]; + +%% +/* vim: set sw=4 ts=4 expandtab : */