From 707585b67d238b8943bc2d5be0f0bcd75ee5f1a9 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 27 Nov 2016 20:28:19 +0100 Subject: [PATCH] Perform initial (i.e. feature complete and compiling, but not necessarily working) port of the B compiler to EM. --- lang/b/compiler/b.h | 12 +- lang/b/compiler/b0.c | 258 ++++++++++++++++++++++---------------- lang/b/compiler/b1.c | 179 +++++++++++++------------- lang/b/compiler/build.lua | 7 +- 4 files changed, 261 insertions(+), 195 deletions(-) diff --git a/lang/b/compiler/b.h b/lang/b/compiler/b.h index 9e108325f..61548b6e8 100644 --- a/lang/b/compiler/b.h +++ b/lang/b/compiler/b.h @@ -1,11 +1,11 @@ #include #include #include +#include #include +#include #define NCPS 8 /* chars per symbol */ -#define NCPW 4 /* chars per word */ -#define ALIGN 4 /* Passed directly to the assembler's .align */ #define HSHSIZ 400 /* hash table size */ #define SWSIZ 230 /* switch table size */ #define CMSIZ 40 /* symbol stack size */ @@ -40,6 +40,7 @@ struct swtab { }; extern int wordsize; +int paramsize; struct hshtab hshtab[HSHSIZ]; int hshused; int eof; @@ -71,9 +72,12 @@ void error(char *s, ...); void printtoken(int tok, FILE *out); struct tnode * block(int op, int value, struct tnode *tr1, struct tnode *tr2); void rcexpr(struct tnode *tr); -void cbranch(struct tnode *t, int lab, int val); +void cbranch(struct tnode *t, int lab); void jump(int lab); -void label(int l); +void fnlabel(int l); +void tonativeaddr(void); +void fromnativeaddr(void); +char* manglename(char* name, char prefix); #define EOFC 0 #define SEMI 1 diff --git a/lang/b/compiler/b0.c b/lang/b/compiler/b0.c index 25aa59c6b..abbeeba32 100644 --- a/lang/b/compiler/b0.c +++ b/lang/b/compiler/b0.c @@ -4,7 +4,6 @@ void extdef(void); struct hshtab * lookup(void); void blkhed(void); void blkend(void); -void retseq(void); void statement(int d); struct tnode * tree(void); void errflush(int o); @@ -15,6 +14,9 @@ int contlab = -1; int brklab = -1; int wordsize = 4; +int bsymb_part; 
+int code_part; +int string_part; void init(char *s, int val) @@ -76,10 +78,33 @@ main(int argc, char *argv[]) init("return", RETURN); init("default", DEFAULT); init("break", BREAK); + + C_init(wordsize, wordsize); + C_open(NULL); + C_magic(); + C_ms_emx(wordsize, wordsize); + bsymb_part = 0; + string_part = 0; + code_part = C_getid(); + C_beginpart(code_part); while (!eof) { extdef(); blkend(); } + C_endpart(code_part); + C_insertpart(code_part); + + if (string_part) + C_insertpart(string_part); + + C_exp("bsymb_start"); + C_exp("bsymb_end"); + if (bsymb_part) + C_insertpart(bsymb_part); + C_df_dnam("bsymb_end"); + + C_close(); + return nerror != 0; } @@ -192,9 +217,9 @@ getcc(void) cc = 0; cp = (char*) &cval; while ((c = mapch('\'')) >= 0) - if (cc++ < NCPW) + if (cc++ < wordsize) *cp++ = c; - if (cc > NCPW) + if (cc > wordsize) error("Long character constant"); return CON; } @@ -204,17 +229,31 @@ getstr(void) { int c; int i; + char b; + int partid; - printf("\t.align %d\n", wordsize); - printf("L%d:", cval = isn++); - if ((c = mapch('"')) >= 0) - printf("\t.byte %04o", c); - for (i = 2; (c = mapch('"')) >= 0; i++) - printf(",%04o", c); - printf(",04"); + partid = C_getid(); + C_beginpart(partid); + if (string_part) + C_insertpart(string_part); + + cval = isn++; + C_df_dlb(cval); + for (i = 1; (c = mapch('"')) >= 0; i++) { + b = c; + C_con_scon(&b, 1); + } + + b = 04; + C_con_scon(&b, 1); + + b = 0; while ((i++%4) != 0) - printf(",00"); - printf("\n"); + C_con_scon(&b, 1); + + C_endpart(partid); + string_part = partid; + return STRING; } @@ -447,26 +486,30 @@ declist(void) void function(void) { - printf("\tpush\t%%ebp\n"); - printf("\tmov\t%%esp,%%ebp\n"); - declare(ARG); statement(1); - retseq(); + C_ret(0); + C_end(paramsize); } void global(char *s) { - printf("\t.globl\t_%s\n", s); - printf("\t.data\n"); - printf("\t.align %d\n", ALIGN); + C_exa_dnam(manglename(s, 'b')); } void -bsymb(char *s, int und) +bsymb(char *s) { - printf("\t.section .bsymb; .long %s%s; 
.data\n", und?"_":"", s); + int newpart = C_getid(); + C_beginpart(newpart); + if (bsymb_part != 0) + C_insertpart(bsymb_part); + C_rom_dlb(isn, 0); + C_endpart(newpart); + + bsymb_part = newpart; + C_df_dlb(isn++); } void @@ -474,6 +517,7 @@ extdef(void) { int o, dim, i; char *bs; + char *ms; if ((o = symbol()) == EOFC || o == SEMI) return; @@ -484,7 +528,9 @@ extdef(void) switch(o = symbol()) { case SEMI: - printf("\t.comm\t_%s,%d,%d\n", bs, NCPW, ALIGN); + global(bs); + C_df_dnam(manglename(bs, 'b')); + C_bss_cst(wordsize, 0, 1); goto done; /* init */ @@ -492,8 +538,8 @@ extdef(void) case STRING: global(bs); if (o == STRING) - bsymb(bs,1); - printf("_%s:", bs); + bsymb(bs); + C_df_dnam(manglename(bs, 'b')); pushsym(o); goto init; @@ -507,13 +553,17 @@ extdef(void) goto syntax; global(bs); if ((o=symbol()) == SEMI) { - printf("\t.comm\tL%d,%d,%d\n", isn, dim*NCPW, ALIGN); - bsymb(bs,1); - printf("_%s:\t.long L%d\n", bs, isn++); + bsymb(bs); + C_df_dnam(manglename(bs, 'b')); + C_con_dlb(isn, 0); + C_df_dlb(isn++); + C_bss_cst(wordsize*dim, 0, 1); goto done; } - bsymb(bs,1); - printf("_%s:\t.long 1f\n1:", bs); + bsymb(bs); + C_df_dnam(manglename(bs, 'b')); + C_con_dlb(isn, 0); + C_df_dlb(isn++); pushsym(o); init: @@ -521,15 +571,26 @@ extdef(void) if ((o=symbol()) != CON && o != STRING && o != NAME) goto syntax; if (o == NAME) { - bsymb("1f",0); - printf("1:\t.long _%s\n", bsym->name); - } else - printf("\t.long %s%d\n", o==STRING?"L":"",cval); + bsymb(NULL); + C_con_dnam(manglename(bsym->name, 'b'), 0); + } else { + if (o == STRING) { + bsymb(NULL); + C_con_dlb(cval, 0); + } else + C_con_cst(cval); + } i++; } while ((o=symbol()) == COMMA); dim = (i > dim) ? 
i : dim; - if (dim - i) - printf("\t.zero %d\n", (dim-i)*NCPW); + if (i == 0) + C_bss_cst((dim-i)*wordsize, 0, 1); + else { + while (dim -i) { + C_con_cst(0); + i++; + } + } if (o == SEMI) goto done; goto syntax; @@ -537,12 +598,15 @@ extdef(void) /* function */ case LPARN: global(bs); - bsymb(bs,1); - printf("_%s:\t.long 1f\n", bs); - printf("\t.text\n\t.align %s\n1:", wordsize); + ms = manglename(bs, 'b'); + bsymb(ms); + C_df_dnam(ms); + ms = manglename(bs, 'i'); + C_con_pnam(ms); + C_inp(ms); + C_pro_narg(ms); function(); done: - printf("\n"); return; case EOFC: @@ -555,26 +619,6 @@ syntax: statement(0); } -void -setstk(int a) -{ - int dif; - - dif = stack-a; - stack = a; - if (dif) - printf("\tsub\t$%d, %%esp\n", dif); -} - -void -defvec(void) -{ - stack -= NCPW; - printf("\tmov\t%%esp,%%eax\n"); - printf("\tshr\t$2,%%eax\n"); - printf("\tpush\t%%eax\n"); -} - void blkhed(void) { @@ -583,27 +627,29 @@ blkhed(void) declist(); stack = al = -wordsize; - pl = wordsize*2; + pl = 0; /* EM parameters start at offset 0. */ while (paraml) { paraml = (bs = paraml)->next; bs->offset = pl; - pl += NCPW; + pl += wordsize; } for (bs = hshtab; bs < &hshtab[HSHSIZ]; bs++) if (bs->name[0]) { if (bs->class == AUTO) { bs->offset = al; if (bs->dim) { - al -= bs->dim*NCPW; - setstk(al); - defvec(); + al -= bs->dim*wordsize; + C_lal(al); + al -= wordsize; + fromnativeaddr(); + C_stl(al); bs->offset = al; } - al -= NCPW; + al -= wordsize; } else if (bs->class == ARG) bs->class = AUTO; } - setstk(al); + paramsize = -al - wordsize; } void @@ -641,33 +687,23 @@ syntax: } void -label(int l) +fnlabel(int l) { - printf("L%d:\n", l); + C_ilb(l); } +/* Jump to "lab", if the expression "t" evaluated to 0. */ void -retseq(void) -{ - printf("\tjmp\tretrn\n"); -} - -/* Jump to "lab", if the expression "t" evaluated to "val". 
*/ -void -cbranch(struct tnode *t, int lab, int val) +cbranch(struct tnode *t, int lab) { rcexpr(t); - if (val == 0) - printf("\ttest\t%%eax,%%eax\n"); - else - printf("\tcmp\t%%eax,$%d\n", val); - printf("\tje\tL%d\n", lab); + C_zeq(lab); } void jump(int lab) { - printf("\tjmp\tL%d\n", lab); + C_bra(lab); } void @@ -676,26 +712,31 @@ pswitch(void) struct swtab *sswp; int dl, swlab; - sswp = swp; if (swp == NULL) swp = swtab; + sswp = swp; swlab = isn++; - printf("\tmov\t$L%d,%%ebx\n", swlab); - printf("\tjmp\tbswitch\n"); + C_lae_dlb(swlab, 0); + C_csb(wordsize); + dl = deflab; deflab = 0; statement(0); - if (!deflab) { - deflab = isn++; - label(deflab); - } - printf("L%d:\n\t.data\nL%d:", brklab, swlab); + if (!deflab) + deflab = brklab; + + C_df_dlb(swlab); + C_con_ilb(deflab); + C_con_cst(swp - sswp); + while (swp > sswp && swp > swtab) { --swp; - printf("\t.long %d,L%d\n", swp->swval, swp->swlab); + C_con_cst(swp->swval); + C_con_ilb(swp->swlab); } - printf("\t.long L%d,0\n", deflab); - printf("\t.text\n"); + + fnlabel(brklab); /* break target is a code label (see C_con_ilb(deflab) above), not a data label */ + deflab = dl; swp = sswp; } @@ -742,33 +783,36 @@ stmt: goto semi; case RETURN: - if (pushsym(symbol()) == LPARN) + if (pushsym(symbol()) == LPARN) { rcexpr(pexpr()); - retseq(); + C_ret(wordsize); + } else { + C_ret(0); + } goto semi; case IF: - cbranch(pexpr(), o1=isn++, 0); + cbranch(pexpr(), o1=isn++); statement(0); if ((o = symbol()) == KEYW && cval == ELSE) { jump(o2 = isn++); - label(o1); + fnlabel(o1); statement(0); - label(o2); + fnlabel(o2); return; } pushsym(o); - label(o1); + fnlabel(o1); return; case WHILE: o1 = contlab; o2 = brklab; - label(contlab = isn++); - cbranch(pexpr(), brklab=isn++, 0); + fnlabel(contlab = isn++); + cbranch(pexpr(), brklab=isn++); statement(0); jump(contlab); - label(brklab); + fnlabel(brklab); contlab = o1; brklab = o2; return; @@ -811,7 +855,7 @@ stmt: else { swp->swlab = isn; (swp++)->swval = cval; - label(isn++); + fnlabel(isn++); } goto stmt; @@ -821,7 +865,7 @@ stmt: if ((o = symbol()) != 
COLON) goto syntax; deflab = isn++; - label(deflab); + fnlabel(deflab); goto stmt; } @@ -837,12 +881,13 @@ stmt: } bsym->class = INTERN; bsym->offset = isn++; - label(bsym->offset); + fnlabel(bsym->offset); goto stmt; } } pushsym(o); rcexpr(tree()); + C_asp(wordsize); goto semi; semi: @@ -948,7 +993,8 @@ advanc: switch (o=symbol()) { case NAME: if (pushsym(symbol()) == LPARN) { /* function */ - bsym->class = EXTERN; + if (bsym->class == 0) + bsym->class = EXTERN; } else if (bsym->class == 0) { error("%s undefined", bsym->name); bsym->class = EXTERN; diff --git a/lang/b/compiler/b1.c b/lang/b/compiler/b1.c index 6295810ab..30ce5c7f7 100644 --- a/lang/b/compiler/b1.c +++ b/lang/b/compiler/b1.c @@ -1,26 +1,51 @@ #include "b.h" /* - * Code generation (x86 assembly) + * Code generation (EM) */ -void -push(void) +static int +shiftsize(void) { - printf("\tpush\t%%eax\n"); + switch (wordsize) { + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 8: return 3; + default: + error("unsupported word size"); + exit(1); + } } void -pop(char *s) +tonativeaddr(void) { - printf("\tpop\t%%%s\n", s); + C_loc(shiftsize()); + C_slu(wordsize); +} + +void +fromnativeaddr(void) +{ + C_loc(shiftsize()); + C_sru(wordsize); +} + +char* +manglename(char* name, char prefix) +{ + static char buffer[NCPS+3]; + buffer[0] = prefix; + buffer[1] = '_'; + strcpy(buffer+2, name); + return buffer; } void binary(struct tnode *tr) { rcexpr(tr->tr1); - push(); rcexpr(tr->tr2); } @@ -33,13 +58,11 @@ pushargs(struct tnode *tr) return 0; if (tr->op == COMMA) { rcexpr(tr->tr2); - push(); stk = pushargs(tr->tr1); - return stk+NCPW; + return stk+wordsize; } rcexpr(tr); - push(); - return NCPW; + return wordsize; } void @@ -56,42 +79,50 @@ lvalexp(struct tnode *tr) case INCAFT: if (tr->tr1->op == STAR) { rcexpr(tr->tr1->tr1); - printf("\tmov\t%%eax,%%ebx\n"); - sprintf(memloc,"(,%%ebx,4)"); + tonativeaddr(); } else { /* NAME, checked in "build" */ bs = (struct hshtab *) tr->tr1->tr1; if (bs->class 
== EXTERN) - sprintf(memloc,"_%s", bs->name); + C_lae_dnam(manglename(bs->name, 'b'), 0); else if (bs->class == AUTO) - sprintf(memloc,"%d(%%ebp)", bs->offset); + C_lal(bs->offset); else goto classerror; } if (tr->op == DECBEF || tr->op == INCBEF) { - printf("\t%s\t%s\n", tr->op == DECBEF ? "decl" : "incl", - memloc); - printf("\tmov\t%s,%%eax\n", memloc); + C_dup(wordsize); /* ( addr addr -- ) */ + C_loi(wordsize); /* ( addr val -- ) */ + C_adp((tr->op == DECBEF) ? -1 : 1); /* ( addr newval -- ) */ + C_exg(wordsize); /* ( newval addr -- ) */ + C_dup(wordsize*2); /* ( newval addr newval addr -- ) */ + C_sti(wordsize); /* ( newval addr -- ) */ + C_asp(wordsize); /* ( newval -- ) */ } else { - printf("\tmov\t%s,%%eax\n", memloc); - printf("\t%s\t%s\n", tr->op == DECAFT ? "decl" : "incl", - memloc); + C_dup(wordsize); /* ( addr addr -- ) */ + C_loi(wordsize); /* ( addr val -- ) */ + C_exg(wordsize); /* ( val addr -- ) */ + C_dup(wordsize*2); /* ( val addr val addr -- ) */ + C_asp(wordsize); /* ( val addr val -- ) */ + C_adp((tr->op == DECAFT) ? 
-1 : 1); /* ( val addr newval -- ) */ + C_exg(wordsize); /* ( val newval addr -- ) */ + C_sti(wordsize); /* ( val -- ) */ } return; case ASSIGN: rcexpr(tr->tr2); + C_dup(wordsize); if (tr->tr1->op == STAR) { - push(); rcexpr(tr->tr1->tr1); - pop("ebx"); - printf("\tmov\t%%ebx,(,%%eax,4)\n"); + tonativeaddr(); + C_sti(wordsize); } else { /* NAME */ bs = (struct hshtab *) tr->tr1->tr1; - if (bs->class == EXTERN) - printf("\tmov\t%%eax,_%s\n", bs->name); - else if (bs->class == AUTO) - printf("\tmov\t%%eax,%d(%%ebp)\n", bs->offset); - else + if (bs->class == EXTERN) { + C_ste_dnam(manglename(bs->name, 'b'), 0); /* externs are defined under their mangled b_ name */ + } else if (bs->class == AUTO) { + C_stl(bs->offset); + } else goto classerror; } return; @@ -138,20 +169,20 @@ rcexpr(struct tnode *tr) switch (tr->op) { case CON: - printf("\tmov\t$%d,%%eax\n", tr->value); + C_loc(tr->value); return; case STRING: - printf("\tmov\t$L%d,%%eax\n", tr->value); - printf("\tshr\t$2,%%eax\n"); + C_lae_dlb(tr->value, 0); + fromnativeaddr(); return; case NAME: /* only rvalue */ bs = (struct hshtab *) tr->tr1; if (bs->class == EXTERN) - printf("\tmov\t_%s,%%eax\n", bs->name); + C_loe_dnam(manglename(bs->name, 'b'), 0); else if (bs->class == AUTO) - printf("\tmov\t%d(%%ebp),%%eax\n", bs->offset); + C_lol(bs->offset); else goto classerror; return; @@ -159,89 +190,73 @@ rcexpr(struct tnode *tr) case CALL: stk = pushargs(tr->tr2); rcexpr(tr->tr1); - printf("\tshl\t$2,%%eax\n"); - printf("\tcall\t*%%eax\n"); + tonativeaddr(); + C_cai(); if (stk) - printf("\tadd\t$%d,%%esp\n",stk); + C_asp(stk); + C_lfr(wordsize); return; case AMPER: bs = (struct hshtab *) tr->tr1->tr1; if (bs->class == EXTERN) { - printf("\tmov\t$_%s,%%eax\n", bs->name); - printf("\tshr\t$2,%%eax\n"); + C_lae_dnam(manglename(bs->name, 'b'), 0); } else if (bs->class == AUTO) { - printf("\tlea\t%d(%%ebp),%%eax\n", bs->offset); - printf("\tshr\t$2,%%eax\n"); + C_lal(bs->offset); } else goto classerror; + fromnativeaddr(); return; case STAR: /* only rvalue */ rcexpr(tr->tr1); - 
printf("\tmov\t(,%%eax,4),%%eax\n"); + tonativeaddr(); + C_loi(wordsize); return; case PLUS: binary(tr); - pop("ebx"); - printf("\tadd\t%%ebx,%%eax\n"); + C_adi(wordsize); return; case MINUS: binary(tr); - printf("\tmov\t%%eax,%%ebx\n"); - pop("eax"); - printf("\tsub\t%%ebx,%%eax\n"); + C_sbi(wordsize); return; case TIMES: binary(tr); - pop("ebx"); - printf("\tmul\t%%ebx\n"); + C_mli(wordsize); return; case DIVIDE: binary(tr); - printf("\tmov\t%%eax,%%ebx\n"); - pop("eax"); - printf("\txor\t%%edx,%%edx\n"); - printf("\tdiv\t%%ebx\n"); + C_dvi(wordsize); return; case MOD: binary(tr); - printf("\tmov\t%%eax,%%ebx\n"); - pop("eax"); - printf("\txor\t%%edx,%%edx\n"); - printf("\tdiv\t%%ebx\n"); - printf("\tmov\t%%edx,%%eax\n"); + C_rmi(wordsize); return; case AND: binary(tr); - pop("ebx"); - printf("\tand\t%%ebx,%%eax\n"); + C_and(wordsize); return; case OR: binary(tr); - pop("ebx"); - printf("\tor\t%%ebx,%%eax\n"); + C_ior(wordsize); return; case LSHIFT: binary(tr); - printf("\tmov\t%%eax,%%ecx\n"); - pop("eax"); - printf("\tshl\t%%cl,%%eax\n"); + C_sli(wordsize); return; case RSHIFT: binary(tr); - printf("\tmov\t%%eax,%%ecx\n"); - pop("eax"); - printf("\tshr\t%%cl,%%eax\n"); + C_sri(wordsize); return; case EQUAL: @@ -251,50 +266,46 @@ rcexpr(struct tnode *tr) case GREAT: case GREATEQ: binary(tr); - pop("ebx"); - printf("\tcmp\t%%eax,%%ebx\n"); + C_cmi(wordsize); switch (tr->op) { case EQUAL: - printf("\tsete\t%%al\n"); + C_teq(); break; case NEQUAL: - printf("\tsetne\t%%al\n"); + C_tne(); break; case LESS: - printf("\tsetl\t%%al\n"); + C_tlt(); break; case LESSEQ: - printf("\tsetle\t%%al\n"); + C_tle(); break; case GREAT: - printf("\tsetg\t%%al\n"); + C_tgt(); break; case GREATEQ: - printf("\tsetge\t%%al\n"); + C_tge(); break; } - printf("\tmovzb\t%%al,%%eax\n"); return; case EXCLA: rcexpr(tr->tr1); - printf("\ttest\t%%eax,%%eax\n"); - printf("\tsete\t%%al\n"); - printf("\tmovzb\t%%al,%%eax\n"); + C_teq(); /* !x yields 1 only when x == 0, matching the removed test/sete sequence */ return; case NEG: rcexpr(tr->tr1); - 
printf("\tneg\t%%eax\n"); + C_ngi(wordsize); return; case QUEST: - cbranch(tr->tr1, o1=isn++, 0); + cbranch(tr->tr1, o1=isn++); rcexpr(tr->tr2->tr1); jump(o2 = isn++); - label(o1); + fnlabel(o1); rcexpr(tr->tr2->tr2); - label(o2); + fnlabel(o2); return; default: diff --git a/lang/b/compiler/build.lua b/lang/b/compiler/build.lua index 76fac395f..00afc81ef 100644 --- a/lang/b/compiler/build.lua +++ b/lang/b/compiler/build.lua @@ -5,10 +5,15 @@ cprogram { "./*.c", }, deps = { + "./*.h", "modules+headers", - "modules/src/em_code+lib_k", + "modules/src/alloc+lib", + "modules/src/em_code+lib_e", "modules/src/em_data+lib", "modules/src/em_mes+lib", + "modules/src/print+lib", + "modules/src/string+lib", + "modules/src/system+lib", } }