From 24380e2a933c450916bd4667e5dd28ff32e411eb Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 18 Sep 2016 00:02:16 +0200 Subject: [PATCH] Abstract out the EM reader; skeleton of the tree builder. --- mach/proto/mcg/main.c | 233 +-------------------------- mach/proto/mcg/mcg.h | 47 ++++++ mach/proto/mcg/parse_em.c | 295 +++++++++++++++++++++++++++++++++++ mach/proto/mcg/push_pop.awk | 4 +- mach/proto/mcg/push_pop.h | 2 +- mach/proto/mcg/treebuilder.c | 94 +++++++++++ modules/h/em_label.h | 3 +- 7 files changed, 446 insertions(+), 232 deletions(-) create mode 100644 mach/proto/mcg/mcg.h create mode 100644 mach/proto/mcg/parse_em.c create mode 100644 mach/proto/mcg/treebuilder.c diff --git a/mach/proto/mcg/main.c b/mach/proto/mcg/main.c index 2c04bca56..035259d54 100644 --- a/mach/proto/mcg/main.c +++ b/mach/proto/mcg/main.c @@ -1,19 +1,7 @@ -#include -#include -#include -#include -#include "em.h" +#include "mcg.h" #include "em_comp.h" -#include "em_pseu.h" -#include "em_mnem.h" -#include "em_flag.h" -#include "em_ptyp.h" -extern char em_pseu[][4]; -extern char em_mnem[][4]; -extern char em_flag[]; - -static void fatal(const char* msg, ...) +void fatal(const char* msg, ...) { va_list ap; va_start(ap, msg); @@ -22,226 +10,15 @@ static void fatal(const char* msg, ...) fprintf(stderr, "\n"); va_end(ap); - exit(1); + abort(); } -static const char* type_to_str(int type) +int main(int argc, char* argv[]) { - switch (type) - { - case EM_MNEM: return "EM_MNEM"; - case EM_PSEU: return "EM_PSEU"; - case EM_STARTMES: return "EM_STARTMES"; - case EM_MESARG: return "EM_MESARG"; - case EM_ENDMES: return "EM_ENDMES"; - case EM_DEFILB: return "EM_DEFILB"; - case EM_DEFDLB: return "EM_DEFDLB"; - case EM_DEFDNAM: return "EM_DEFDNAM"; - case EM_ERROR: return "EM_ERROR"; - case EM_FATAL: return "EM_FATAL"; - case EM_EOF: return "EM_EOF"; - } - - assert(0 && "invalid EM type"); -} - -static const char* argtype_to_str(int type) -{ - if (type == 0) return "..."; - if (type == ilb_ptyp) return "ilb"; - if (type == nof_ptyp) return "nof"; - if (type == sof_ptyp) return "sof"; - if (type == cst_ptyp) return "cst"; - if (type == pro_ptyp) return "pro"; - if (type == str_ptyp) return "str"; - if (type == ico_ptyp) return "ico"; - if (type == uco_ptyp) return "uco"; - if (type == fco_ptyp) return "fco"; - return "???"; -} - -int main(int argc, const char* argv[]) -{ - struct e_instr insn; - if (!EM_open(argv[1])) fatal("Couldn't open input file: %s", EM_error); - EM_getinstr(&insn); - printf("; word size = %d\n", EM_wordsize); - printf("; pointer size = %d\n", EM_pointersize); - - while (insn.em_type != EM_EOF) - { - printf("%s %s ", - type_to_str(insn.em_type), - argtype_to_str(insn.em_arg.ema_argtype)); - - switch (insn.em_type) - { - case EM_PSEU: - printf("%s ", em_pseu[insn.em_opcode - sp_fpseu]); - switch (insn.em_opcode) - { - case ps_exp: /* external proc */ - case ps_exa: /* external array */ - case ps_inp: /* internal proc */ - case ps_ina: /* internal array */ - switch (insn.em_arg.ema_argtype) - { - case pro_ptyp: - printf("name=%s\n", insn.em_pnam); - break; - - case sof_ptyp: - printf("name=%s offset=0x%x\n", - insn.em_dnam, - insn.em_off); - break; - - default: - printf("name=?\n"); - } - break; - - case ps_con: /* .data */ - case ps_rom: /* .rom */ - printf("size=%d ", - insn.em_size); - - switch (insn.em_arg.ema_argtype) - { - case ico_ptyp: - case uco_ptyp: - case fco_ptyp: - case str_ptyp: - printf("val=%s\n", insn.em_string); - break; - - default: - printf("val=?\n"); - } - break; - - case ps_pro: /* procedure start */ - printf("\n\n%s %d\n", - insn.em_arg.ema_pnam, - insn.em_arg.ema_szoroff); - break; - - case ps_end: /* procedure end */ - printf("%d\n\n\n", - insn.em_arg.ema_szoroff); - break; - - default: - printf("???\n"); - } - break; - - case EM_DEFILB: - printf("code label %d\n", insn.em_ilb); - break; - - case EM_DEFDLB: - printf("data label %d\n", insn.em_dlb); - break; - - case EM_DEFDNAM: - printf("data label %s\n", insn.em_dnam); - break; - - case EM_STARTMES: - for (;;) - { - switch (insn.em_arg.ema_argtype) - { - case cst_ptyp: - printf("%d ", insn.em_cst); - break; - - case str_ptyp: - printf("%s ", insn.em_string); - break; - - default: - printf("(unknown %s) ", - argtype_to_str(insn.em_arg.ema_argtype)); - } - - EM_getinstr(&insn); - if (insn.em_type == EM_ENDMES) - break; - assert(insn.em_type == EM_MESARG); - } - printf("\n"); - break; - - case EM_MNEM: - { - int flag = em_flag[insn.em_opcode - sp_fmnem]; - printf("%s %c%c%c%c ", - em_mnem[insn.em_opcode - sp_fmnem], - "/CDNFLGWSZOPBR"[flag & EM_PAR], - (flag & FLO_C) ? 'c' : '.', - (flag & FLO_P) ? 'p' : '.', - (flag & FLO_T) ? 't' : '.'); - - if (flag & EM_PAR) - { - switch (insn.em_argtype) - { - case ilb_ptyp: - printf("ilb "); - break; - - case nof_ptyp: - printf("nof "); - break; - - case sof_ptyp: - printf("sof "); - break; - - case cst_ptyp: - printf("cst 0x%08x ", insn.em_cst); - break; - - case pro_ptyp: - printf("pro "); - break; - - case str_ptyp: - printf("str "); - break; - - case ico_ptyp: - printf("ico "); - break; - - case uco_ptyp: - printf("uco "); - break; - - case fco_ptyp: - printf("fco "); - break; - - default: - printf("???"); - } - } - printf("\n"); - break; - } - - default: - printf("%d\n", insn.em_opcode); - break; - } - - EM_getinstr(&insn); - } + parse_em(); EM_close(); return 0; diff --git a/mach/proto/mcg/mcg.h b/mach/proto/mcg/mcg.h new file mode 100644 index 000000000..e008249ed --- /dev/null +++ b/mach/proto/mcg/mcg.h @@ -0,0 +1,47 @@ +#ifndef MCG_H +#define MCG_H + +#include +#include +#include +#include +#include +#include +#include +#include "em_arith.h" +#include "em_label.h" +#include "em.h" +#include "em_comp.h" +#include "em_pseu.h" +#include "em_mnem.h" +#include "em_flag.h" +#include "em_ptyp.h" + +extern char em_pseu[][4]; +extern char em_mnem[][4]; +extern char em_flag[]; + + +extern void fatal(const char* s, ...); + +extern void parse_em(void); + +extern void tb_filestart(void); +extern void tb_fileend(void); +extern void tb_symbol(const char* name, bool is_exported, bool is_proc); +extern void tb_dlabel(const char* label); +extern void tb_ilabel(const char* label); +extern void tb_data(const uint8_t* data, size_t size, bool is_ro); +extern void tb_data_offset(const char* label, arith offset, bool is_ro); +extern void tb_bss(size_t size, uint8_t init); +extern void tb_procstart(const char* label, size_t nlocals); +extern void tb_procend(void); +extern void tb_regvar(arith offset, int size, int type, int priority); + +extern void tb_insn_simple(int opcode, int flags); +extern void tb_insn_label(int opcode, int flags, const char* label, arith offset); +extern void tb_insn_value(int opcode, int flags, arith value); + +#endif + +/* vim: set sw=4 ts=4 expandtab : */ diff --git a/mach/proto/mcg/parse_em.c b/mach/proto/mcg/parse_em.c new file mode 100644 index 000000000..11aeb5c17 --- /dev/null +++ b/mach/proto/mcg/parse_em.c @@ -0,0 +1,295 @@ +#include "mcg.h" + +static struct e_instr insn; + +static const char* type_to_str(int type) +{ + switch (type) + { + case EM_MNEM: return "EM_MNEM"; + case EM_PSEU: return "EM_PSEU"; + case EM_STARTMES: return "EM_STARTMES"; + case EM_MESARG: return "EM_MESARG"; + case EM_ENDMES: return "EM_ENDMES"; + case EM_DEFILB: return "EM_DEFILB"; + case EM_DEFDLB: return "EM_DEFDLB"; + case EM_DEFDNAM: return "EM_DEFDNAM"; + case EM_ERROR: return "EM_ERROR"; + case EM_FATAL: return "EM_FATAL"; + case EM_EOF: return "EM_EOF"; + } + + assert(0 && "invalid EM type"); +} + +static const char* argtype_to_str(int type) +{ + if (type == 0) return "..."; + if (type == ilb_ptyp) return "ilb"; + if (type == nof_ptyp) return "nof"; + if (type == sof_ptyp) return "sof"; + if (type == cst_ptyp) return "cst"; + if (type == pro_ptyp) return "pro"; + if (type == str_ptyp) return "str"; + if (type == ico_ptyp) return "ico"; + if (type == uco_ptyp) return "uco"; + if (type == fco_ptyp) return "fco"; + return "???"; +} + +static void unknown_type(const char* s) +{ + fatal("%s with unknown type '%s'", + s, + argtype_to_str(insn.em_arg.ema_argtype)); +} + +static const uint8_t* arith_to_bytes(arith a, size_t sz) +{ + uint8_t* p = malloc(8); + + switch (sz) + { + case 1: *(uint8_t*)p = a; break; + case 2: *(uint16_t*)p = a; break; + case 4: *(uint32_t*)p = a; break; + case 8: *(uint64_t*)p = a; break; + default: + fatal("bad constant size '%d'", sz); + } + + return p; +} + +static const char* ilabel_to_str(label l) +{ + char s[16]; + sprintf(s, "__I%d", l); + return strdup(s); +} + +static const char* dlabel_to_str(label l) +{ + char s[16]; + sprintf(s, ".%d", l); + return strdup(s); +} + +static void parse_pseu(void) +{ + switch (insn.em_opcode) + { + case ps_exp: /* external proc */ + case ps_exa: /* external array */ + case ps_inp: /* internal proc */ + case ps_ina: /* internal array */ + { + bool export = (insn.em_opcode == ps_exp) || (insn.em_opcode == ps_exa); + bool proc = (insn.em_opcode == ps_exp) || (insn.em_opcode == ps_inp); + + switch (insn.em_arg.ema_argtype) + { + case pro_ptyp: + tb_symbol(strdup(insn.em_pnam), export, proc); + break; + + case sof_ptyp: + assert(insn.em_off == 0); + tb_symbol(strdup(insn.em_dnam), export, proc); + break; + + case nof_ptyp: + assert(insn.em_off == 0); + tb_symbol(dlabel_to_str(insn.em_dlb), export, proc); + break; + + default: + unknown_type("exp, exa, inp, ina"); + } + break; + } + + case ps_con: /* .data */ + case ps_rom: /* .rom */ + { + bool ro = (insn.em_opcode == ps_rom); + + switch (insn.em_arg.ema_argtype) + { + case ico_ptyp: + case uco_ptyp: + { + arith val = atol(insn.em_string); + tb_data(arith_to_bytes(val, insn.em_size), insn.em_size, ro); + break; + } + + case str_ptyp: + tb_data(strdup(insn.em_string), insn.em_size, ro); + break; + + case cst_ptyp: + tb_data(arith_to_bytes(insn.em_cst, EM_wordsize), EM_wordsize, ro); + break; + + case nof_ptyp: + tb_data_offset(dlabel_to_str(insn.em_dlb), insn.em_off, ro); + break; + + case ilb_ptyp: + tb_data_offset(ilabel_to_str(insn.em_ilb), 0, ro); + break; + + default: + unknown_type("con, rom"); + } + break; + } + + case ps_bss: + { + switch (insn.em_arg.ema_argtype) + { + case cst_ptyp: + tb_bss(EM_bsssize, EM_bssinit); + break; + + default: + unknown_type("bss"); + } + break; + } + + case ps_pro: /* procedure start */ + if (insn.em_nlocals == -1) + fatal("procedures with unspecified number of locals are not supported yet"); + + tb_procstart(strdup(insn.em_pnam), insn.em_nlocals); + break; + + case ps_end: /* procedure end */ + tb_procend(); + break; + + default: + fatal("unknown pseudo with opcode %d\n", insn.em_opcode); + } +} + +static arith mes_get_cst(void) +{ + EM_getinstr(&insn); + if (insn.em_type != EM_MESARG) + fatal("malformed MES"); + return insn.em_cst; +} + +static void parse_mes(void) +{ + assert(insn.em_arg.ema_argtype == cst_ptyp); + switch (insn.em_cst) + { + case 0: /* error */ + fatal("MES 0 received (explicit halt)"); + + case 3: /* register variable */ + { + arith offset = mes_get_cst(); + int size = mes_get_cst(); + int type = mes_get_cst(); + int priority = mes_get_cst(); + tb_regvar(offset, size, type, priority); + break; + } + } + + while ((insn.em_type == EM_STARTMES) || (insn.em_type == EM_MESARG)) + EM_getinstr(&insn); + + if (insn.em_type != EM_ENDMES) + fatal("malformed MES"); +} + +void parse_em(void) +{ + EM_getinstr(&insn); + tb_filestart(); + + while (insn.em_type != EM_EOF) + { + switch (insn.em_type) + { + case EM_PSEU: + parse_pseu(); + break; + + case EM_DEFILB: + tb_ilabel(ilabel_to_str(insn.em_ilb)); + break; + + case EM_DEFDLB: + tb_dlabel(dlabel_to_str(insn.em_dlb)); + break; + + case EM_DEFDNAM: + tb_dlabel(strdup(insn.em_dnam)); + break; + + case EM_STARTMES: + parse_mes(); + break; + + case EM_MNEM: + { + int flags = em_flag[insn.em_opcode - sp_fmnem]; + + if (flags & EM_PAR) + { + switch (insn.em_argtype) + { + case ilb_ptyp: + tb_insn_label(insn.em_opcode, flags, + ilabel_to_str(insn.em_ilb), 0); + break; + + case nof_ptyp: + tb_insn_label(insn.em_opcode, flags, + dlabel_to_str(insn.em_dlb), insn.em_off); + break; + + case sof_ptyp: + tb_insn_label(insn.em_opcode, flags, + strdup(insn.em_dnam), insn.em_off); + break; + + case pro_ptyp: + tb_insn_label(insn.em_opcode, flags, + strdup(insn.em_pnam), 0); + break; + + case cst_ptyp: + tb_insn_value(insn.em_opcode, flags, + insn.em_cst); + break; + + default: + unknown_type("instruction"); + } + } + else + tb_insn_simple(insn.em_opcode, flags); + + break; + } + + default: + fatal("unrecognised instruction type '%d'", insn.em_type); + } + + EM_getinstr(&insn); + } + + tb_fileend(); +} + +/* vim: set sw=4 ts=4 expandtab : */ diff --git a/mach/proto/mcg/push_pop.awk b/mach/proto/mcg/push_pop.awk index 125e6a807..83560912e 100644 --- a/mach/proto/mcg/push_pop.awk +++ b/mach/proto/mcg/push_pop.awk @@ -44,9 +44,9 @@ END { print ""; } - print "const struct stackop* stackops[] = {"; + print "const struct stackop* const stackops[] = {"; for (i=0; i