Abstract out the EM reader; skeleton of the tree builder.

This commit is contained in:
David Given 2016-09-18 00:02:16 +02:00
parent 2eee391aef
commit 24380e2a93
7 changed files with 446 additions and 232 deletions

View file

@ -1,19 +1,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include "em.h"
#include "mcg.h"
#include "em_comp.h"
#include "em_pseu.h"
#include "em_mnem.h"
#include "em_flag.h"
#include "em_ptyp.h"
extern char em_pseu[][4];
extern char em_mnem[][4];
extern char em_flag[];
static void fatal(const char* msg, ...)
void fatal(const char* msg, ...)
{
va_list ap;
va_start(ap, msg);
@ -22,226 +10,15 @@ static void fatal(const char* msg, ...)
fprintf(stderr, "\n");
va_end(ap);
exit(1);
abort();
}
static const char* type_to_str(int type)
int main(int argc, char* argv[])
{
switch (type)
{
case EM_MNEM: return "EM_MNEM";
case EM_PSEU: return "EM_PSEU";
case EM_STARTMES: return "EM_STARTMES";
case EM_MESARG: return "EM_MESARG";
case EM_ENDMES: return "EM_ENDMES";
case EM_DEFILB: return "EM_DEFILB";
case EM_DEFDLB: return "EM_DEFDLB";
case EM_DEFDNAM: return "EM_DEFDNAM";
case EM_ERROR: return "EM_ERROR";
case EM_FATAL: return "EM_FATAL";
case EM_EOF: return "EM_EOF";
}
assert(0 && "invalid EM type");
}
/*
 * Translate an EM argument-type code into a short printable name.
 * Zero maps to "..." and any value not listed maps to "???".
 * The returned pointer refers to a string literal.
 */
static const char* argtype_to_str(int type)
{
    /* Searched front to back, so the order mirrors the lookup priority. */
    static const struct
    {
        int ptyp;
        const char* name;
    }
    names[] =
    {
        { 0,        "..." },
        { ilb_ptyp, "ilb" },
        { nof_ptyp, "nof" },
        { sof_ptyp, "sof" },
        { cst_ptyp, "cst" },
        { pro_ptyp, "pro" },
        { str_ptyp, "str" },
        { ico_ptyp, "ico" },
        { uco_ptyp, "uco" },
        { fco_ptyp, "fco" },
    };
    size_t i;

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        if (names[i].ptyp == type)
            return names[i].name;
    }

    return "???";
}
int main(int argc, const char* argv[])
{
struct e_instr insn;
if (!EM_open(argv[1]))
fatal("Couldn't open input file: %s", EM_error);
EM_getinstr(&insn);
printf("; word size = %d\n", EM_wordsize);
printf("; pointer size = %d\n", EM_pointersize);
while (insn.em_type != EM_EOF)
{
printf("%s %s ",
type_to_str(insn.em_type),
argtype_to_str(insn.em_arg.ema_argtype));
switch (insn.em_type)
{
case EM_PSEU:
printf("%s ", em_pseu[insn.em_opcode - sp_fpseu]);
switch (insn.em_opcode)
{
case ps_exp: /* external proc */
case ps_exa: /* external array */
case ps_inp: /* internal proc */
case ps_ina: /* internal array */
switch (insn.em_arg.ema_argtype)
{
case pro_ptyp:
printf("name=%s\n", insn.em_pnam);
break;
case sof_ptyp:
printf("name=%s offset=0x%x\n",
insn.em_dnam,
insn.em_off);
break;
default:
printf("name=?\n");
}
break;
case ps_con: /* .data */
case ps_rom: /* .rom */
printf("size=%d ",
insn.em_size);
switch (insn.em_arg.ema_argtype)
{
case ico_ptyp:
case uco_ptyp:
case fco_ptyp:
case str_ptyp:
printf("val=%s\n", insn.em_string);
break;
default:
printf("val=?\n");
}
break;
case ps_pro: /* procedure start */
printf("\n\n%s %d\n",
insn.em_arg.ema_pnam,
insn.em_arg.ema_szoroff);
break;
case ps_end: /* procedure end */
printf("%d\n\n\n",
insn.em_arg.ema_szoroff);
break;
default:
printf("???\n");
}
break;
case EM_DEFILB:
printf("code label %d\n", insn.em_ilb);
break;
case EM_DEFDLB:
printf("data label %d\n", insn.em_dlb);
break;
case EM_DEFDNAM:
printf("data label %s\n", insn.em_dnam);
break;
case EM_STARTMES:
for (;;)
{
switch (insn.em_arg.ema_argtype)
{
case cst_ptyp:
printf("%d ", insn.em_cst);
break;
case str_ptyp:
printf("%s ", insn.em_string);
break;
default:
printf("(unknown %s) ",
argtype_to_str(insn.em_arg.ema_argtype));
}
EM_getinstr(&insn);
if (insn.em_type == EM_ENDMES)
break;
assert(insn.em_type == EM_MESARG);
}
printf("\n");
break;
case EM_MNEM:
{
int flag = em_flag[insn.em_opcode - sp_fmnem];
printf("%s %c%c%c%c ",
em_mnem[insn.em_opcode - sp_fmnem],
"/CDNFLGWSZOPBR"[flag & EM_PAR],
(flag & FLO_C) ? 'c' : '.',
(flag & FLO_P) ? 'p' : '.',
(flag & FLO_T) ? 't' : '.');
if (flag & EM_PAR)
{
switch (insn.em_argtype)
{
case ilb_ptyp:
printf("ilb ");
break;
case nof_ptyp:
printf("nof ");
break;
case sof_ptyp:
printf("sof ");
break;
case cst_ptyp:
printf("cst 0x%08x ", insn.em_cst);
break;
case pro_ptyp:
printf("pro ");
break;
case str_ptyp:
printf("str ");
break;
case ico_ptyp:
printf("ico ");
break;
case uco_ptyp:
printf("uco ");
break;
case fco_ptyp:
printf("fco ");
break;
default:
printf("???");
}
}
printf("\n");
break;
}
default:
printf("%d\n", insn.em_opcode);
break;
}
EM_getinstr(&insn);
}
parse_em();
EM_close();
return 0;

47
mach/proto/mcg/mcg.h Normal file
View file

@ -0,0 +1,47 @@
#ifndef MCG_H
#define MCG_H
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "em_arith.h"
#include "em_label.h"
#include "em.h"
#include "em_comp.h"
#include "em_pseu.h"
#include "em_mnem.h"
#include "em_flag.h"
#include "em_ptyp.h"
/*
 * Tables defined outside this header. Code in this program indexes
 * em_pseu by (opcode - sp_fpseu) and em_mnem / em_flag by
 * (opcode - sp_fmnem).
 */
extern char em_pseu[][4];
extern char em_mnem[][4];
extern char em_flag[];
/* Print a printf-style error message and terminate the program. */
extern void fatal(const char* s, ...);
/* Read the EM input instruction by instruction and drive the tb_*
 * callbacks declared below. */
extern void parse_em(void);
/* Tree builder interface: called at the start and end of the input. */
extern void tb_filestart(void);
extern void tb_fileend(void);
/* Symbol declarations and label definitions. */
extern void tb_symbol(const char* name, bool is_exported, bool is_proc);
extern void tb_dlabel(const char* label);
extern void tb_ilabel(const char* label);
/* Data: raw bytes, a reference to a label plus offset, or a bss area. */
extern void tb_data(const uint8_t* data, size_t size, bool is_ro);
extern void tb_data_offset(const char* label, arith offset, bool is_ro);
extern void tb_bss(size_t size, uint8_t init);
/* Procedure boundaries and register-variable hints. */
extern void tb_procstart(const char* label, size_t nlocals);
extern void tb_procend(void);
extern void tb_regvar(arith offset, int size, int type, int priority);
/* Instructions: no argument, label (+offset) argument, constant argument.
 * `flags` is the instruction's em_flag entry as passed by parse_em(). */
extern void tb_insn_simple(int opcode, int flags);
extern void tb_insn_label(int opcode, int flags, const char* label, arith offset);
extern void tb_insn_value(int opcode, int flags, arith value);
#endif
/* vim: set sw=4 ts=4 expandtab : */

295
mach/proto/mcg/parse_em.c Normal file
View file

@ -0,0 +1,295 @@
#include "mcg.h"
static struct e_instr insn;
/*
 * Return the symbolic name of an EM instruction type (the em_type field
 * of struct e_instr) for use in diagnostics.
 *
 * An unrecognised type trips the assertion; when assertions are compiled
 * out (NDEBUG) a placeholder string is returned instead of running off
 * the end of a non-void function.
 */
static const char* type_to_str(int type)
{
    switch (type)
    {
        case EM_MNEM: return "EM_MNEM";
        case EM_PSEU: return "EM_PSEU";
        case EM_STARTMES: return "EM_STARTMES";
        case EM_MESARG: return "EM_MESARG";
        case EM_ENDMES: return "EM_ENDMES";
        case EM_DEFILB: return "EM_DEFILB";
        case EM_DEFDLB: return "EM_DEFDLB";
        case EM_DEFDNAM: return "EM_DEFDNAM";
        case EM_ERROR: return "EM_ERROR";
        case EM_FATAL: return "EM_FATAL";
        case EM_EOF: return "EM_EOF";
        default: break;
    }

    assert(0 && "invalid EM type");
    return "EM_???";
}
/*
 * Translate an EM argument-type code into a short printable name.
 * Zero maps to "..." and any value not listed maps to "???".
 * The returned pointer refers to a string literal.
 */
static const char* argtype_to_str(int type)
{
    /* Searched front to back, so the order mirrors the lookup priority. */
    static const struct
    {
        int ptyp;
        const char* name;
    }
    names[] =
    {
        { 0,        "..." },
        { ilb_ptyp, "ilb" },
        { nof_ptyp, "nof" },
        { sof_ptyp, "sof" },
        { cst_ptyp, "cst" },
        { pro_ptyp, "pro" },
        { str_ptyp, "str" },
        { ico_ptyp, "ico" },
        { uco_ptyp, "uco" },
        { fco_ptyp, "fco" },
    };
    size_t i;

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        if (names[i].ptyp == type)
            return names[i].name;
    }

    return "???";
}
/*
 * Report, via fatal(), that the current instruction carries an argument
 * type the caller does not handle. `s` names the construct being parsed
 * and is placed at the front of the message.
 */
static void unknown_type(const char* s)
{
    const char* argtype = argtype_to_str(insn.em_arg.ema_argtype);

    fatal("%s with unknown type '%s'", s, argtype);
}
/*
 * Store the value `a` as an integer of `sz` bytes (1, 2, 4 or 8) at the
 * start of a newly allocated 8-byte buffer and return that buffer.
 *
 * The value is written with a plain integer store, i.e. in the byte
 * order of the machine running this program. The buffer is never freed
 * here; it belongs to whoever receives it. Any other size, or an
 * allocation failure, is reported through fatal().
 */
static const uint8_t* arith_to_bytes(arith a, size_t sz)
{
    uint8_t* p = malloc(8);
    if (!p)
        fatal("out of memory");

    switch (sz)
    {
        case 1: *(uint8_t*)p = a; break;
        case 2: *(uint16_t*)p = a; break;
        case 4: *(uint32_t*)p = a; break;
        case 8: *(uint64_t*)p = a; break;

        default:
            /* size_t does not match %d; print it as unsigned long. */
            fatal("bad constant size '%lu'", (unsigned long) sz);
    }

    return p;
}
/*
 * Build the textual name "__I<n>" for instruction label `l`.
 *
 * Returns a heap-allocated string obtained from strdup(); it is not
 * freed here, so ownership passes to the caller. The label is printed
 * as an unsigned value, and snprintf() keeps the write inside the
 * scratch buffer whatever the width of `label`.
 */
static const char* ilabel_to_str(label l)
{
    char s[16];

    snprintf(s, sizeof(s), "__I%u", (unsigned) l);
    return strdup(s);
}
/*
 * Build the textual name ".<n>" for numeric data label `l`.
 *
 * Returns a heap-allocated string obtained from strdup(); it is not
 * freed here, so ownership passes to the caller. The label is printed
 * as an unsigned value, and snprintf() keeps the write inside the
 * scratch buffer whatever the width of `label`.
 */
static const char* dlabel_to_str(label l)
{
    char s[16];

    snprintf(s, sizeof(s), ".%u", (unsigned) l);
    return strdup(s);
}
/*
 * Return a newly allocated copy of exactly `size` bytes starting at
 * `src`. Allocation failure is reported through fatal(). The copy is
 * not freed here.
 */
static const uint8_t* dup_bytes(const char* src, size_t size)
{
    /* malloc(0) may return NULL without that being an error, so always
     * request at least one byte. */
    uint8_t* copy = malloc(size ? size : 1);
    if (!copy)
        fatal("out of memory");

    if (size)
        memcpy(copy, src, size);
    return copy;
}

/*
 * Handle the pseudo-instruction currently held in `insn` by forwarding
 * it to the matching tree-builder callback:
 *
 *   exp/exa/inp/ina -> tb_symbol()
 *   con/rom         -> tb_data() or tb_data_offset()
 *   bss             -> tb_bss()
 *   pro / end       -> tb_procstart() / tb_procend()
 *
 * Names and data handed to the callbacks are fresh heap copies that are
 * not freed here. Unsupported pseudos or argument types are reported
 * through fatal().
 */
static void parse_pseu(void)
{
    switch (insn.em_opcode)
    {
        case ps_exp: /* external proc */
        case ps_exa: /* external array */
        case ps_inp: /* internal proc */
        case ps_ina: /* internal array */
        {
            bool export = (insn.em_opcode == ps_exp) || (insn.em_opcode == ps_exa);
            bool proc = (insn.em_opcode == ps_exp) || (insn.em_opcode == ps_inp);

            switch (insn.em_arg.ema_argtype)
            {
                case pro_ptyp:
                    tb_symbol(strdup(insn.em_pnam), export, proc);
                    break;

                case sof_ptyp:
                    assert(insn.em_off == 0);
                    tb_symbol(strdup(insn.em_dnam), export, proc);
                    break;

                case nof_ptyp:
                    assert(insn.em_off == 0);
                    tb_symbol(dlabel_to_str(insn.em_dlb), export, proc);
                    break;

                default:
                    unknown_type("exp, exa, inp, ina");
            }
            break;
        }

        case ps_con: /* .data */
        case ps_rom: /* .rom */
        {
            bool ro = (insn.em_opcode == ps_rom);

            switch (insn.em_arg.ema_argtype)
            {
                case ico_ptyp:
                case uco_ptyp:
                {
                    /* The value arrives as text in em_string; em_size is
                     * the width of the constant to emit. */
                    arith val = atol(insn.em_string);
                    tb_data(arith_to_bytes(val, insn.em_size), insn.em_size, ro);
                    break;
                }

                case str_ptyp:
                    /* tb_data() is told the data is em_size bytes long,
                     * so copy exactly that many. strdup() would stop at
                     * the first NUL byte and could leave the copy shorter
                     * than the size reported to tb_data(). */
                    tb_data(dup_bytes(insn.em_string, insn.em_size), insn.em_size, ro);
                    break;

                case cst_ptyp:
                    tb_data(arith_to_bytes(insn.em_cst, EM_wordsize), EM_wordsize, ro);
                    break;

                case nof_ptyp:
                    tb_data_offset(dlabel_to_str(insn.em_dlb), insn.em_off, ro);
                    break;

                case ilb_ptyp:
                    tb_data_offset(ilabel_to_str(insn.em_ilb), 0, ro);
                    break;

                default:
                    unknown_type("con, rom");
            }
            break;
        }

        case ps_bss:
        {
            switch (insn.em_arg.ema_argtype)
            {
                case cst_ptyp:
                    tb_bss(EM_bsssize, EM_bssinit);
                    break;

                default:
                    unknown_type("bss");
            }
            break;
        }

        case ps_pro: /* procedure start */
            if (insn.em_nlocals == -1)
                fatal("procedures with unspecified number of locals are not supported yet");

            tb_procstart(strdup(insn.em_pnam), insn.em_nlocals);
            break;

        case ps_end: /* procedure end */
            tb_procend();
            break;

        default:
            /* No trailing newline here, matching the other fatal() calls. */
            fatal("unknown pseudo with opcode %d", insn.em_opcode);
    }
}
/*
 * Fetch the next instruction into `insn`, require it to be a message
 * argument (EM_MESARG), and return its constant value. Anything else is
 * reported as a malformed MES through fatal().
 */
static arith mes_get_cst(void)
{
    int is_mesarg;

    EM_getinstr(&insn);
    is_mesarg = (insn.em_type == EM_MESARG);
    if (!is_mesarg)
        fatal("malformed MES");

    return insn.em_cst;
}
/*
 * Process a MES message whose first item (the message number, a
 * constant) is already in `insn`.
 *
 *   MES 0 is reported through fatal().
 *   MES 3 reads four constant arguments and passes them to tb_regvar().
 *   Other message numbers are not interpreted.
 *
 * Afterwards any remaining message arguments are skipped, and the item
 * that follows them must be EM_ENDMES.
 */
static void parse_mes(void)
{
    assert(insn.em_arg.ema_argtype == cst_ptyp);

    switch (insn.em_cst)
    {
        case 0: /* error */
            fatal("MES 0 received (explicit halt)");
            /* no break here, exactly as before */

        case 3: /* register variable */
        {
            /* Arguments are consumed strictly in this order. */
            arith var_offset = mes_get_cst();
            int var_size = mes_get_cst();
            int var_type = mes_get_cst();
            int var_priority = mes_get_cst();

            tb_regvar(var_offset, var_size, var_type, var_priority);
            break;
        }
    }

    /* Skip forward until something that is neither the message start nor
     * one of its arguments has been read. */
    for (;;)
    {
        if ((insn.em_type != EM_STARTMES) && (insn.em_type != EM_MESARG))
            break;
        EM_getinstr(&insn);
    }

    if (insn.em_type != EM_ENDMES)
        fatal("malformed MES");
}
/*
 * Forward the machine instruction held in `insn` to the tree builder.
 * Instructions whose flags carry no parameter bits go to
 * tb_insn_simple(); the others are dispatched on their argument type.
 */
static void parse_mnem(void)
{
    int flags = em_flag[insn.em_opcode - sp_fmnem];

    if (!(flags & EM_PAR))
    {
        tb_insn_simple(insn.em_opcode, flags);
        return;
    }

    switch (insn.em_argtype)
    {
        case ilb_ptyp:
            tb_insn_label(insn.em_opcode, flags,
                ilabel_to_str(insn.em_ilb), 0);
            break;

        case nof_ptyp:
            tb_insn_label(insn.em_opcode, flags,
                dlabel_to_str(insn.em_dlb), insn.em_off);
            break;

        case sof_ptyp:
            tb_insn_label(insn.em_opcode, flags,
                strdup(insn.em_dnam), insn.em_off);
            break;

        case pro_ptyp:
            tb_insn_label(insn.em_opcode, flags,
                strdup(insn.em_pnam), 0);
            break;

        case cst_ptyp:
            tb_insn_value(insn.em_opcode, flags,
                insn.em_cst);
            break;

        default:
            unknown_type("instruction");
    }
}

/*
 * Read EM instructions with EM_getinstr() until EM_EOF and hand each one
 * to the tree builder, bracketed by tb_filestart() and tb_fileend().
 * Label names passed to the tb_* callbacks are heap copies that are not
 * freed here. An instruction type with no handler is reported through
 * fatal().
 */
void parse_em(void)
{
    EM_getinstr(&insn);
    tb_filestart();

    for (; insn.em_type != EM_EOF; EM_getinstr(&insn))
    {
        switch (insn.em_type)
        {
            case EM_PSEU:
                parse_pseu();
                break;

            case EM_DEFILB:
                tb_ilabel(ilabel_to_str(insn.em_ilb));
                break;

            case EM_DEFDLB:
                tb_dlabel(dlabel_to_str(insn.em_dlb));
                break;

            case EM_DEFDNAM:
                tb_dlabel(strdup(insn.em_dnam));
                break;

            case EM_STARTMES:
                parse_mes();
                break;

            case EM_MNEM:
                parse_mnem();
                break;

            default:
                fatal("unrecognised instruction type '%d'", insn.em_type);
        }
    }

    tb_fileend();
}
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -44,9 +44,9 @@ END {
print "";
}
print "const struct stackop* stackops[] = {";
print "const struct stackop* const stackops[] = {";
for (i=0; i<count; i++)
print "\t&so_" opcode[i] ","
print "\tso_" opcode[i] ","
print "};"
}

View file

@ -6,7 +6,7 @@ struct stackop {
char type : 7;
};
extern const struct stackop* stackops[];
extern const struct stackop* const stackops[];
#endif

View file

@ -0,0 +1,94 @@
#include "mcg.h"
/*
 * Tree builder hook that parse_em() calls once, before it starts
 * dispatching instructions. Currently an empty placeholder.
 */
void tb_filestart(void)
{
}
/*
 * Tree builder hook that parse_em() calls once, after the last
 * instruction has been dispatched. Currently an empty placeholder.
 */
void tb_fileend(void)
{
}
/*
 * Record a symbol declaration. For now this only prints a one-line
 * description of the symbol to stdout.
 */
void tb_symbol(const char* name, bool is_exported, bool is_proc)
{
    const char* exported = is_exported ? "yes" : "no";
    const char* proc = is_proc ? "yes" : "no";

    printf("; symbol name=%s, exported=%s, is_proc=%s\n", name, exported, proc);
}
/*
 * Record the definition of a data label. For now this only prints the
 * label name to stdout.
 */
void tb_dlabel(const char* label)
{
    fprintf(stdout, "; dlabel name=%s\n", label);
}
/*
 * Record the definition of an instruction label. For now this only
 * prints the label name to stdout.
 */
void tb_ilabel(const char* label)
{
    fprintf(stdout, "; ilabel name=%s\n", label);
}
/*
 * Record a block of initialised data. For now this only prints the size
 * and read-only flag to stdout; the bytes in `data` are not examined.
 */
void tb_data(const uint8_t* data, size_t size, bool is_ro)
{
    (void) data; /* not used yet */

    /* size_t must be printed with %zu; %d expects an int. */
    printf("; data size=%zu ro=%s\n",
        size,
        is_ro ? "yes" : "no");
}
/*
 * Record a data item that refers to `label` plus `offset`. For now this
 * only prints a one-line description to stdout.
 */
void tb_data_offset(const char* label, arith offset, bool is_ro)
{
    /* arith is declared in a project header not visible here; converting
     * to long makes the argument match the conversion specifier. */
    printf("; data label=%s offset=%ld ro=%s\n",
        label, (long) offset,
        is_ro ? "yes" : "no");
}
/*
 * Record a bss area of `size` bytes whose fill byte is `init`. For now
 * this only prints a one-line description to stdout.
 */
void tb_bss(size_t size, uint8_t init)
{
    /* %zu matches size_t; %x expects an unsigned int. */
    printf("; bss size=%zu init=0x%x\n",
        size, (unsigned) init);
}
/*
 * Record the start of procedure `label`, passing along the `nlocals`
 * value supplied by the parser. For now this only prints a one-line
 * description to stdout.
 */
void tb_procstart(const char* label, size_t nlocals)
{
    /* %zu matches size_t; %d expects an int. */
    printf("; proc name=%s nlocals=%zu\n", label, nlocals);
}
/*
 * Record the end of the current procedure. For now this only prints a
 * marker line to stdout.
 */
void tb_procend(void)
{
    fputs("; endproc\n", stdout);
}
/*
 * Record a register-variable hint (the four arguments of a MES 3
 * message as read by the parser). For now this only prints them to
 * stdout.
 */
void tb_regvar(arith offset, int size, int type, int priority)
{
    /* arith is declared in a project header not visible here; converting
     * to long makes the argument match the conversion specifier. */
    printf("; regvar offset=%ld size=%d type=%d priority=%d\n",
        (long) offset, size, type, priority);
}
/*
 * Print the common prefix of an instruction line: the mnemonic looked up
 * in em_mnem, one character selected by the parameter bits of `flags`,
 * and three characters that show 'c', 'p' and 't' when the FLO_C, FLO_P
 * and FLO_T bits are set ('.' otherwise). No newline is written.
 */
static void printinsn(int opcode, int flags)
{
    const char* mnemonic = em_mnem[opcode - sp_fmnem];
    char partype = "/CDNFLGWSZOPBR"[flags & EM_PAR];
    char flo_c = (flags & FLO_C) ? 'c' : '.';
    char flo_p = (flags & FLO_P) ? 'p' : '.';
    char flo_t = (flags & FLO_T) ? 't' : '.';

    printf("; insn %s %c%c%c%c ", mnemonic, partype, flo_c, flo_p, flo_t);
}
/*
 * Record an instruction that takes no argument. For now this only
 * prints the instruction prefix followed by a newline.
 */
void tb_insn_simple(int opcode, int flags)
{
    printinsn(opcode, flags);
    fputs("\n", stdout);
}
/*
 * Record an instruction whose argument is `label` plus `offset`. For
 * now this only prints the instruction prefix and the argument.
 */
void tb_insn_label(int opcode, int flags, const char* label, arith offset)
{
    printinsn(opcode, flags);
    /* arith is declared in a project header not visible here; converting
     * to long makes the argument match the conversion specifier. */
    printf("label=%s offset=%ld\n", label, (long) offset);
}
/*
 * Record an instruction whose argument is the constant `value`. For now
 * this only prints the instruction prefix and the value.
 */
void tb_insn_value(int opcode, int flags, arith value)
{
    printinsn(opcode, flags);
    /* arith is declared in a project header not visible here; converting
     * to long makes the argument match the conversion specifier. */
    printf("value=%ld\n", (long) value);
}
/* vim: set sw=4 ts=4 expandtab : */

View file

@ -4,4 +4,5 @@
*/
/* $Id$ */
#define label unsigned int
typedef unsigned int label;