Merge pull request #102 from davidgiven/dtrg-powerpc

Add a PowerPC simulator for running the tests.
2018-06-17 11:45:26 +02:00 · 2018-06-17 11:45:26 +02:00 · ddca7276c0
parent 39a49265ce 89e8956bb2
commit ddca7276c0
8 changed files with 968 additions and 1 deletions
--- a/plat/linuxppc/emu/README.md
+++ b/plat/linuxppc/emu/README.md
@ -0,0 +1,28 @@
+This is just a naive domestic PowerPC simulator, but I think you'll be amused
+by its presumption.
+
+The simulator implements just enough of the instruction set to make the tests
+pass. Certain features aren't supported at all (and an effort has been made
+to detect this and error out). The FPU is crudely approximated using the
+native floating-point support, doesn't support reading and writing FPSCR, and
+will almost certainly produce incorrect results. Plus, there are bugs. It's
+also likely to be very, very slow.
+
+However, it should be easily extensible and the emulator core is only about
+500 lines of code.
+
+Instructions are defined in `instructions.dat`; `mkdispatcher.lua` reads
+these in and generates the instruction decoder. `emu.c` contains the main
+emulator core. `main.c` contains the application front end and the incredibly
+crude syscall interface.
+
+TODO:
+
+  - overflow bit support (instructions that try to set OV error out)
+  - mtcrf
+  - read string / write string
+  - factor out the ELF loader so that linux68k/emu can use it too
+  - floating point condition bits
+  - bit-for-bit FPU emulation, although this looks like a huge amount of work
+
+It was written from scratch for the ACK by me, David Given.
--- a/plat/linuxppc/emu/build.lua
+++ b/plat/linuxppc/emu/build.lua
@ -0,0 +1,28 @@
+-- Generates dispatcher.h by running mkdispatcher.lua with instructions.dat
+-- on its standard input and capturing its standard output.
+normalrule {
+	name = "dispatcher",
+	ins = {
+		"./mkdispatcher.lua",
+		"./instructions.dat"
+	},
+	outleaves = {
+		"dispatcher.h"
+	},
+	commands = {
+		"$(LUA) %{ins[1]} < %{ins[2]} > %{outs}"
+	}
+}
+
+-- Source-less library whose only job is to expose the generated header to
+-- the C sources below.
+clibrary {
+	name = "dispatcher_lib",
+	srcs = {},
+	hdrs = { "+dispatcher" }
+}
+
+-- The emulator executable itself, built from every C file in this directory.
+cprogram {
+	name = "emuppc",
+	srcs = { "./*.c" },
+	deps = {
+		"+dispatcher_lib"
+	}
+}
+
--- a/plat/linuxppc/emu/emu.c
+++ b/plat/linuxppc/emu/emu.c
@ -0,0 +1,328 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <strings.h>
+#include <byteswap.h>
+#include <math.h>
+#include "emu.h"
+
+#define BO4 (1<<0)
+#define BO3 (1<<1)
+#define BO2 (1<<2)
+#define BO1 (1<<3)
+#define BO0 (1<<4)
+
+#define XER_SO (1<<31)
+#define XER_OV (1<<30)
+#define XER_CA (1<<29)
+
+cpu_t cpu;
+
+/* Returns the current state of the XER carry (CA) bit, which is the carry
+ * input of the add-with-carry instructions. addo() keeps this bit up to date
+ * whenever an instruction asks for the carry to be recorded. */
+static inline bool carry(void)
+{
+	return (cpu.xer & XER_CA) != 0;
+}
+
+#define swb16(x) bswap_16(x)
+#define swb32(x) bswap_32(x)
+
+/* Reports whether the 32-bit unsigned sum a+b+c wraps around, i.e. the carry
+ * out of a three-operand add. */
+static inline bool carry_3(uint32_t a, uint32_t b, uint32_t c)
+{
+	const uint32_t partial = a + b;
+	const uint32_t total = partial + c;
+
+	/* An unsigned add has wrapped iff the result is below an operand. */
+	return (partial < a) || (total < c);
+}
+
+/* Fetches the current value of general purpose register n. */
+static inline uint32_t reg(uint8_t n)
+{
+	const uint32_t value = cpu.gpr[n];
+	return value;
+}
+
+/* Like reg(), except that register number 0 always reads as zero. */
+static inline uint32_t reg0(uint8_t n)
+{
+	return (n != 0) ? cpu.gpr[n] : 0;
+}
+
+/* Double to bytes: returns the raw 64-bit representation of n. */
+static inline uint64_t d2b(double n)
+{
+	/* Pun through a union; dereferencing a cast pointer would break the
+	 * strict aliasing rules. */
+	union { double d; uint64_t b; } pun;
+
+	pun.d = n;
+	return pun.b;
+}
+
+/* Float to bytes: returns the raw 32-bit representation of n. */
+static inline uint32_t f2b(float n)
+{
+	/* Pun through a union; dereferencing a cast pointer would break the
+	 * strict aliasing rules. */
+	union { float f; uint32_t b; } pun;
+
+	pun.f = n;
+	return pun.b;
+}
+
+/* Bytes to double: reinterprets a raw 64-bit pattern as a double. */
+static inline double b2d(uint64_t n)
+{
+	/* Pun through a union; dereferencing a cast pointer would break the
+	 * strict aliasing rules. */
+	union { uint64_t b; double d; } pun;
+
+	pun.b = n;
+	return pun.d;
+}
+
+/* Bytes to float: reinterprets a raw 32-bit pattern as a float. */
+static inline float b2f(uint32_t n)
+{
+	/* Pun through a union; dereferencing a cast pointer would break the
+	 * strict aliasing rules. */
+	union { uint32_t b; float f; } pun;
+
+	pun.b = n;
+	return pun.f;
+}
+
+/* Reads floating point register n, converting its stored raw bits into a
+ * double. */
+static inline double fpr(uint8_t n)
+{
+	const uint64_t bits = cpu.fpr[n];
+	return b2d(bits);
+}
+
+/* Sign-extends an 8-bit value to 32 bits. */
+static inline uint32_t ext8(int8_t n)
+{
+	/* n is already a signed 8-bit quantity, so widening it sign-extends;
+	 * this avoids left-shifting a negative int, which is undefined. */
+	return (uint32_t)(int32_t)n;
+}
+
+/* Sign-extends a 16-bit value to 32 bits. */
+static inline uint32_t ext16(int16_t n)
+{
+	/* n is already a signed 16-bit quantity, so widening it sign-extends;
+	 * this avoids left-shifting a negative int, which is undefined. */
+	return (uint32_t)(int32_t)n;
+}
+
+/* Sign-extends the low 26 bits of n to 32 bits; any higher bits of n are
+ * ignored. */
+static inline uint32_t ext26(int32_t n)
+{
+	/* Work in unsigned arithmetic: shifting a signed value left into or
+	 * past the sign bit is undefined, and right-shifting a negative value
+	 * is implementation-defined. */
+	const uint32_t sign = UINT32_C(1) << 25;
+	const uint32_t low = (uint32_t)n & ((sign << 1) - 1);
+
+	return (low ^ sign) - sign;
+}
+
+/* Tests one condition register bit. Bits are numbered PowerPC style, with
+ * bit 0 being the most significant. */
+static bool getcr(uint8_t bit)
+{
+	const uint8_t shift = 31 - bit; /* note PowerPC bit numbering */
+	return (cpu.cr >> shift) & 1;
+}
+
+/* Sets or clears one condition register bit. Bits are numbered PowerPC
+ * style, with bit 0 being the most significant. */
+static void setcr(uint8_t bit, bool value)
+{
+	uint32_t mask;
+
+	bit = 31 - bit; /* note PowerPC bit numbering */
+
+	/* Build the mask as an unsigned value: shifting a signed 1 into bit 31
+	 * is undefined behaviour. */
+	mask = (uint32_t)1 << bit;
+	if (value)
+		cpu.cr |= mask;
+	else
+		cpu.cr &= ~mask;
+}
+
+/* When setcr0 is true, records the signed comparison of value against zero
+ * in CR bits 0..2 (less than, greater than, equal) and copies the XER
+ * summary overflow bit into CR bit 3. Does nothing otherwise. */
+static void setcr0(bool setcr0, uint32_t value)
+{
+	const int32_t signed_value = (int32_t)value;
+
+	if (!setcr0)
+		return;
+
+	setcr(0, signed_value < 0);
+	setcr(1, signed_value > 0);
+	setcr(2, signed_value == 0);
+	setcr(3, cpu.xer & XER_SO);
+}
+
+/* Placeholder for updating CR1 after a floating point operation: aborts via
+ * fatal() when setcr1 is true and does nothing otherwise. value is
+ * currently ignored. */
+static void setcr1(bool setcr1, uint64_t value)
+{
+	if (setcr1)
+		fatal("setcr1 not implemented yet");
+}
+
+/* Placeholder for the mcrf instruction: always aborts via fatal(). */
+static void mcrf(uint8_t destfield, uint8_t srcfield)
+{
+	fatal("mcrf not supported yet");
+}
+
+/* Common implementation of the branch instructions.
+ *
+ * bo selects the branch conditions: BO2 set means the count register is left
+ * alone and ignored, otherwise it is decremented and compared against zero
+ * in the sense given by BO3; BO0 set means the condition register is
+ * ignored, otherwise CR bit bi must equal BO1. dest is the target, taken as
+ * absolute when a_bit is set and relative to the current instruction
+ * otherwise. When l_bit is set the link register receives the address of
+ * the following instruction, whether or not the branch is taken. */
+static void branch(uint8_t bo, uint8_t bi, uint32_t dest, bool a_bit, bool l_bit)
+{
+	const bool ignore_cond = (bo & BO0) != 0;
+	const bool wanted_cond = (bo & BO1) != 0;
+	const bool ignore_ctr = (bo & BO2) != 0;
+	const bool want_ctr_zero = (bo & BO3) != 0;
+	bool taken;
+
+	if (!ignore_ctr)
+		cpu.ctr--;
+
+	taken = ignore_ctr || ((cpu.ctr != 0) != want_ctr_zero);
+	if (taken)
+	{
+		const bool cr_bit = (cpu.cr >> (31-bi)) & 1;
+		taken = ignore_cond || (cr_bit == wanted_cond);
+	}
+
+	if (taken)
+		cpu.nia = a_bit ? dest : (dest + cpu.cia);
+
+	if (l_bit)
+		cpu.lr = cpu.cia + 4;
+}
+
+/* Loads consecutive 32-bit words starting at address into registers reg
+ * through 31. */
+static void read_multiple(uint32_t address, uint8_t reg)
+{
+	for (; reg != 32; reg++, address += 4)
+		cpu.gpr[reg] = read_long(address);
+}
+
+/* Stores registers reg through 31 as consecutive 32-bit words starting at
+ * address. */
+static void write_multiple(uint32_t address, uint8_t reg)
+{
+	for (; reg != 32; reg++, address += 4)
+		write_long(address, cpu.gpr[reg]);
+}
+
+/* Placeholder for the load string instructions: always aborts via fatal(). */
+static void read_string(uint32_t address, uint8_t reg, uint8_t bytes)
+{
+	fatal("read_string not supported yet");
+}
+
+/* Placeholder for the store string instructions: always aborts via fatal(). */
+static void write_string(uint32_t address, uint8_t reg, uint8_t bytes)
+{
+	fatal("write_string not supported yet");
+}
+
+/* Returns a+b+c. When set_c is true the XER carry bit is updated to reflect
+ * the carry out of the addition. Requesting overflow recording (set_o) is
+ * not supported and aborts via fatal(). */
+static uint32_t addo(uint32_t a, uint32_t b, uint32_t c, bool set_o, bool set_c)
+{
+	if (set_o)
+		fatal("can't use O bit in add yet");
+
+	if (set_c)
+	{
+		if (carry_3(a, b, c))
+			cpu.xer |= XER_CA;
+		else
+			cpu.xer &= ~XER_CA;
+	}
+
+	return a + b + c;
+}
+
+/* Returns the low 32 bits of a*b. Requesting overflow recording (set_o) is
+ * not supported and aborts via fatal(). */
+static uint32_t mulo(uint32_t a, uint32_t b, bool set_o)
+{
+	const uint32_t product = a * b;
+
+	if (set_o)
+		fatal("can't use O bit in mul yet");
+
+	return product;
+}
+
+/* Signed division of a by b. Requesting overflow recording (set_o) is not
+ * supported and aborts via fatal().
+ *
+ * Division by zero and INT32_MIN / -1 have no representable quotient; both
+ * yield 0 here rather than being handed to the host, where they would be
+ * undefined behaviour (and trap on common hardware). */
+static int32_t divo(int32_t a, int32_t b, bool set_o)
+{
+	if (set_o)
+		fatal("can't use O bit in div yet");
+
+	if (b == 0)
+		return 0;
+	if ((a == INT32_MIN) && (b == -1))
+		return 0;
+	return a / b;
+}
+
+/* Unsigned division of a by b, yielding 0 when b is zero. Requesting
+ * overflow recording (set_o) is not supported and aborts via fatal(). */
+static uint32_t divuo(uint32_t a, uint32_t b, bool set_o)
+{
+	if (set_o)
+		fatal("can't use O bit in divu yet");
+
+	return (b != 0) ? (a / b) : 0;
+}
+
+/* Signed comparison of a with b. The less-than, greater-than and equal
+ * results go into the first three bits of the given CR field; the fourth
+ * bit receives the XER summary overflow bit. */
+static void compares(int32_t a, int32_t b, uint8_t field)
+{
+	const uint8_t base = field*4;
+	const bool summary_overflow = cpu.xer & XER_SO;
+
+	setcr(base+0, a<b);
+	setcr(base+1, a>b);
+	setcr(base+2, a==b);
+	setcr(base+3, summary_overflow);
+}
+
+/* Unsigned comparison of a with b. The less-than, greater-than and equal
+ * results go into the first three bits of the given CR field; the fourth
+ * bit receives the XER summary overflow bit. */
+static void compareu(uint32_t a, uint32_t b, uint8_t field)
+{
+	const uint8_t base = field*4;
+	const bool summary_overflow = cpu.xer & XER_SO;
+
+	setcr(base+0, a<b);
+	setcr(base+1, a>b);
+	setcr(base+2, a==b);
+	setcr(base+3, summary_overflow);
+}
+
+/* Floating point comparison of a with b. Writes a four-bit result into the
+ * given CR field: 0x8 for less than, 0x4 for greater than, 0x2 for equal and
+ * 0x1 when either operand is a NaN. */
+static void comparef(double a, double b, uint8_t field)
+{
+	const uint8_t shift = 28 - field*4; /* note PowerPC bit numbering */
+	uint32_t flags;
+
+	if (isnan(a) || isnan(b))
+		flags = 0x1;
+	else if (a < b)
+		flags = 0x8;
+	else if (a > b)
+		flags = 0x4;
+	else
+		flags = 0x2;
+
+	cpu.cr = (cpu.cr & ~((uint32_t)0xf << shift)) | (flags << shift);
+
+	/* TODO: ordered/unordered, FPSCR, etc. */
+}
+
+/* Counts the zero bits above the most significant set bit of source;
+ * returns 32 when source is zero.
+ *
+ * ffs() is no use here: it finds the least significant set bit, which only
+ * gives the right answer when at most one bit is set. */
+static uint32_t cntlzw(uint32_t source)
+{
+	uint32_t count = 0;
+	uint32_t probe = UINT32_C(0x80000000);
+
+	/* Walk down from the top bit until a set bit is found. */
+	while (probe && !(source & probe))
+	{
+		count++;
+		probe >>= 1;
+	}
+	return count;
+}
+
+/* Placeholder for the popcntb instruction: always aborts via fatal(), so no
+ * value is ever returned. */
+static uint32_t popcntb(uint32_t source)
+{
+	fatal("popcntb not supported");
+}
+
+/* Rotates i left by shift bits. Only the low five bits of shift are used,
+ * so a count of 0 (or any multiple of 32) returns i unchanged. */
+static uint32_t rotate(uint32_t i, uint32_t shift)
+{
+	shift &= 31;
+
+	/* Reducing the right shift count as well avoids shifting a 32-bit
+	 * value by 32, which is undefined behaviour, when shift is zero. */
+	return (i << shift) | (i >> ((32 - shift) & 31));
+}
+
+/* Rotates source left by shift bits (modulo 32), then ANDs it with a mask.
+ *
+ * mb and me are inclusive bit positions in PowerPC numbering (bit 0 is the
+ * most significant). When mb <= me the mask covers bits mb..me; when
+ * mb > me the mask wraps around and covers bits 0..me together with bits
+ * mb..31. */
+static uint32_t rlwnm(uint32_t source, uint8_t shift, uint8_t mb, uint8_t me)
+{
+	uint32_t from_mb;
+	uint32_t upto_me;
+	uint32_t mask;
+
+	mb &= 31;
+	me &= 31;
+	from_mb = UINT32_C(0xffffffff) >> mb;        /* bits mb..31 */
+	upto_me = UINT32_C(0xffffffff) << (31 - me); /* bits 0..me */
+	mask = (mb <= me) ? (from_mb & upto_me) : (from_mb | upto_me);
+
+	return rotate(source, shift & 31) & mask;
+}
+
+/* Placeholder for the rotate-and-mask-insert instruction: always aborts via
+ * fatal(), so no value is ever returned. */
+static uint32_t rlwmi(uint32_t source, uint8_t shift, uint8_t mb, uint8_t me)
+{
+	fatal("rlwmi not supported");
+}
+
+/* Placeholder for the mtcrf instruction: always aborts via fatal(). */
+static void mtcrf(uint8_t fxm, uint32_t value)
+{
+	fatal("mtcrf not supported");
+}
+
+/* Decodes and executes the instruction word in value.
+ *
+ * The decoder itself is dispatcher.h, which is textually included into this
+ * function body; the build generates it from instructions.dat with
+ * mkdispatcher.lua. Each generated test returns once it has run the matching
+ * instruction, so reaching the statement after the include means nothing
+ * matched. */
+static void dispatch(uint32_t value)
+{
+	#include "dispatcher.h"
+	fatal("unimplemented instruction 0x%0x (major opcode %d)", value, value>>26);
+}
+
+/* Writes a human-readable dump of the CPU state to stream: the special
+ * registers, all 32 general purpose registers (four per line) and finally
+ * the instruction word at the current instruction address. */
+void dump_state(FILE* stream)
+{
+	int i;
+
+	fprintf(stream, "\n");
+	fprintf(stream, "pc=0x%08x lr=0x%08x ctr=0x%08x xer=0x%08x cr=0x%08x",
+		cpu.cia, cpu.lr, cpu.ctr, cpu.xer, cpu.cr);
+	for (i=0; i<32; i++)
+	{
+		if ((i % 4) == 0)
+			fprintf(stream, "\n");
+		fprintf(stream, "gpr%02d=0x%08x ", i, cpu.gpr[i]);
+	}
+	/* This newline belongs to the dump, so it must go to the caller's
+	 * stream rather than unconditionally to stderr. */
+	fprintf(stream, "\n");
+
+	/* This might fail and cause a reentrant trap if cia is invalid, so
+	 * do it last. */
+	fprintf(stream, "insn=0x%08x\n", read_long(cpu.cia));
+}
+
+/* Executes exactly one instruction: fetches the word at the current
+ * instruction address, runs it, then advances to the next instruction. */
+void single_step(void)
+{
+	uint32_t value = read_long(cpu.cia);
+	/* Default to falling through; dispatch() may overwrite nia (the branch
+	 * helper does so when a branch is taken). */
+	cpu.nia = cpu.cia + 4;
+	dispatch(value);
+	cpu.cia = cpu.nia;
+}
+
--- a/plat/linuxppc/emu/emu.h
+++ b/plat/linuxppc/emu/emu.h
@ -0,0 +1,37 @@
+#ifndef EMU_H
+#define EMU_H
+
+extern void fatal(char* fmt, ...);
+
+/* Register state of the emulated CPU. */
+typedef struct
+{
+	uint32_t gpr[32];	/* general purpose registers */
+	uint64_t fpr[32];	/* floating point registers, held as raw 64-bit patterns */
+	uint32_t cr;		/* condition register */
+	uint32_t ctr;		/* count register */
+	uint32_t lr;		/* link register */
+	uint32_t xer;		/* fixed point exception register (SO, OV and CA bits) */
+	uint32_t fpscr;		/* floating point status and control register */
+	uint32_t cia;		/* address of the instruction being executed */
+	uint32_t nia;		/* address of the instruction to execute next */
+}
+cpu_t;
+
+extern cpu_t cpu;
+
+extern uint32_t read_byte(uint32_t address);
+extern uint32_t read_word(uint32_t address);
+extern uint32_t read_long(uint32_t address);
+extern uint64_t read_double(uint32_t address);
+extern void write_byte(uint32_t address, uint32_t value);
+extern void write_word(uint32_t address, uint32_t value);
+extern void write_long(uint32_t address, uint32_t value);
+extern void write_double(uint32_t address, uint64_t value);
+
+extern void system_call(uint8_t trapno);
+
+extern void dump_state(FILE* stream);
+extern void single_step(void);
+
+#endif
+
--- a/plat/linuxppc/emu/instructions.dat
+++ b/plat/linuxppc/emu/instructions.dat
@ -0,0 +1,211 @@
+# Syntax:
+# <thing---->: a field occupying so many bits.
+# T: a field occupying one bit.
+# .: a bit we don't care about.
+
+# Branch processor instructions.
+
+<18--><LI-------------------->AL branch(0x1f, 0x00, ext26(LI<<2),   A, L);
+<16--><BO-><BI-><BD---------->AL branch(BO,   BI,   ext16(BD<<2),   A, L);
+<19--><BO-><BI-><BH-><16------>L branch(BO,   BI,   cpu.lr,         1, L);
+<19--><BO-><BI-><BH-><528----->L branch(BO,   BI,   cpu.ctr,        1, L);
+<17-->..............<LEV-->...1. system_call(LEV);
+
+# Condition register instructions.
+
+<19--><BT-><BA-><BB-><257----->. setcr(BT, getcr(BA) & getcr(BB));
+<19--><BT-><BA-><BB-><449----->. setcr(BT, getcr(BA) | getcr(BB));
+<19--><BT-><BA-><BB-><193----->. setcr(BT, getcr(BA) ^ getcr(BB));
+<19--><BT-><BA-><BB-><225----->. setcr(BT, !(getcr(BA) & getcr(BB)));
+<19--><BT-><BA-><BB-><33------>. setcr(BT, !(getcr(BA) | getcr(BB)));
+<19--><BT-><BA-><BB-><289----->. setcr(BT, getcr(BA) == getcr(BB));
+<19--><BT-><BA-><BB-><129----->. setcr(BT, getcr(BA) & !getcr(BB));
+<19--><BT-><BA-><BB-><417----->. setcr(BT, getcr(BA) | !getcr(BB));
+<19--><BF>.<BA>......<0------->. mcrf(BF, BA);
+
+# Fixed point loads
+
+<34--><RT-><RA-><D-------------> cpu.gpr[RT] = read_byte(reg0(RA) + ext16(D));
+<31--><RT-><RA-><RB-><87------>. cpu.gpr[RT] = read_byte(reg0(RA) + reg(RB));
+<35--><RT-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.gpr[RT] = read_byte(ea); cpu.gpr[RA] = ea;
+<31--><RT-><RA-><RB-><119----->. uint32_t ea = reg(RA) + reg(RB); cpu.gpr[RT] = read_byte(ea); cpu.gpr[RA] = ea;
+<40--><RT-><RA-><D-------------> cpu.gpr[RT] = read_word(reg0(RA) + ext16(D));
+<31--><RT-><RA-><RB-><279----->. cpu.gpr[RT] = read_word(reg0(RA) + reg(RB));
+<41--><RT-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.gpr[RT] = read_word(ea); cpu.gpr[RA] = ea;
+<31--><RT-><RA-><RB-><311----->. uint32_t ea = reg(RA) + reg(RB); cpu.gpr[RT] = read_word(ea); cpu.gpr[RA] = ea;
+<42--><RT-><RA-><D-------------> cpu.gpr[RT] = ext16(read_word(reg0(RA) + ext16(D)));
+<31--><RT-><RA-><RB-><343----->. cpu.gpr[RT] = ext16(read_word(reg0(RA) + reg(RB)));
+<43--><RT-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.gpr[RT] = ext16(read_word(ea)); cpu.gpr[RA] = ea;
+<31--><RT-><RA-><RB-><375----->. uint32_t ea = reg(RA) + reg(RB); cpu.gpr[RT] = ext16(read_word(ea)); cpu.gpr[RA] = ea;
+<32--><RT-><RA-><D-------------> cpu.gpr[RT] = read_long(reg0(RA) + ext16(D));
+<31--><RT-><RA-><RB-><23------>. cpu.gpr[RT] = read_long(reg0(RA) + reg(RB));
+<33--><RT-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.gpr[RT] = read_long(ea); cpu.gpr[RA] = ea;
+<31--><RT-><RA-><RB-><55------>. uint32_t ea = reg(RA) + reg(RB); cpu.gpr[RT] = read_long(ea); cpu.gpr[RA] = ea;
+<58--><RT-><RA-><DS---------->10 cpu.gpr[RT] = read_long(reg0(RA) + ext16(DS<<2));
+<31--><RT-><RA-><RB-><341----->. cpu.gpr[RT] = read_long(reg0(RA) + reg(RB));
+<31--><RT-><RA-><RB-><373----->. uint32_t ea = reg(RA) + reg(RB); cpu.gpr[RT] = read_long(ea); cpu.gpr[RA] = ea;
+
+# Fixed point stores
+
+<38--><RS-><RA-><D-------------> write_byte(reg0(RA) + ext16(D), reg(RS));
+<31--><RS-><RA-><RB-><215----->. write_byte(reg0(RA) + reg(RB), reg(RS));
+<39--><RS-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_byte(ea, reg(RS)); cpu.gpr[RA] = ea;
+<31--><RS-><RA-><RB-><247----->. uint32_t ea = reg(RA) + reg(RB); write_byte(ea, reg(RS)); cpu.gpr[RA] = ea;
+<44--><RS-><RA-><D-------------> write_word(reg0(RA) + ext16(D), reg(RS));
+<31--><RS-><RA-><RB-><407----->. write_word(reg0(RA) + reg(RB), reg(RS));
+<45--><RS-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_word(ea, reg(RS)); cpu.gpr[RA] = ea;
+<31--><RS-><RA-><RB-><439----->. uint32_t ea = reg(RA) + reg(RB); write_word(ea, reg(RS)); cpu.gpr[RA] = ea;
+<36--><RS-><RA-><D-------------> write_long(reg0(RA) + ext16(D), reg(RS));
+<31--><RS-><RA-><RB-><151----->. write_long(reg0(RA) + reg(RB), reg(RS));
+<37--><RS-><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_long(ea, reg(RS)); cpu.gpr[RA] = ea;
+<31--><RS-><RA-><RB-><183----->. uint32_t ea = reg(RA) + reg(RB); write_long(ea, reg(RS)); cpu.gpr[RA] = ea;
+
+# Fixed point load/stores with byte reversal
+
+<31--><RT-><RA-><RB-><790----->. cpu.gpr[RT] = swb16(read_word(reg0(RA) + reg(RB)));
+<31--><RT-><RA-><RB-><534----->. cpu.gpr[RT] = swb32(read_long(reg0(RA) + reg(RB)));
+<31--><RS-><RA-><RB-><918----->. write_word(reg0(RA) + reg(RB), swb16(reg(RS)));
+<31--><RS-><RA-><RB-><662----->. write_long(reg0(RA) + reg(RB), swb32(reg(RS)));
+
+# Load/store multiple
+
+<46--><RT-><RA-><D-------------> read_multiple(reg0(RA) + ext16(D), RT);
+<47--><RS-><RA-><D-------------> write_multiple(reg0(RA) + ext16(D), RS);
+<31--><RT-><RA-><NB-><597----->. read_string(reg0(RA), RT, NB ? NB : 32);
+<31--><RT-><RA-><RB-><533----->. read_string(reg0(RA) + reg(RB), RT, cpu.xer & 0x1f);
+<31--><RS-><RA-><NB-><725----->. write_string(reg0(RA), RS, NB ? NB : 32);
+<31--><RS-><RA-><RB-><661----->. write_string(reg0(RA) + reg(RB), RS, cpu.xer & 0x1f);
+
+# ALU instructions
+
+<14--><RT-><RA-><SI------------>           cpu.gpr[RT] = reg0(RA) + ext16(SI);
+<15--><RT-><RA-><SI------------>           cpu.gpr[RT] = reg0(RA) + (SI<<16);
+<31--><RT-><RA-><RB->O<266---->R setcr0(R, cpu.gpr[RT] = addo(reg(RA), reg(RB), 0, O, 0));
+<31--><RT-><RA-><RB->O<40----->R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), reg(RB), 1, O, 0));
+<6-->R<RT-><RA-><SI------------>           cpu.gpr[RT] = addo(reg(RA), ext16(SI), 0, 0, 1);
+<8---><RT-><RA-><SI------------>           cpu.gpr[RT] = addo(~reg(RA), ext16(SI), 1, 0, 1);
+<31--><RT-><RA-><RB->O<10----->R setcr0(R, cpu.gpr[RT] = addo(reg(RA), reg(RB), 0, O, 1));
+<31--><RT-><RA-><RB->O<8------>R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), reg(RB), 1, O, 1));
+<31--><RT-><RA-><RB->O<138---->R setcr0(R, cpu.gpr[RT] = addo(reg(RA), reg(RB), carry(), O, 1));
+<31--><RT-><RA-><RB->O<136---->R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), reg(RB), carry(), O, 1));
+<31--><RT-><RA->.....O<234---->R setcr0(R, cpu.gpr[RT] = addo(reg(RA), -1, carry(), O, 1));
+<31--><RT-><RA->.....O<232---->R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), -1, carry(), O, 1));
+<31--><RT-><RA->.....O<202---->R setcr0(R, cpu.gpr[RT] = addo(reg(RA), 0, carry(), O, 1));
+<31--><RT-><RA->.....O<200---->R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), 0, carry(), O, 1));
+<31--><RT-><RA->.....O<104---->R setcr0(R, cpu.gpr[RT] = addo(~reg(RA), 0, 1, O, 0));
+<7---><RT-><RA-><SI------------>           cpu.gpr[RT] = reg(RA) * ext16(SI);
+<31--><RT-><RA-><RB->O<235---->R setcr0(R, cpu.gpr[RT] = mulo(reg(RA), reg(RB), O));
+<31--><RT-><RA-><RB->O<491---->R setcr0(R, cpu.gpr[RT] = divo(reg(RA), reg(RB), O));
+<31--><RT-><RA-><RB->O<459---->R setcr0(R, cpu.gpr[RT] = divuo(reg(RA), reg(RB), O));
+
+# Comparison instructions
+
+<11--><F>.0<RA-><SI------------> compares(reg(RA), ext16(SI), F);
+<31--><F>.0<RA-><RB-><0------->. compares(reg(RA), reg(RB), F);
+<10--><F>.0<RA-><UI------------> compareu(reg(RA), UI, F);
+<31--><F>.0<RA-><RB-><32------>. compareu(reg(RA), reg(RB), F);
+
+# Logical instructions
+
+<28--><RS-><RA-><UI------------> setcr0(1, cpu.gpr[RA] = reg(RS) & UI);
+<29--><RS-><RA-><UI------------> setcr0(1, cpu.gpr[RA] = reg(RS) & (UI<<16));
+<24--><RS-><RA-><UI------------>           cpu.gpr[RA] = reg(RS) | UI;
+<25--><RS-><RA-><UI------------>           cpu.gpr[RA] = reg(RS) | (UI<<16);
+<26--><RS-><RA-><UI------------>           cpu.gpr[RA] = reg(RS) ^ UI;
+<27--><RS-><RA-><UI------------>           cpu.gpr[RA] = reg(RS) ^ (UI<<16);
+<31--><RS-><RA-><RB-><28------>R setcr0(R, cpu.gpr[RA] = reg(RS) & reg(RB));
+<31--><RS-><RA-><RB-><444----->R setcr0(R, cpu.gpr[RA] = reg(RS) | reg(RB));
+<31--><RS-><RA-><RB-><316----->R setcr0(R, cpu.gpr[RA] = reg(RS) ^ reg(RB));
+<31--><RS-><RA-><RB-><476----->R setcr0(R, cpu.gpr[RA] = ~(reg(RS) & reg(RB)));
+<31--><RS-><RA-><RB-><124----->R setcr0(R, cpu.gpr[RA] = ~(reg(RS) | reg(RB)));
+<31--><RS-><RA-><RB-><284----->R setcr0(R, cpu.gpr[RA] = ~(reg(RS) ^ reg(RB)));
+<31--><RS-><RA-><RB-><60------>R setcr0(R, cpu.gpr[RA] = reg(RS) & ~reg(RB));
+<31--><RS-><RA-><RB-><412----->R setcr0(R, cpu.gpr[RA] = reg(RS) | ~reg(RB));
+<31--><RS-><RA->.....<954----->R setcr0(R, cpu.gpr[RA] = ext8(reg(RS)));
+<31--><RS-><RA->.....<922----->R setcr0(R, cpu.gpr[RA] = ext16(reg(RS)));
+<31--><RS-><RA->.....<26------>R setcr0(R, cpu.gpr[RA] = cntlzw(reg(RS)));
+<31--><RS-><RA->.....<122----->R setcr0(R, cpu.gpr[RA] = popcntb(reg(RS)));
+
+# Rotation/shift instructions
+
+<21--><RS-><RA-><SH-><MB-><ME->R setcr0(R, cpu.gpr[RA] = rlwnm(reg(RS), SH, MB, ME));
+<23--><RS-><RA-><RB-><MB-><ME->R setcr0(R, cpu.gpr[RA] = rlwnm(reg(RS), reg(RB), MB, ME));
+<20--><RS-><RA-><SH-><MB-><ME->R setcr0(R, cpu.gpr[RA] = rlwmi(reg(RS), SH, MB, ME));
+<31--><RS-><RA-><RB-><24------>R setcr0(R, cpu.gpr[RA] = reg(RS) << (reg(RB) & 0x1f));
+<31--><RS-><RA-><RB-><536----->R setcr0(R, cpu.gpr[RA] = reg(RS) >> (reg(RB) & 0x1f));
+<31--><RS-><RA-><SH-><824----->R setcr0(R, cpu.gpr[RA] = ((int32_t)reg(RS)) >> SH);
+<31--><RS-><RA-><RB-><792----->R setcr0(R, cpu.gpr[RA] = ((int32_t)reg(RS)) >> (reg(RB) & 0x1f));
+
+# Move to/from special registers
+
+<31--><RS-><1-->00000<467----->. cpu.xer = reg(RS);
+<31--><RS-><8-->00000<467----->. cpu.lr = reg(RS);
+<31--><RS-><9-->00000<467----->. cpu.ctr = reg(RS);
+<31--><RT-><1-->00000<339----->. cpu.gpr[RT] = cpu.xer;
+<31--><RT-><8-->00000<339----->. cpu.gpr[RT] = cpu.lr;
+<31--><RT-><9-->00000<339----->. cpu.gpr[RT] = cpu.ctr;
+<31--><RS->0<FXM--->.<144----->. mtcrf(FXM, reg(RS));
+<31--><RT->0.........<19------>. cpu.gpr[RT] = cpu.cr;
+
+# Floating point operations follow.
+#
+# These are extremely crude, and just enough has been implemented to make the
+# tests pass. The FPSCR bits are all ignored completely.
+
+# FPSCR manipulation
+
+<63--><FRT>..........<583----->R fatal("mffs not supported");
+<63--><F>..<B>.......<64------>. fatal("mcrfs not supported");
+<63--><F>.......<U->.<134----->R fatal("mtsfsfi not supported");
+<63-->.<FLM--->.<FRB><711----->R fatal("mtfsf not supported");
+<63--><BT->..........<70------>R fatal("mtfsb0 not supported");
+<63--><BT->..........<38------>R fatal("mtfsb1 not supported");
+
+# Floating point double loads (raw bits)
+
+<50--><FRT><RA-><D-------------> cpu.fpr[FRT] = read_double(reg0(RA) + ext16(D));
+<31--><FRT><RA-><RB-><599----->. cpu.fpr[FRT] = read_double(reg0(RA) + reg(RB));
+<51--><FRT><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.fpr[FRT] = read_double(ea); cpu.gpr[RA] = ea;
+<31--><FRT><RA-><RB-><631----->. uint32_t ea = reg(RA) + reg(RB); cpu.fpr[FRT] = read_double(ea); cpu.gpr[RA] = ea;
+
+# Floating point double stores (raw bits)
+
+<54--><FRS><RA-><D-------------> write_double(reg0(RA) + ext16(D), cpu.fpr[FRS]);
+<31--><FRS><RA-><RB-><727----->. write_double(reg0(RA) + reg(RB), cpu.fpr[FRS]);
+<55--><FRS><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_double(ea, cpu.fpr[FRS]); cpu.gpr[RA] = ea;
+<31--><FRS><RA-><RB-><759----->. uint32_t ea = reg(RA) + reg(RB); write_double(ea, cpu.fpr[FRS]); cpu.gpr[RA] = ea;
+
+# Floating point single loads (convert from single to double)
+
+<48--><FRT><RA-><D-------------> cpu.fpr[FRT] = d2b(b2f(read_long(reg0(RA) + ext16(D))));
+<31--><FRT><RA-><RB-><535----->. cpu.fpr[FRT] = d2b(b2f(read_long(reg0(RA) + reg(RB))));
+<49--><FRT><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.fpr[FRT] = d2b(b2f(read_long(ea))); cpu.gpr[RA] = ea;
+<31--><FRT><RA-><RB-><567----->. uint32_t ea = reg(RA) + reg(RB); cpu.fpr[FRT] = d2b(b2f(read_long(ea))); cpu.gpr[RA] = ea;
+
+# Floating point single stores (convert from double to single)
+
+<52--><FRS><RA-><D-------------> write_long(reg0(RA) + ext16(D), f2b(fpr(FRS)));
+<31--><FRS><RA-><RB-><663----->. write_long(reg0(RA) + reg(RB), f2b(fpr(FRS)));
+<53--><FRS><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_long(ea, f2b(fpr(FRS))); cpu.gpr[RA] = ea;
+<31--><FRS><RA-><RB-><695----->. uint32_t ea = reg(RA) + reg(RB); write_long(ea, f2b(fpr(FRS))); cpu.gpr[RA] = ea;
+
+# Floating point arithmetic
+
+<63--><FRT>.....<FRB><72------>R setcr1(R, cpu.fpr[FRT] = cpu.fpr[FRB]);
+<63--><FRT>.....<FRB><40------>R setcr1(R, cpu.fpr[FRT] = d2b(-fpr(FRB)));
+<63--><FRT>.....<FRB><264----->R setcr1(R, cpu.fpr[FRT] = d2b(fabs(fpr(FRB))));
+<63--><FRT>.....<FRB><136----->R setcr1(R, cpu.fpr[FRT] = d2b(-fabs(fpr(FRB))));
+<63--><FRT><FRA><FRB><21------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) + fpr(FRB)));
+<63--><FRT><FRA><FRB><20------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) - fpr(FRB)));
+<63--><FRT><FRA><FRB><25------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) * fpr(FRB)));
+<63--><FRT><FRA><FRB><18------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) / fpr(FRB)));
+
+# Floating point comparisons
+
+<63--><F>..<FRA><FRB><0------->. comparef(fpr(FRA), fpr(FRB), F);
+<63--><F>..<FRA><FRB><32------>. comparef(fpr(FRA), fpr(FRB), F);
+
+# Floating point conversions
+
+<63--><FRT>.....<FRB><14------>R setcr1(R, fpr(FRB)); cpu.fpr[FRT] = (uint32_t)fpr(FRB);
+<63--><FRT>.....<FRB><15------>R setcr1(R, fpr(FRB)); cpu.fpr[FRT] = (uint32_t)fpr(FRB);
--- a/plat/linuxppc/emu/main.c
+++ b/plat/linuxppc/emu/main.c
@ -0,0 +1,258 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+#include "emu.h"
+
+#define RAM_BASE 0x10000000
+#define RAM_TOP  0x10100000
+
+#define BRK_TOP (RAM_TOP - 0x1000)
+
+#define INIT_SP RAM_TOP
+#define INIT_PC 0x08000054
+
+#define EXIT_PC 0xdeaddead
+
+/* Read/write macros */
+#define READ_BYTE(BASE, ADDR) (BASE)[ADDR]
+#define READ_WORD(BASE, ADDR) (((BASE)[ADDR]<<8) |			\
+							  (BASE)[(ADDR)+1])
+#define READ_LONG(BASE, ADDR) (((BASE)[ADDR]<<24) |			\
+							  ((BASE)[(ADDR)+1]<<16) |		\
+							  ((BASE)[(ADDR)+2]<<8) |		\
+							  (BASE)[(ADDR)+3])
+
+#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[ADDR] = (VAL)&0xff
+#define WRITE_WORD(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>8) & 0xff;		\
+									(BASE)[(ADDR)+1] = (VAL)&0xff
+#define WRITE_LONG(BASE, ADDR, VAL) (BASE)[ADDR] = ((VAL)>>24) & 0xff;		\
+									(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;	\
+									(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;		\
+									(BASE)[(ADDR)+3] = (VAL)&0xff
+
+
+static void emulated_syscall(void);
+
+static unsigned char ram[RAM_TOP - RAM_BASE];
+uint32_t brkbase = RAM_BASE;
+uint32_t brkpos = RAM_BASE;
+uint32_t entrypoint = RAM_BASE;
+
+/* Prints "fatal: " followed by the printf-style message and a newline to
+ * stderr, dumps the CPU state, then exits with a failure status. The static
+ * guard ensures the state dump is attempted only once, in case dumping the
+ * state itself ends up calling fatal() again. */
+void fatal(char* fmt, ...)
+{
+	static bool guard = false;
+	va_list ap;
+
+	fputs("fatal: ", stderr);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+
+	if (!guard)
+	{
+		guard = true;
+		dump_state(stderr);
+	}
+
+	exit(EXIT_FAILURE);
+}
+
+/* Converts an emulated address into an offset into the ram array, aborting
+ * via fatal() if the address lies outside [RAM_BASE, RAM_TOP). */
+static uint32_t transform_address(uint32_t address)
+{
+	const uint32_t ram_size = RAM_TOP - RAM_BASE;
+	/* Addresses below RAM_BASE wrap to a huge offset, so one unsigned
+	 * comparison covers both ends of the range. */
+	const uint32_t offset = address - RAM_BASE;
+
+	if (offset < ram_size)
+		return offset;
+
+	fatal("address 0x%x out of bounds", address);
+	return offset;
+}
+
+/* Reads a big-endian 64-bit value from emulated memory as two 32-bit
+ * halves, most significant half first. */
+uint64_t read_double(uint32_t address)
+{
+	const uint64_t high = read_long(address+0);
+	const uint64_t low = read_long(address+4);
+
+	return (high << 32) | low;
+}
+
+/* Reads a big-endian 32-bit value from emulated memory. */
+uint32_t read_long(uint32_t address)
+{
+	const uint32_t offset = transform_address(address);
+	const uint32_t v = READ_LONG(ram, offset);
+
+	#if 0
+	fprintf(stderr, "read 0x%08x: 0x%08x\n", address, v);
+	#endif
+	return v;
+}
+
+/* Reads a big-endian 16-bit value from emulated memory. */
+uint32_t read_word(uint32_t address)
+{
+	const uint32_t offset = transform_address(address);
+	return READ_WORD(ram, offset);
+}
+
+/* Reads a single byte from emulated memory. */
+uint32_t read_byte(uint32_t address)
+{
+	const uint32_t offset = transform_address(address);
+	return READ_BYTE(ram, offset);
+}
+
+/* Stores the low 8 bits of value into emulated memory. */
+void write_byte(uint32_t address, uint32_t value)
+{
+	const uint32_t offset = transform_address(address);
+	WRITE_BYTE(ram, offset, value);
+}
+
+/* Stores the low 16 bits of value into emulated memory, big-endian. */
+void write_word(uint32_t address, uint32_t value)
+{
+	const uint32_t offset = transform_address(address);
+	WRITE_WORD(ram, offset, value);
+}
+
+/* Stores a 32-bit value into emulated memory, big-endian. */
+void write_long(uint32_t address, uint32_t value)
+{
+	const uint32_t offset = transform_address(address);
+
+	#if 0
+	fprintf(stderr, "write 0x%08x: 0x%08x\n", address, value);
+	#endif
+	WRITE_LONG(ram, offset, value);
+}
+
+/* Stores a 64-bit value into emulated memory as two big-endian 32-bit
+ * halves, most significant half first. */
+void write_double(uint32_t address, uint64_t value)
+{
+	const uint32_t high = (uint32_t)(value >> 32);
+	const uint32_t low = (uint32_t)value;
+
+	write_long(address+0, high);
+	write_long(address+4, low);
+}
+
+/* Services the system call whose number is in r0, taking arguments from
+ * r3..r5 and leaving the result in r3. The CR0 summary overflow bit is
+ * cleared on entry and set again on the error path, where r3 receives the
+ * host errno. Unknown call numbers abort via fatal(). trapno is currently
+ * unused. */
+void system_call(uint8_t trapno)
+{
+	cpu.cr &= ~(1<<28); /* reset summary overflow (for success) */
+	switch (cpu.gpr[0])
+	{
+		case 1: /* exit */
+			exit(cpu.gpr[3]);
+
+		case 4: /* write */
+		{
+			int fd = cpu.gpr[3];
+			uint32_t address = cpu.gpr[4];
+			uint32_t len = cpu.gpr[5];
+			uint32_t offset = transform_address(address);
+
+			/* Check the length against the space left after the start of
+			 * the buffer. Checking address+len instead can wrap around
+			 * and be accepted, and it wrongly rejects a buffer that ends
+			 * exactly at the top of RAM. */
+			if (len > (uint32_t)(RAM_TOP - RAM_BASE) - offset)
+				fatal("address 0x%x out of bounds", address + len);
+
+			cpu.gpr[3] = write(fd, ram + offset, len);
+			if (cpu.gpr[3] == (uint32_t)-1)
+				goto error;
+			break;
+		}
+
+		case 45: /* brk */
+		{
+			uint32_t newpos = cpu.gpr[3];
+			if (newpos == 0)
+				cpu.gpr[3] = brkpos;
+			else if ((newpos < brkbase) || (newpos >= BRK_TOP))
+				cpu.gpr[3] = -ENOMEM;
+			else
+			{
+				brkpos = newpos;
+				cpu.gpr[3] = 0;
+			}
+			break;
+		}
+
+		case 20: /* getpid */
+		case 48: /* signal */
+		case 54: /* ioctl */
+		case 67: /* sigaction */
+		case 78: /* gettimeofday */
+		case 126: /* sigprocmask */
+			/* Not emulated; simply report success. */
+			cpu.gpr[3] = 0;
+			break;
+
+		error:
+			cpu.gpr[3] = errno;
+			cpu.cr |= (1<<28); /* set summary overflow (for failure) */
+			return;
+
+		default:
+			fatal("unimplemented system call %d", cpu.gpr[0]);
+	}
+}
+
+/* Loads a big-endian 32-bit ELF executable with exactly one program header
+ * from fd into emulated RAM, and sets entrypoint, brkbase and brkpos to
+ * match. Any problem aborts via fatal().
+ *
+ * The start of the ram array doubles as scratch space while the ELF and
+ * program headers are parsed. */
+static void load_program(FILE* fd)
+{
+	const uint32_t ram_size = RAM_TOP - RAM_BASE;
+
+	if (fseek(fd, 0, SEEK_SET) != 0)
+		fatal("couldn't read ELF header");
+	if (fread(ram, 1, 0x34, fd) != 0x34)
+		fatal("couldn't read ELF header");
+
+	uint32_t phoff = READ_LONG(ram, 0x1c);
+	uint16_t phentsize = READ_WORD(ram, 0x2a);
+	uint16_t phnum = READ_WORD(ram, 0x2c);
+	entrypoint = READ_LONG(ram, 0x18);
+	if ((phentsize != 0x20) || (phnum != 1))
+		fatal("unsupported ELF file");
+
+	if (fseek(fd, phoff, SEEK_SET) != 0)
+		fatal("couldn't read program header");
+	if (fread(ram, 1, phentsize, fd) != phentsize)
+		fatal("couldn't read program header");
+
+	uint32_t offset = READ_LONG(ram, 0x04);
+	uint32_t vaddr = READ_LONG(ram, 0x08);
+	uint32_t filesz = READ_LONG(ram, 0x10);
+	uint32_t memsz = READ_LONG(ram, 0x14);
+
+	uint32_t vaddroffset = transform_address(vaddr);
+	/* Check the segment size against the space left in RAM. Checking
+	 * vaddr+memsz instead can wrap around, and rejects a segment which
+	 * ends exactly at the top of RAM. */
+	if (memsz > ram_size - vaddroffset)
+		fatal("address 0x%x out of bounds", vaddr + memsz);
+	/* The file image must fit inside the in-memory image, otherwise the
+	 * read below could run past the end of the ram array. */
+	if (filesz > memsz)
+		fatal("unsupported ELF file");
+	brkbase = brkpos = vaddr + memsz;
+
+	memset(ram+vaddroffset, 0, memsz);
+	if (fseek(fd, offset, SEEK_SET) != 0)
+		fatal("couldn't read program data");
+	if (fread(ram+vaddroffset, 1, filesz, fd) != filesz)
+		fatal("couldn't read program data");
+}
+
+/* Program entry point: loads the ELF file named by the single command line
+ * argument, builds a minimal initial stack, then single-steps the CPU until
+ * the program counter reaches EXIT_PC. */
+int main(int argc, char* argv[])
+{
+	if(argc != 2)
+		fatal("syntax: emuppc <program file>");
+
+	FILE* fd = fopen(argv[1], "rb");
+	if (!fd)
+		fatal("Unable to open %s", argv[1]);
+	load_program(fd);
+	fclose(fd);
+
+	/* On entry, the Linux stack looks like this.
+	 *
+	 * sp+..           NULL
+	 * sp+8+(4*argc)   env (X quads)
+	 * sp+4+(4*argc)   NULL
+	 * sp+4            argv (argc quads)
+	 * sp              argc
+	 *
+	 * We'll set it up with a bodgy stack frame with argc=0 just to keep the
+	 * startup code happy.
+	 */
+
+	{
+		/* Each write_long() pre-decrements sp, so the words are pushed
+		 * from the top of RAM downwards in the order written here. */
+		uint32_t sp = INIT_SP;
+		write_long(sp -= 4, 0);
+		uint32_t envp = sp;
+		write_long(sp -= 4, envp);
+		write_long(sp -= 4, 0);
+		/* Note: this local shadows main's argv parameter, which is not
+		 * needed any more at this point. */
+		unsigned long argv = sp;
+		write_long(sp -= 4, argv);
+		write_long(sp -= 4, 0);
+		cpu.gpr[1] = sp;
+		cpu.cia = entrypoint;
+	}
+
+	/* Seed the link register with the sentinel address, so that a return
+	 * from the entry point ends the loop below. */
+	cpu.lr = EXIT_PC;
+	while (cpu.cia != EXIT_PC)
+	{
+		#if 0
+		dump_state(stderr);
+		#endif
+		single_step();
+	}
+
+	return 0;
+}
+
--- a/plat/linuxppc/emu/mkdispatcher.lua
+++ b/plat/linuxppc/emu/mkdispatcher.lua
@ -0,0 +1,77 @@
+--- Parses the 32-character bit pattern at the start of an instruction line.
+-- Pattern syntax: `<name--->` is a field as wide as its text, a single
+-- alphanumeric character is a one-bit field, and `.` is a don't-care bit.
+-- A field whose name consists only of digits is a literal to match against;
+-- any other field becomes a variable holding the extracted bits.
+-- @param line one line of the instruction table: the pattern, a space, then code
+-- @return a list of fields, each with `pos` (bit offset counted from the
+--   least significant bit), `size`, `value`, `literal` and `variable`
+local function decode(line)
+	local _, _, bits = line:find("^([^ ]+) ")
+	if not bits then
+		error("no instruction pattern found in '"..line.."'")
+	end
+	if #bits ~= 32 then
+		error("'"..bits.."' isn't 32 bits long")
+	end
+
+	local fields = {}
+	local i = 1
+	while i <= 32 do
+		local c = line:sub(i, i)
+		if c ~= "." then
+			local f = { pos=i }
+			if c:find("%w") then
+				f.size = 1
+				f.value = c
+			elseif c == "<" then
+				local _, newi, name = line:find("^<%-*(%w+)%-*>", i)
+				if not newi then
+					error("malformed field at column "..i.." in '"..line.."'")
+				end
+				f.size = 1 + newi - i
+				f.value = name
+				i = newi
+			else
+				error("bad field char '"..c.."' in '"..line.."'")
+			end
+			-- Only an all-digit name is a literal; a name which merely
+			-- contains a digit is still a variable.
+			if f.value:find("^[0-9]+$") then
+				f.literal = true
+				f.variable = false
+			else
+				f.literal = false
+				f.variable = true
+			end
+			-- Convert from PowerPC numbering to sane numbering
+			f.pos = 33-(f.pos + f.size)
+			fields[#fields+1] = f
+		end
+		i = i + 1
+	end
+	return fields
+end
+
+--- Prints the C fragment which recognises and executes one instruction.
+-- The literal fields are combined into a mask/value test on `value`; inside
+-- the test each variable field is extracted into a `uint32_t` named after
+-- the field, followed by the instruction's code and a `return`.
+-- @param fields the field list produced by decode()
+-- @param code the C code to run when the instruction matches
+local function emit(fields, code)
+	local mask = 0
+	local value = 0
+	for _, f in ipairs(fields) do
+		if f.literal then
+			-- Use the ^ operator rather than math.pow, which is
+			-- deprecated from Lua 5.3 onwards.
+			local s = 2 ^ f.pos
+			local m = 2 ^ f.size - 1
+			mask = mask + m*s
+			value = value + tonumber(f.value)*s
+		end
+	end
+
+	print(string.format("if ((value & 0x%x) == 0x%x) {", mask, value))
+	for _, f in ipairs(fields) do
+		if f.variable then
+			local m = 2 ^ f.size - 1
+			print(string.format("uint32_t %s = (value >> %d) & 0x%x;", f.value, f.pos, m))
+		end
+	end
+
+	print(code)
+	print("return;")
+	print("}")
+end
+
+-- Translate every instruction definition arriving on standard input.
+-- Comments (from '#' to the end of the line) and trailing spaces are
+-- dropped first; lines left empty by that are skipped.
+for raw in io.stdin:lines() do
+	local line = raw:gsub("#.*$", ""):gsub(" *$", "")
+	if line ~= "" then
+		-- Columns 1..32 hold the bit pattern and column 33 the separator,
+		-- so the C code begins at column 34.
+		emit(decode(line), line:sub(34, #line))
+	end
+end
+
--- a/plat/linuxppc/tests/build.lua
+++ b/plat/linuxppc/tests/build.lua
@ -3,5 +3,5 @@ include("tests/plat/build.lua")
 plat_testsuite {
    name = "tests",
    plat = "linuxppc",
-    method = "qemu-ppc"
+    method = "plat/linuxppc/emu+emuppc"
 }