From ab660a44e9a219216d828efc4e93a0b7e6b32699 Mon Sep 17 00:00:00 2001
From: David Given <dg@cowlark.com>
Date: Sun, 17 Jun 2018 09:24:01 +0200
Subject: [PATCH] Lots of floating point, bugfixes, and system calls. Most of
 the tests pass now.

---
 plat/linuxppc/emu/README.md        | 28 +++++++++++++
 plat/linuxppc/emu/emu.c            | 67 ++++++++++++++++++++++++++++--
 plat/linuxppc/emu/instructions.dat | 62 +++++++++++++++++++++++----
 plat/linuxppc/emu/main.c           |  5 +++
 4 files changed, 149 insertions(+), 13 deletions(-)
 create mode 100644 plat/linuxppc/emu/README.md

diff --git a/plat/linuxppc/emu/README.md b/plat/linuxppc/emu/README.md
new file mode 100644
index 000000000..a37a2b44d
--- /dev/null
+++ b/plat/linuxppc/emu/README.md
@@ -0,0 +1,28 @@
+This is just a naive domestic PowerPC simulator, but I think you'll be amused
+by its presumption.
+
+The simulator implements just enough of the instruction set to make the tests
+pass. Certain features aren't supported at all (and an effort has been made
+to detect this and error out). The FPU is crudely approximated using the
+native floating-point support, doesn't support reading and writing FPSCR, and
+will almost certainly produce incorrect results. Plus, there are bugs. It's
+also likely to be very, very slow.
+
+However, it should be easily extensible and the emulator core is only about
+500 lines of code.
+
+Instructions are defined in `instructions.dat`; `mkdispatcher.lua` reads
+these in and generates the instruction decoder. `emu.c` contains the main
+emulator core. `main.c` contains the application front end and the incredibly
+crude syscall interface.
+
+TODO:
+
+  - overflow bit support (instructions that try to set OV error out)
+  - mtcrf
+  - read string / write string
+  - factor out the ELF loader, since linux68k/emu uses it too
+  - floating point condition bits
+  - bit-for-bit FPU emulation, although this looks like a huge amount of work
+
+It was written from scratch for the ACK by me, David Given.
diff --git a/plat/linuxppc/emu/emu.c b/plat/linuxppc/emu/emu.c
index 75800807e..3653d8c2e 100644
--- a/plat/linuxppc/emu/emu.c
+++ b/plat/linuxppc/emu/emu.c
@@ -4,6 +4,7 @@
 #include <stdbool.h>
 #include <strings.h>
 #include <byteswap.h>
+#include <math.h>
 #include "emu.h"
 
 #define BO4 (1<<0)
@@ -12,6 +13,10 @@
 #define BO1 (1<<3)
 #define BO0 (1<<4)
 
+#define XER_SO (1u<<31) /* summary overflow; unsigned so the shift cannot overflow int */
+#define XER_OV (1u<<30) /* overflow */
+#define XER_CA (1u<<29) /* carry */
+
 cpu_t cpu;
 
 static inline bool carry(void)
@@ -22,6 +27,16 @@ static inline bool carry(void)
 #define swb16(x) bswap_16(x)
 #define swb32(x) bswap_32(x)
 
+/* Reports whether a + b + c carries out of 32 bits, i.e. whether either
+ * the first addition or the second one wraps around. */
+static inline bool carry_3(uint32_t a, uint32_t b, uint32_t c)
+{
+	uint32_t partial = a + b;
+	bool first_wraps = partial < a;
+	bool second_wraps = (uint32_t)(partial + c) < c;
+	return first_wraps || second_wraps;
+}
+
 static inline uint32_t reg(uint8_t n)
 {
 	return cpu.gpr[n];
@@ -34,14 +49,33 @@ static inline uint32_t reg0(uint8_t n)
 	return cpu.gpr[n];
 }
 
-static inline uint64_t tobytes(double n)
+/* Double to bytes (punned through a union to respect strict aliasing) */
+static inline uint64_t d2b(double n)
 {
-	return *(uint64_t*)&n;
+	return ((union { double d; uint64_t u; }){ .d = n }).u;
 }
 
+/* Float to bytes (punned through a union to respect strict aliasing) */
+static inline uint32_t f2b(float n)
+{
+	return ((union { float f; uint32_t u; }){ .f = n }).u;
+}
+
+/* Bytes to double (punned through a union to respect strict aliasing) */
+static inline double b2d(uint64_t n)
+{
+	return ((union { uint64_t u; double d; }){ .u = n }).d;
+}
+
+/* Bytes to float (punned through a union to respect strict aliasing) */
+static inline float b2f(uint32_t n)
+{
+	return ((union { uint32_t u; float f; }){ .u = n }).f;
+}
+
 static inline double fpr(uint8_t n)
 {
-	return *(double*)&cpu.fpr[n];
+	return b2d(cpu.fpr[n]); /* reinterpret the stored bit pattern as a double */
 }
 
 static inline uint32_t ext8(int8_t n)
@@ -149,9 +183,16 @@ static void write_string(uint32_t address, uint8_t reg, uint8_t bytes)
 
 static uint32_t addo(uint32_t a, uint32_t b, uint32_t c, bool set_o, bool set_c)
 {
-	if (set_o || set_c)
-		fatal("can't use O or C bits in add yet");
+	if (set_o) /* overflow tracking is still unimplemented */
+		fatal("can't use O bit in add yet");
 	
+	if (set_c) /* caller wants the carry-out recorded in XER */
+	{
+		cpu.xer = cpu.xer & ~XER_CA; /* clear CA, then set it again if the sum carries */
+		if (carry_3(a, b, c))
+			cpu.xer = cpu.xer | XER_CA;
+	}
+
 	return a + b + c;
 }
 
@@ -201,6 +242,24 @@ static void compareu(uint32_t a, uint32_t b, uint8_t field)
 	setcr(bit+3, cpu.xer & (1<<31));
 }
 
+/* Sets CR field `field` from comparing a with b: LT=8, GT=4, EQ=2, NaN=1. */
+static void comparef(double a, double b, uint8_t field)
+{
+	uint32_t c;
+	if (isnan(a) || isnan(b))
+		c = 0x1;
+	else if (a < b)
+		c = 0x8;
+	else if (a > b)
+		c = 0x4;
+	else
+		c = 0x2;
+	uint8_t bit = 28 - field*4; /* note PowerPC bit numbering */
+	/* Shift as unsigned: an int 0xf<<28 or 8<<28 would overflow. */
+	cpu.cr = (cpu.cr & ~((uint32_t)0xf << bit)) | (c << bit);
+	/* TODO: ordered/unordered, FPSCR, etc. */
+}
+
 static uint32_t cntlzw(uint32_t source)
 {
 	return 32 - ffs(source);
diff --git a/plat/linuxppc/emu/instructions.dat b/plat/linuxppc/emu/instructions.dat
index 09b2f3ec0..862e7b091 100644
--- a/plat/linuxppc/emu/instructions.dat
+++ b/plat/linuxppc/emu/instructions.dat
@@ -147,21 +147,65 @@
 <31--><RS->0<FXM--->.<144----->. mtcrf(FXM, reg(RS));
 <31--><RT->0.........<19------>. cpu.gpr[RT] = cpu.cr;
 
-# Floating point loads
+# Floating point operations follow.
+#
+# These are extremely crude, and just enough has been implemented to make the
+# tests pass. The FPSCR bits are all ignored completely.
 
-<50--><FRT><RA-><D------------>. cpu.fpr[FRT] = read_double(reg0(RA) + ext16(D));
-<31--><FRT><RA-><RB-><599----->. cpu.gpr[FRT] = read_double(reg0(RA) + reg(RB));
-<51--><FRT><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.fpr[FRT] = read_byte(ea); cpu.gpr[RA] = ea;
-<31--><FRT><RA-><RB-><631----->. uint32_t ea = reg(RA) + reg(RB); cpu.fpr[FRT] = read_long(ea); cpu.gpr[RA] = ea;
+# FPSCR manipulation
 
-# Floating point stores
+<63--><FRT>..........<583----->R fatal("mffs not supported");
+<63--><F>..<B>.......<64------>. fatal("mcrfs not supported");
+<63--><F>.......<U->.<134----->R fatal("mtfsfi not supported");
+<63-->.<FLM--->.<FRB><711----->R fatal("mtfsf not supported");
+<63--><BT->..........<70------>R fatal("mtfsb0 not supported");
+<63--><BT->..........<38------>R fatal("mtfsb1 not supported");
 
-<54--><FRS><RA-><D------------>. write_double(read_double(reg0(RA) + ext16(D)), cpu.fpr[FRS]);
+# Floating point double loads (raw bits)
+
+<50--><FRT><RA-><D-------------> cpu.fpr[FRT] = read_double(reg0(RA) + ext16(D));
+<31--><FRT><RA-><RB-><599----->. cpu.fpr[FRT] = read_double(reg0(RA) + reg(RB));
+<51--><FRT><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.fpr[FRT] = read_double(ea); cpu.gpr[RA] = ea;
+<31--><FRT><RA-><RB-><631----->. uint32_t ea = reg(RA) + reg(RB); cpu.fpr[FRT] = read_double(ea); cpu.gpr[RA] = ea;
+
+# Floating point double stores (raw bits)
+
+<54--><FRS><RA-><D-------------> write_double(reg0(RA) + ext16(D), cpu.fpr[FRS]);
 <31--><FRS><RA-><RB-><727----->. write_double(reg0(RA) + reg(RB), cpu.fpr[FRS]);
 <55--><FRS><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_double(ea, cpu.fpr[FRS]); cpu.gpr[RA] = ea;
-<31--><FRS><RA-><RB-><759----->. uint32_t ea = reg(RA) + reg(RB); write_long(ea, cpu.fpr[FRS]); cpu.gpr[RA] = ea;
+<31--><FRS><RA-><RB-><759----->. uint32_t ea = reg(RA) + reg(RB); write_double(ea, cpu.fpr[FRS]); cpu.gpr[RA] = ea;
+
+# Floating point single loads (convert from single to double)
+
+<48--><FRT><RA-><D-------------> cpu.fpr[FRT] = d2b(b2f(read_long(reg0(RA) + ext16(D))));
+<31--><FRT><RA-><RB-><535----->. cpu.fpr[FRT] = d2b(b2f(read_long(reg0(RA) + reg(RB))));
+<49--><FRT><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); cpu.fpr[FRT] = d2b(b2f(read_long(ea))); cpu.gpr[RA] = ea;
+<31--><FRT><RA-><RB-><567----->. uint32_t ea = reg(RA) + reg(RB); cpu.fpr[FRT] = d2b(b2f(read_long(ea))); cpu.gpr[RA] = ea;
+
+# Floating point single stores (convert from double to single)
+
+<52--><FRS><RA-><D-------------> write_long(reg0(RA) + ext16(D), f2b(fpr(FRS)));
+<31--><FRS><RA-><RB-><663----->. write_long(reg0(RA) + reg(RB), f2b(fpr(FRS)));
+<53--><FRS><RA-><D-------------> uint32_t ea = reg(RA) + ext16(D); write_long(ea, f2b(fpr(FRS))); cpu.gpr[RA] = ea;
+<31--><FRS><RA-><RB-><695----->. uint32_t ea = reg(RA) + reg(RB); write_long(ea, f2b(fpr(FRS))); cpu.gpr[RA] = ea;
 
 # Floating point arithmetic
 
 <63--><FRT>.....<FRB><72------>R setcr1(R, cpu.fpr[FRT] = cpu.fpr[FRB]);
-<63--><FRT><FRA><FRB><20------>R setcr1(R, cpu.fpr[FRT] = tobytes(fpr(FRA) - fpr(FRB)));
+<63--><FRT>.....<FRB><40------>R setcr1(R, cpu.fpr[FRT] = d2b(-fpr(FRB)));
+<63--><FRT>.....<FRB><264----->R setcr1(R, cpu.fpr[FRT] = d2b(fabs(fpr(FRB))));
+<63--><FRT>.....<FRB><136----->R setcr1(R, cpu.fpr[FRT] = d2b(-fabs(fpr(FRB))));
+<63--><FRT><FRA><FRB><21------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) + fpr(FRB)));
+<63--><FRT><FRA><FRB><20------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) - fpr(FRB)));
+<63--><FRT><FRA><FRB><25------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) * fpr(FRB)));
+<63--><FRT><FRA><FRB><18------>R setcr1(R, cpu.fpr[FRT] = d2b(fpr(FRA) / fpr(FRB)));
+
+# Floating point comparisons
+
+<63--><F>..<FRA><FRB><0------->. comparef(fpr(FRA), fpr(FRB), F);
+<63--><F>..<FRA><FRB><32------>. comparef(fpr(FRA), fpr(FRB), F);
+
+# Floating point conversions
+
+<63--><FRT>.....<FRB><14------>R setcr1(R, fpr(FRB)); cpu.fpr[FRT] = (uint32_t)(int32_t)rint(fpr(FRB));
+<63--><FRT>.....<FRB><15------>R setcr1(R, fpr(FRB)); cpu.fpr[FRT] = (uint32_t)(int32_t)fpr(FRB);
diff --git a/plat/linuxppc/emu/main.c b/plat/linuxppc/emu/main.c
index fe52aa566..79e2634d1 100755
--- a/plat/linuxppc/emu/main.c
+++ b/plat/linuxppc/emu/main.c
@@ -157,6 +157,11 @@ void system_call(uint8_t trapno)
 			break;
 		}
 
+		case 20: /* getpid */
+		case 48: /* signal */
+		case 54: /* ioctl */
+		case 67: /* sigaction */
+		case 78: /* gettimeofday */
 		case 126: /* sigprocmask */
 			cpu.gpr[4] = 0;
 			break;